from math import acos, pi, sin, cos, sqrt
import textwrap
import time
import tempfile
import copy
import itertools
import numpy as np
from scipy.linalg import inv
from pwtools import common, signal, num, atomic_data, constants, _flib
from pwtools.common import assert_cond
from pwtools.decorators import crys_add_doc
from pwtools.base import FlexibleGetters
from pwtools.constants import Angstrom
from pwtools.num import fempty, rms3d, match_mask, norm
import warnings
##warnings.simplefilter('always')
#-----------------------------------------------------------------------------
# misc math
#-----------------------------------------------------------------------------
[docs]
def angle(x,y):
"""Angle between vectors `x` and `y` in degrees.
Parameters
----------
x,y : 1d numpy arrays
"""
# Numpy's `acos' is "acrcos", but we take the one from math for scalar
# args.
return acos(np.dot(x,y)/norm(x)/norm(y))*180.0/pi
#-----------------------------------------------------------------------------
# crystallographic constants and basis vectors
#-----------------------------------------------------------------------------
[docs]
@crys_add_doc
def volume_cell(cell):
"""Volume of the unit cell from cell vectors. Calculates the triple
product::
np.dot(np.cross(a,b), c) == det(cell)
of the basis vectors a,b,c contained in `cell`. Note that (mathematically)
the vectors can be either the rows or the cols of `cell`.
Parameters
----------
%(cell_doc)s
Returns
-------
volume, unit: [a]**3
Examples
--------
>>> a = [1,0,0]; b = [2,3,0]; c = [1,2,3.];
>>> m = np.array([a,b,c])
>>> volume_cell(m)
9.0
>>> volume_cell(m.T)
9.0
>>> m = rand(3,3)
>>> volume_cell(m)
0.11844733769775126
>>> volume_cell(m.T)
0.11844733769775123
>>> np.linalg.det(m)
0.11844733769775125
>>> np.linalg.det(m.T)
0.11844733769775125
"""
assert_cond(cell.shape == (3,3), "input must be (3,3) array")
## return np.dot(np.cross(cell[0,:], cell[1,:]), cell[2,:])
return abs(np.linalg.det(cell))
[docs]
def volume_cell3d(cell, axis=0):
"""Same as :func:`volume_cell` for 3d arrays.
Parameters
----------
cell : 3d array
axis : time axis (e.g. cell.shape = (100,3,3) -> axis=0)
"""
assert cell.ndim == 3
sl = [slice(None)]*cell.ndim
ret = []
for ii in range(cell.shape[axis]):
sl[axis] = ii
ret.append(volume_cell(cell[tuple(sl)]))
return np.array(ret)
[docs]
@crys_add_doc
def volume_cc(cryst_const):
"""Volume of the unit cell from crystallographic constants [1]_.
Parameters
----------
%(cryst_const_doc)s
Returns
-------
volume, unit: [a]**3
References
----------
.. [1] http://en.wikipedia.org/wiki/Parallelepiped
"""
assert len(cryst_const) == 6, "shape must be (6,)"
a = cryst_const[0]
b = cryst_const[1]
c = cryst_const[2]
alpha = cryst_const[3]*pi/180
beta = cryst_const[4]*pi/180
gamma = cryst_const[5]*pi/180
return a*b*c*sqrt(1+ 2*cos(alpha)*cos(beta)*cos(gamma)
- cos(alpha)**2 - cos(beta)**2 - cos(gamma)**2)
[docs]
def volume_cc3d(cryst_const, axis=0):
"""Same as :func:`volume_cc` for 2d arrays (the name "3d" is just to indicate
that we work w/ trajectories).
Parameters
----------
cryst_const : 2d array
axis : time axis (e.g. cryst_const.shape = (100,6) -> axis=0)
"""
assert cryst_const.ndim == 2
sl = [slice(None)]*cryst_const.ndim
ret = []
for ii in range(cryst_const.shape[axis]):
sl[axis] = ii
ret.append(volume_cc(cryst_const[tuple(sl)]))
return np.array(ret)
[docs]
@crys_add_doc
def cell2cc(cell):
"""From `cell` to crystallographic constants a, b, c, alpha, beta,
gamma.
This mapping is unique in the sense that multiple cells will have
the same `cryst_const`, i.e. the representation of the cell in
`cryst_const` is independent from the spacial orientation of the cell
w.r.t. a cartesian coord sys.
Parameters
----------
%(cell_doc)s
Returns
-------
%(cryst_const_doc)s,
unit: [a]**3
"""
cell = np.asarray(cell)
assert_cond(cell.shape == (3,3), "cell must be (3,3) array")
cryst_const = np.empty((6,), dtype=float)
# a = |a|, b = |b|, c = |c|
cryst_const[:3] = np.sqrt((cell**2.0).sum(axis=1))
va = cell[0,:]
vb = cell[1,:]
vc = cell[2,:]
# alpha
cryst_const[3] = angle(vb,vc)
# beta
cryst_const[4] = angle(va,vc)
# gamma
cryst_const[5] = angle(va,vb)
return cryst_const
[docs]
def cell2cc3d(cell, axis=0):
"""Same as :func:`cell2cc` for 3d arrays.
Parameters
----------
cell : 3d array
axis : time axis (e.g. cell.shape = (100,3,3) -> axis=0)
"""
assert cell.ndim == 3
sl = [slice(None)]*cell.ndim
ret = []
for ii in range(cell.shape[axis]):
sl[axis] = ii
ret.append(cell2cc(cell[tuple(sl)]))
return np.array(ret)
[docs]
@crys_add_doc
def cc2cell(cryst_const):
"""From crystallographic constants a, b, c, alpha, beta,
gamma to `cell`.
This mapping not NOT unique in the sense that one set of `cryst_const` can
have arbitrarily many representations in terms of cells. We stick to a
common convention. See notes below.
Parameters
----------
%(cryst_const_doc)s
Returns
-------
%(cell_doc)s
unit: [a]**3
Notes
-----
Basis vectors fulfilling the crystallographic constants are arbitrary
w.r.t. their orientation in space. We choose the common convention that
| va : along x axis
| vb : in the x-y plane
Then, vc is fixed.
"""
a = cryst_const[0]
b = cryst_const[1]
c = cryst_const[2]
alpha = cryst_const[3]*pi/180
beta = cryst_const[4]*pi/180
gamma = cryst_const[5]*pi/180
va = np.array([a,0,0])
vb = np.array([b*cos(gamma), b*sin(gamma), 0])
# vc must be calculated:
# cx: projection onto x axis (va)
cx = c*cos(beta)
# Now need cy and cz ...
#
# Maxima solution
#
## vol = volume_cc(cryst_const)
## cz = vol / (a*b*sin(gamma))
## cy = sqrt(a**2 * b**2 * c**2 * sin(beta)**2 * sin(gamma)**2 - \
## vol**2) / (a*b*sin(gamma))
## cy2 = sqrt(c**2 - cx**2 - cz**2)
#
# PWscf , WIEN2K's sgroup, results are the same as with Maxima but the
# formulas are shorter :)
cy = c*(cos(alpha) - cos(beta)*cos(gamma))/sin(gamma)
cz = sqrt(c**2 - cy**2 - cx**2)
vc = np.array([cx, cy, cz])
return np.array([va, vb, vc])
[docs]
def cc2cell3d(cryst_const, axis=0):
"""Same as :func:`cc2cell` for 2d arrays (the name "3d" is just to indicate
that we work w/ trajectories).
Parameters
----------
cryst_const : 2d array
axis : time axis (e.g. cryst_const.shape = (100,6) -> axis=0)
"""
assert cryst_const.ndim == 2
sl = [slice(None)]*cryst_const.ndim
ret = []
for ii in range(cryst_const.shape[axis]):
sl[axis] = ii
ret.append(cc2cell(cryst_const[tuple(sl)]))
return np.array(ret)
[docs]
@crys_add_doc
def recip_cell(cell):
"""Reciprocal lattice vectors ``{a,b,c}* = 2*pi / V * {b,c,a} x {c,a,b}``.
The reciprocal volume is ``(2*pi)**3/V``. The length unit of the reciprocal
vectors is 1/(length unit of `cell`), e.g. 1/Angstrom.
Parameters
----------
%(cell_doc)s
Returns
-------
rcell : array (3,3)
Reciprocal vectors as rows.
Examples
--------
>>> # the length of recip. cell vectors for a cubic cell of 1 Ang side
>>> # length is 2*pi -> reciprocal length unit is 1/Ang
>>> crys.recip_cell(identity(3))/2/pi
array([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> crys.recip_cell(identity(3)*2)/2/pi
array([[ 0.5, 0. , 0. ],
[ 0. , 0.5, 0. ],
[ 0. , 0. , 0.5]])
"""
cell = np.asarray(cell, dtype=float)
assert_cond(cell.shape == (3,3), "cell must be (3,3) array")
rcell = np.empty_like(cell)
vol = volume_cell(cell)
a = cell[0,:]
b = cell[1,:]
c = cell[2,:]
rcell[0,:] = 2*pi/vol * np.cross(b,c)
rcell[1,:] = 2*pi/vol * np.cross(c,a)
rcell[2,:] = 2*pi/vol * np.cross(a,b)
return rcell
[docs]
def grid_in_cell(cell, h=None, size=None, minpoints=1, even=False, fullout=False):
"""For a given cell, generate grid `size` from grid spacing `h` or vice
versa.
Usually used to calculate k-grids for reciprocal cells. See also `kgrid()`.
Parameters
----------
cell : array (3,3)
Cell with vectors as rows.
h : float
1d target grid spacing along a cell axis. Unit is that of the cell
sides.
size : sequence (3,)
Use either `h` or `size`.
minpoints : int
Minimal number of grid points in each direction. May result in smaller
effective `h`. `minpoints=1` (default) asserts that at least the
Gamma point [1,1,1] is returned. Otherwise, big cells or big `h`
values will create zero grid points.
even : bool
Force even grid point numbers. Here, we add 1 to odd points, thus
creating a grid more dense than requested by `h`.
fullout : bool
See below.
Returns
-------
size : if `h != None` + `fullout=False`
size, spacing : if `h != None` + `fullout=True`
spacing : if `size` != None and `h=None`
size : array (3,) [nx, ny, nz]
Integer numbers of grid points along each reciprocal axis.
spacing : 1d array (3,)
Result spacing along each reciprocal axis if `size` would be used.
Notes
-----
* B/c an integer array is created by rounding, the effective grid spacing
will mostly be slightly bigger/smaller then `h` (see `fullout`).
Examples
--------
>>> crys.grid_in_cell(diag([1,2,3]), h=1)
array([1, 2, 3])
>>> crys.grid_in_cell(diag([1,2,3]), h=0.5)
array([2, 4, 6])
>>> crys.grid_in_cell(diag([1,2,3]), h=0.5, fullout=True)
(array([2, 4, 6]), array([ 0.5, 0.5, 0.5]))
>>> crys.grid_in_cell(diag([1,2,3]), size=[2,2,2])
array([ 0.5, 1. , 1.5])
"""
spacing = h
assert None in [spacing, size], "use either `h` or `size`"
assert minpoints >= 0
cell = np.asarray(cell, dtype=float)
norms = np.sqrt((cell**2.0).sum(axis=1))
if size is None:
size = np.round(norms / spacing)
if even:
size += (size % 2.0)
size = size.astype(int)
mask = size < minpoints
if mask.any():
size[mask] = minpoints
# only possible if minpoints=0
if (size == 0).any():
raise Exception("at least one point count is zero, decrease `spacing`, "
"size=%s" %str(size))
if fullout:
return size, norms * 1.0 / size
else:
return size.astype(int)
else:
size = np.array(size)
return norms * 1.0 / size
[docs]
def kgrid(cell, **kwds):
"""Calculate k-point grid for given real-space cell or grid spacing from
grid size.
This is a convenience and backward compat function which does
``grid_in_cell(recip_cell(cell), **kwds)``.
Parameters
----------
cell : array (3,3)
Real space unit cell.
**kwds : See grid_in_cell()
Returns
-------
See grid_in_cell().
Notes
-----
* Since the reciprocal cell is calculated with `recip_cell`, ``h=0.5``
1/Ang seems to produce a sufficiently dense grid for insulators. Metals
need a finer k-grid for electrons.
Examples
--------
>>> import numpy as np
>>> from pwtools.crys import kgrid
>>> cell = np.diag([5,5,8])
>>> kgrid(cell, h=0.5)
array([3, 3, 2])
>>> # see effective grid spacing
>>> kgrid(cell, h=0.5, fullout=True)
(array([3, 3, 2]), array([ 0.41887902, 0.41887902, 0.39269908]))
>>> # reverse: effective grid spacing for given size
>>> kgrid(cell, size=[3,3,2])
array([ 0.41887902, 0.41887902, 0.39269908])
>>> # even grid
>>> kgrid(cell, h=0.5, even=True)
array([4, 4, 2])
>>> # big cell, at least Gamma with minpoints=1
>>> kgrid(cell*10, h=0.5)
array([1, 1, 1])
>>> # Create MP mesh
>>> ase.dft.monkhorst_pack(kgrid(cell, h=0.5))
>>> # cell: 1 Ang side length, recip cell 2*pi/Ang side length,
>>> # unit of h: 1/Ang
>>> crys.recip_cell(np.identity(3))
array([[ 6.28318531, 0. , 0. ],
[ 0. , 6.28318531, 0. ],
[ 0. , 0. , 6.28318531]])
>>> kgrid(np.identity(3), h=pi, fullout=True)
(array([2, 2, 2]), array([ 3.14159265, 3.14159265, 3.14159265]))
"""
if 'dk' in kwds:
warnings.warn("`dk` is deprecated, use `h` instead",
DeprecationWarning)
kwds['h'] = kwds['dk']
kwds.pop('dk')
return grid_in_cell(recip_cell(cell), **kwds)
[docs]
@crys_add_doc
def cc2celldm(cryst_const, fac=1.0):
"""
Convert cryst_const to PWscf `celldm`.
Parameters
----------
%(cryst_const_doc)s
fac : float, optional
conversion a[any unit] -> a[Bohr]
Returns
-------
%(celldm)s
"""
assert len(cryst_const) == 6, ("cryst_const has length != 6")
celldm = np.empty((6,), dtype=np.float64)
a,b,c,alpha,beta,gamma = np.asarray(cryst_const, dtype=np.float64)
celldm[0] = a*fac
celldm[1] = b/a
celldm[2] = c/a
celldm[3] = cos(alpha*pi/180.0)
celldm[4] = cos(beta*pi/180.0)
celldm[5] = cos(gamma*pi/180.0)
return celldm
[docs]
@crys_add_doc
def celldm2cc(celldm, fac=1.0):
"""Convert PWscf celldm to cryst_const.
Parameters
----------
%(celldm)s
fac : float, optional
conversion a[Bohr] -> a[any unit]
"""
assert len(celldm) == 6, ("celldm has length != 6")
cryst_const = np.empty((6,), dtype=np.float64)
a,ba,ca,cos_alpha,cos_beta,cos_gamma = np.asarray(celldm, dtype=np.float64)
a = a*fac
cryst_const[0] = a
cryst_const[1] = ba * a
cryst_const[2] = ca * a
cryst_const[3] = acos(cos_alpha) / pi * 180.0
cryst_const[4] = acos(cos_beta) / pi * 180.0
cryst_const[5] = acos(cos_gamma) / pi * 180.0
return cryst_const
#-----------------------------------------------------------------------------
# super cell building
#-----------------------------------------------------------------------------
[docs]
def scell_mask(nx, ny, nz, direc=1):
"""Build a mask for the creation of a nx x ny x nz supercell (for 3d
coordinates).
Return all possible permutations with repitition of the integers ix, iy, iz
= 0, ..., nx-1, ny-1, nz-1 . Dimensions can also be negative, in
which case i = 0,-1,...,-n+1 . Parameter `direc` reverses the ordering.
Parameters
----------
nx, ny, nz : int
direc : int
1 or -1, order mask 0,...,n-1 (cells placed "center to edge") or
reverse n-1,...,0 ("egde to center")
Returns
-------
mask : 2d array, shape (nx*ny*nz, 3)
Examples
--------
>>> # 2x2x2 supercell
>>> scell_mask(2,2,2)
array([[ 0., 0., 0.],
[ 0., 0., 1.],
[ 0., 1., 0.],
[ 0., 1., 1.],
[ 1., 0., 0.],
[ 1., 0., 1.],
[ 1., 1., 0.],
[ 1., 1., 1.]])
>>> # 2x2x1 slab = "plane" of 4 cells
>>> scell_mask(2,2,1)
array([[ 0., 0., 0.],
[ 0., 1., 0.],
[ 1., 0., 0.],
[ 1., 1., 0.]])
>>> # direction reversed
>>> scell_mask(2,2,1,direc=-1)
array([[ 1., 1., 0.],
[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 0.]])
"""
if direc == 1:
mkrange = lambda x: range(0,x) if x >= 0 else range(0,x,-1)
elif direc == -1:
mkrange = lambda x: range(x-1,-1,-1) if x >= 0 else range(x+1,1)
return np.array([k for k in itertools.product(mkrange(nx), mkrange(ny),
mkrange(nz))],
dtype=float)
[docs]
def scell(obj, dims, method=1, **kwds):
"""Build supercell based on `dims`.
Uses coords_frac and cell.
Parameters
----------
obj : Structure or Trajectory
dims : tuple (nx, ny, nz) for a N = nx * ny * nz supercell
method : int, optional
Switch between numpy-ish (1) or loop (2) implementation. (2) should
always produce correct results but is sublty slower. Only for
Structure.
**kwds : see :func:`scell_mask`
Notes
-----
The mask for the supercell is created by :func:`scell_mask` and applied to
each atom in `obj` one after another, i.e. each atom is repeated nx*ny*nz
times according to the mask pattern, independently of how the pattern looks
like (e.g. the `direc` parameter in :func:`scell_mask`). So, just as rows
in np.repeat(), we have:
| original: symbols=[A,B,C,D]
| 2 x 1 x 1: symbols=[A,A,B,B,C,C,D,D]
| nx x ny x nz: symbols=[(nx*ny*nz) x A, (nx*ny*nz) x B, ...]
Returns
-------
scell : Structure
"""
# Place each atom N = nx*ny*nz times in the supercell, i.e. copy unit cell
# N times. Actually, N-1, since ix=iy=iz=0 is the unit cell itself.
#
# Let k = {x,y,z}.
#
# mask[j,:] = [ix, iy, iz], ik = integers (floats actually, but
# mod(ik, floor(ik)) == 0.0)
#
# original cell:
# coords_frac[i,:] = position vect of atom i in the unit cell in *crystal*
# coords!!
#
# super cell:
# sc_coords_frac[i,:] = coords_frac[i,:] + [ix, iy, iz]
# for all permutations (see scell_mask()) of ix, iy, iz.
# ik = 0, ..., nk - 1
#
# sc_coords_frac : crystal coords w.r.t the *old* cell, i.e. the entries are in
# [0,(max(dims))], not [0,1], is scaled below
#
if 'direc' not in kwds:
kwds['direc'] = 1
mask = scell_mask(*tuple(dims), **kwds)
nmask = mask.shape[0]
if obj.is_struct:
sc_cell = obj.cell * np.asarray(dims)[:,None]
container = Structure
elif obj.is_traj:
# (nstep,3,3) * (1,3,1) -> (nstep, 3,3)
sc_cell = obj.cell * np.asarray(dims)[None,:,None]
container = Trajectory
else:
raise Exception("unknown input type")
if method == 1:
sc_symbols = np.array(obj.symbols).repeat(nmask).tolist() if (obj.symbols
is not None) else None
if obj.is_struct:
# (natoms, 1, 3) + (1, nmask, 3) -> (natoms, nmask, 3)
sc_coords_frac = (obj.coords_frac[:,None,:]
+ mask[None,...]).reshape(obj.natoms*nmask,3)
elif obj.is_traj:
# cool, eh?
# (nstep, natoms, 1, 3) + (1, 1, nmask, 3) -> (nstep, natoms, nmask, 3)
sc_coords_frac = (obj.coords_frac[...,None,:]
+ mask[None,None,...]).reshape(obj.nstep,obj.natoms*nmask,3)
else:
raise Exception("huh!?")
# explicit loop version for testing, this is the reference implementation,
# only for Structure
elif method == 2:
if obj.is_struct:
sc_symbols = []
sc_coords_frac = np.empty((nmask*obj.natoms, 3), dtype=float)
k = 0
for iatom in range(obj.natoms):
for j in range(nmask):
if obj.symbols is not None:
sc_symbols.append(obj.symbols[iatom])
sc_coords_frac[k,:] = obj.coords_frac[iatom,:] + mask[j,:]
k += 1
else:
raise Exception("method=2 only implemented for Structure")
else:
raise Exception("unknown method: %s" %repr(method))
sc_coords_frac[...,0] /= dims[0]
sc_coords_frac[...,1] /= dims[1]
sc_coords_frac[...,2] /= dims[2]
return container(coords_frac=sc_coords_frac,
cell=sc_cell,
symbols=sc_symbols)
[docs]
def scell3d(traj, dims, **kwds):
"""Supercell for Trajectory. Deprecated. Use :func:`scell` instead."""
warnings.warn("scell3d() is deprecated, use scell() for Trajectory as well",
DeprecationWarning)
return scell(traj, dims, **kwds)
#-----------------------------------------------------------------------------
# atomic coords processing / evaluation, MD analysis
#-----------------------------------------------------------------------------
[docs]
def velocity_traj(arr, dt=1.0, axis=0, endpoints=True):
"""Calculate velocity from `arr` (usually coordinates) along time`axis`
using timestep `dt`.
Central differences are used (example x-coord of atom 0:
``x=coords[:,0,0]``)::
v[i] = [ x[i+1] - x[i-1] ] / (2*dt)
which returns nstep-2 points belonging to the the middle of the
trajectory x[1:-1]. To get an array which is `nstep` long, the fist and
last velocity are set to the first and last calculated value (if
``endpoints=True``)::
v[0,...] == v[1,...]
v[-1,...] == v[-2,...]
"""
# Central diffs are more accurate than simple finite diffs
#
# v[i] = [ x[i+1] - x[i] ] / dt
#
# These return nstep-1 points (one more then central diffs) but we
# have the problem of assigning the velocity array to the time axis:
# t[1:] or t[:-1] are both shifted w.r.t. to the real time axis
# position -- the correct way would be to assign it to t[:-1] + 0.5*dt.
# In contrast, central diffs belong to t[1:-1] by definition.
#
# If self.timestep is small (i.e. the trajectory is smooth), all this is
# not really a problem, but central diffs are just better and more
# consistent. Even forces calculated from these velocities (force =
# mass * dv / dt) are reasonably accurate compared to the forces from
# the MD trajectory input. One could implement get_forces() like that
# if needed, but so far all MD codes provide us their forces, of
# course. Also, one *could* create 3*natoms Spline objects thru coords
# (splines along time axis) and calc 1st and 2nd deriv from that. But
# that's probably very slow.
if endpoints:
vv = np.empty_like(arr)
# To support general axis stuff, use slice magic ala slicetake/sliceput
assert axis == 0, ("only axis==0 implemented ATM")
tmp = (arr[2:,...] - arr[:-2,...]) / 2.0 / dt
if endpoints:
vv[1:-1,...] = tmp
vv[0,...] = tmp[0,...]
vv[-1,...] = tmp[-1,...]
else:
vv = tmp
return vv
[docs]
def rmsd(traj, ref_idx=0):
"""Root mean square distance over an MD trajectory.
The normalization constant is the number of atoms. Takes the RMS of the
difference of *cartesian* coords at each time step. Only meaningful if
``tr.coords`` are *not* pbc-wrapped.
Parameters
----------
traj : Trajectory object
ref_idx : int, optional
time index of the reference structure (i.e. 0 to compare with the
start structure, -1 for the last along `axis`).
Returns
-------
rmsd : 1d array (traj.nstep,)
Examples
--------
>>> # We only need traj.{coords,nstep,timeaxis}, no symbols, cell, ...
>>> traj = crys.Trajectory(coords=rand(500,10,3))
>>> # The RMSD w.r.t. the start structure. See when the structure starts to
>>> # "converge" to a stable mean configuration during an MD.
>>> rmsd(traj, ref_idx=0)
>>> # For a relaxation run, the RMSD w.r.t. the final converged structure. The
>>> # RMSD should converge to zero here.
>>> rmsd(traj, ref_idx=-1)
"""
# sl_ref : pull out 2d array of coords of the reference structure
# sl_newaxis : slice to broadcast (newaxis) this 2d array to 3d for easy
# substraction
assert traj.coords.ndim == 3
ndim = 3
coords = traj.coords.copy()
sl_ref = [slice(None)]*ndim
sl_ref[traj.timeaxis] = ref_idx
sl_newaxis = [slice(None)]*ndim
sl_newaxis[traj.timeaxis] = None
ref = coords[tuple(sl_ref)].copy()
coords -= ref[tuple(sl_newaxis)]
return rms3d(coords, axis=traj.timeaxis, nitems=float(traj.natoms))
[docs]
def pbc_wrap_coords(coords_frac, copy=True, mask=[True]*3, xyz_axis=-1):
"""Apply periodic boundary conditions to array of fractional coords.
Wrap atoms with fractional coords > 1 or < 0 into the cell.
Parameters
----------
coords_frac : array 2d or 3d
fractional coords, if 3d then one axis is assumed to be a time axis and
the array is a MD trajectory or such
copy : bool
Copy coords_frac before applying pbc.
mask : sequence of bools, len = 3 for x,y,z
Apply pbc only x, y or z. E.g. [True, True, False] would not wrap the z
coordinate.
xyz_axis : the axis of `coords_frac` where the indices 0,1,2 pull out the x,y,z
coords. For a usual 2d array of coords with shape (natoms,3),
xyz_axis=1 (= last axis = -1). For a 3d array (natoms, nstep, 3),
xyz_axis=2 (also -1).
Returns
-------
coords_frac : array_like(coords_frac)
Array with all values in [0,1] except for those where ``mask[i]=False``.
Notes
-----
About the copy arg: If ``copy=False``, then this is an in-place operation
and the array in the global scope is modified! In fact, then these do the
same::
>>> a = pbc_wrap_coords(a, copy=False)
>>> pbc_wrap_coords(a, copy=False)
"""
assert coords_frac.shape[xyz_axis] == 3, "dim of xyz_axis of `coords_frac` must be == 3"
ndim = coords_frac.ndim
assert ndim in [2,3], "coords_frac must be 2d or 3d array"
tmp = coords_frac.copy() if copy else coords_frac
for i in range(3):
if mask[i]:
sl = [slice(None)]*ndim
sl[xyz_axis] = i
tsl = tuple(sl)
tmp[tsl] = np.remainder(tmp[tsl], 1.0)
return tmp
[docs]
def pbc_wrap(obj, copy=True, **kwds):
"""Apply periodic boundary conditions to fractional coords.
Same as :func:`pbc_wrap_coords` but accepts a Structure or Trajectory
instead of the array ``coords_frac``. Returns an object with atoms
(coords_frac and coords) wrapped into the cell.
Parameters
----------
obj : Structure or Trajectory
copy : bool
Return copy or in-place modified object.
**kwds : keywords
passed to :func:`pbc_wrap_coords`
"""
out = obj.copy() if copy else obj
# set to None so that it will be re-calculated by set_all()
out.coords = None
# copy=False: in-place modify b/c we copied the whole object before if
# requested by user
pbc_wrap_coords(out.coords_frac, copy=False, **kwds)
out.set_all()
return out
[docs]
def coord_trans(coords, old=None, new=None, copy=True, axis=-1):
"""General-purpose n-dimensional coordinate transformation. `coords` can
have arbitrary dimension, i.e. it can contain many vectors to be
transformed at once. But `old` and `new` must have ndim=2, i.e. only one
old and new coord sys for all vectors in `coords`.
The most general case is that you want to transform an MD trajectory from a
variable cell run, you have smth like this:
| coords.shape = (nstep,natoms,3)
| old.shape/new.shape = (nstep,3,3)
You have a set of old and new coordinate systems at each step.
Then, use a loop over all time steps and call this function nstep times.
See also coord_trans3d().
Parameters
----------
coords : array (d0, d1, ..., M)
Array of arbitrary rank with coordinates (length M vectors) in old
coord sys `old`. The only shape resiriction is that the last dim must
equal the number of coordinates (coords.shape[-1] == M == 3 for normal
3-dim x,y,z).
| 1d : trivial, transform that vector (length M)
| 2d : The matrix must have shape (N,M), i.e. N vectors to be
| transformed are the *rows*.
| 3d : coords must have shape (..., M)
If `coords` has a different shape, use `axis` to define the M-axis.
old, new : 2d arrays (M,M)
Matrices with the old and new basis vectors as *rows*. Note that in the
usual math literature, columns are used. In that case, use ``old.T`` and/or
``new.T``.
copy : bool, optional
True: overwrite `coords`
False: return new array
axis : the axis along which the length-M vectors are placed in `coords`,
default is -1, i.e. coords.shape = (...,M)
Returns
-------
array of shape = coords.shape, coordinates in system `new`
Examples
--------
>>> # Taken from [1]_.
>>> import numpy as np
>>> import math
>>> v_I = np.array([1.0,1.5])
>>> I = np.identity(2)
>>> X = math.sqrt(2)/2.0*np.array([[1,-1],[1,1]]).T
>>> Y = np.array([[1,1],[0,1]]).T
>>> coord_trans(v_I,I,I)
array([ 1. , 1.5])
>>> v_X = coord_trans(v,I,X)
>>> v_Y = coord_trans(v,I,Y)
>>> v_X
array([ 1.76776695, 0.35355339])
>>> v_Y
array([-0.5, 1.5])
>>> coord_trans(v_Y,Y,I)
array([ 1. , 1.5])
>>> coord_trans(v_X,X,I)
array([ 1. , 1.5])
>>> # 3d example
>>> c_old = np.random.rand(30,200,3)
>>> old = np.random.rand(3,3)
>>> new = np.random.rand(3,3)
>>> c_new = coord_trans(c_old, old=old, new=new)
>>> c_old2 = coord_trans(c_new, old=new, new=old)
>>> np.testing.assert_almost_equal(c_old, c_old2)
>>> # If you have an array of shape, say (10,3,100), i.e. the last
>>> # dimension is NOT 3, then use numpy.swapaxes() or axis:
>>> coord_trans(arr, old=..., new=..., axis=1)
>>> coord_trans(arr.swapaxes(1,2), old=..., new=...).swapaxes(1,2)
References
----------
.. [1] http://www.mathe.tu-freiberg.de/~eiermann/Vorlesungen/HM/index_HM2.htm, Ch.6
See Also
--------
coord_trans3d
"""
common.assert_cond(old.ndim == new.ndim == 2,
"`old` and `new` must be rank 2 arrays")
common.assert_cond(old.shape == new.shape,
"`old` and `new` must have th same shape")
common.assert_cond(old.shape[0] == old.shape[1],
"`old` and `new` must be square")
# arr.T and arr.swapaxes() are no in-place operations, just views, input
# arrays are not changed, but copy() b/c we can overwrite coords
_coords = coords.copy() if copy else coords
mx_axis = _coords.ndim - 1
axis = mx_axis if (axis == -1) else axis
# must use `coords[:] = ...`, just `coords = ...` is a new array
if axis != mx_axis:
# bring xyz-axis to -1 for broadcasting
_coords[:] = _trans(_coords.swapaxes(-1, axis),
old,
new).swapaxes(-1, axis)
else:
_coords[:] = _trans(_coords,
old,
new)
return _coords
[docs]
def _trans(coords, old, new):
"""Helper for coord_trans()."""
common.assert_cond(coords.shape[-1] == old.shape[0],
"last dim of `coords` must match first dim"
" of `old` and `new`")
# The equation works for ``old.T`` and ``new.T`` = columns.
return np.dot(coords, np.dot(inv(new.T), old.T).T)
[docs]
def coord_trans3d(coords, old=None, new=None, copy=True, axis=-1, timeaxis=0):
"""Special case version for debugging mostly. It does the loop for the
general case where coords+old+new are 3d arrays (e.g. variable cell MD
trajectory).
This may be be slow for large ``nstep``. All other cases (``coords`` has
arbitrary many dimensions, i.e. ndarray + old/new are fixed) are covered
by coord_trans(). Also some special cases may be possible to solve with
np.dot() alone if the transformation simplifes. Check your math.
Parameters
----------
coords : 3d array
one axis (`axis`) must have length-M vectors, another (`timeaxis`) must
be length `nstep`
old,new : 2d arrays, two axes must be of equal length
copy : see coord_trans()
axis : axis where length-M vecs are placed if the timeaxis is removed
timeaxis : time axis along which 2d arrays are aligned
Examples
--------
| M = 3
| coords : (nstep,natoms,3)
| old,new : (nstep,3,3)
| timeaxis = 0
| axis = 1 == -1 (remove timeaxis -> 2d slices (natoms,3) and (3,3) -> axis=1)
"""
a,b,c = coords.ndim, old.ndim, new.ndim
assert a == b == c, "ndim: coords: %i, old: %i, new: %i" %(a,b,c)
a,b,c = coords.shape[timeaxis], old.shape[timeaxis], new.shape[timeaxis]
assert a == b == c, "shape[timeaxis]: coords: %i, old: %i, new: %i" %(a,b,c)
ndim = coords.ndim
nstep = coords.shape[timeaxis]
ret = []
sl = [slice(None)]*ndim
ret = []
for ii in range(nstep):
sl[timeaxis] = ii
tsl = tuple(sl)
ret.append(coord_trans(coords[tsl],
old=old[tsl],
new=new[tsl],
axis=axis,
copy=copy))
ret = np.array(ret)
if timeaxis != 0:
return np.rollaxis(ret, 0, start=timeaxis+1)
else:
return ret
[docs]
def min_image_convention(sij, copy=False):
"""Apply minimum image convention to differences of fractional coords.
Handles also cases where coordinates are separated by an arbitrary number
of periodic images.
Parameters
----------
sij : ndarray
Differences of fractional coordinates, usually (natoms, natoms, 3),
i.e. a, "matrix" of distance vectors, obtained by smth like
``sij = coords_frac[:,None,:] - coords_frac[None,:,:]`` where
``coords_frac.shape = (natoms,3)``.
copy : bool, optional
Returns
-------
sij in-place modified or copy
"""
sij = sij.copy() if copy else sij
mask = sij >= 0.5
while mask.any():
sij[mask] -= 1.0
mask = sij >= 0.5
mask = sij < -0.5
while mask.any():
sij[mask] += 1.0
mask = sij < -0.5
return sij
[docs]
@crys_add_doc
def rmax_smith(cell):
"""Calculate rmax as in [Smith]_, where rmax = the maximal distance up to
which minimum image nearest neighbor distances are correct.
The cell vecs must be the rows of `cell`.
Parameters
----------
%(cell_doc)s
Returns
-------
rmax : float
References
----------
.. [Smith] W. Smith, The Minimum Image Convention in Non-Cubic MD Cells,
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.57.1696
1989
"""
a = cell[0,:]
b = cell[1,:]
c = cell[2,:]
bxc = np.cross(b,c)
cxa = np.cross(c,a)
axb = np.cross(a,b)
wa = abs(np.dot(a, bxc)) / norm(bxc)
wb = abs(np.dot(b, cxa)) / norm(cxa)
wc = abs(np.dot(c, axb)) / norm(axb)
rmax = 0.5*min(wa,wb,wc)
return rmax
[docs]
def rpdf(trajs, dr=0.05, rmax='auto', amask=None, tmask=None,
dmask=None, pbc=True, norm_vmd=False, maxmem=2.0):
"""Radial pair distribution (pair correlation) function for Structures and
Trajectories. In case of trajectories, the time-averaged RPDF is returned.
Can also handle non-orthorhombic unit cells (simulation boxes).
Only fixed-cell MD at the moment.
Parameters
----------
trajs : Structure or Trajectory or list of one or two such objects
The case ``len(trajs)==1`` is the same as providing the object directly
(most common case). Internally we expand the input to ``[trajs,
trajs]``, i.e. the RPDF of the 2nd coord set w.r.t. to the first is
calculated -- the order matters! This is like selection 1 and 2 in VMD,
but nornmally you would use `amask` instead. The option to provide a
list of two Trajectory objects exists for cases where you don't want to
use `amask`, but create two different Trajectory objects outside.
dr : float, optional
Radius spacing. Must have the same unit as `cell`, e.g. Angstrom.
rmax : {'auto', float}, optional
Max. radius up to which minimum image nearest neighbors are counted.
For cubic boxes of side length L, this is L/2 [AT,MD].
| 'auto' : the method of [Smith] is used to calculate the max. sphere
| raduis for any cell shape
| float : set value yourself
amask : None, list of one or two bool 1d arrays, list of one or two strings
Optional atom mask. This is the complementary functionality to
`sel` in :func:`vmd_measure_gofr`. If ``len(amask)==1``, then we expand
to ``[amask, amask]`` internally, which would calculate the RPDF
between the same atom selection. If two masks are given, then the first
is applied to ``trajs[0]`` and the second to ``trajs[1]``. Use this to
select only certain atoms in each Trajectory. The default is to provide
bool arrays. If you provide strings, they are assumed to be atom names
and we create a bool array ``np.array(symbols) == amask[i]``.
tmask : None or slice object, optional
Time mask. Slice for the time axis, e.g. to use only every 100th step,
starting from step 2000 to the end, use ``tmask=slice(2000,None,100)``,
which is the same as ``np.s_[2000::100]``.
dmask : None or string, optional
Distance mask. Restrict to certain distances using numpy syntax for
creating bool arrays::
'>=1.0'
'{d} >=1.0' # the same
'({d} > 1.0) & ({d} < 3.0)'
where ``{d}`` is a placeholder for the distance array (you really have to
use ``{d}``). The placeholder is optional in some pattern. This is similar
to VMD's "within" (``pbc=False``) or "pbwithin" (``pbc=True``) syntax.
pbc : bool, optional
apply minimum image convention to distances
norm_vmd : bool, optional
Normalize `g(r)` like in VMD by counting duplicate atoms and normalize to
``natoms0 * natoms1 - duplicates`` instead of ``natoms0*natoms1``. Affects
all-all correlations only. `num_int` is not affected. Use this only for
testing.
maxmem : float, optional
Maximal allowed memory to use, in GB.
Returns
-------
array (len(rad), 3), the columns are
rad : 1d array
radius (x-axis) with spacing `dr`, each value r[i] is the middle of a
histogram bin
hist : 1d array, (len(rad),)
the function values `g(r)`
num_int : 1d array, (len(rad),)
the (averaged) number integral ``number_density*hist*4*pi*r**2.0*dr``
Notes
-----
`rmax` : The maximal `rmax` for which g(r) is correctly normalized is the
result of :func:`rmax_smith`, i.e. the radius if the biggest sphere which
fits entirely into the cell. This is simply L/2 for cubic boxes of side
length L and volume L**3, for instance. We do explicitely allow `rmax` >
`rmax_smith` for testing, but be aware that `g(r)` and the number integral
are *wrong* for `rmax` > `rmax_smith`.
Even though the number integral will always converge to the number of all
neighbors for r -> infinity, the integral value (the number of neigbors) is
correct only up to `rmax_smith`.
See ``examples/rpdf/`` for educational evidence. For notes on how VMD does
this, see comments in the code below.
selection : The selection mechanism with `amask` is in principle as capable
as VMD's, but relies completely on the user's ability to create bool arrays
to filter the atoms. In practice, anything more complicated than
``array(symbols)=='O'`` ("name O" in VMD) is much more difficult than VMD's
powerful selection syntax.
Curently, the atom distances are calculated by using numpy fancy indexing.
That creates (big) arrays in memory. For data from long MDs, you may run
into trouble here. For a 20000 step MD, start by using every 200th step or
so (use ``tmask=slice(None,None,200)``) and look at the histogram, as you
take more and more points into account (every 100th, 50th step, ...).
Especially for Car Parrinello, where time steps are small and the structure
doesn't change much, there is no need to use every step. See also `maxmem`.
Examples
--------
>>> # simple all-all RPDF, time-averaged over all MD steps
>>> d = rpdf(traj)
>>> # the same as rpdf(traj,...)
>>> d = rpdf([traj], ...)
>>> d = rpdf([traj, traj], ...)
>>> # 2 selections: RPDF of all H's around all O's, average time step 3000 to
>>> # end, take every 50th step
>>> traj = io.read_cp2k_md('cp2k.out')
>>> d = rpdf(traj, dr=0.1, amask=['O', 'H'],tmask=np.s_[3000::50])
>>> plot(d[:,0], d[:,1], label='g(r)')
>>> twinx()
>>> plot(d[:,0], d[:,2], label='number integral')
>>> # use bool arrays for `amask`, need this for more complicated pattern
>>> sy = np.array(traj.symbols)
>>> # VMD: sel1='name O', sel2='name H', same as amask=['O', 'H']
>>> d = rpdf(traj, dr=0.1, amask=[sy=='O', sy=='H'],tmask=np.s_[3000::50])
>>> # VMD: sel1='name O', sel2='name H Cl', note that the bool arrays must
>>> # be logically OR'ed (| operator) to get the ffect of "H and Cl"
>>> d = rpdf(traj, dr=0.1, amask=[sy=='O', (sy=='H') | (sy=='Cl')],tmask=np.s_[3000::50])
>>> # skip distances >1 Ang
>>> d = rpdf(traj, dr=0.1, amask=['O', 'H'],tmask=np.s_[3000::50]
... dmask='{d}>1.0')
References
----------
[AT] M. P. Allen, D. J. Tildesley, Computer Simulation of Liquids,
Clarendon Press, 1989
[MD] R. Haberlandt, S. Fritzsche, G. Peinel, K. Heinzinger,
Molekulardynamik - Grundlagen und Anwendungen,
Friedrich Vieweg & Sohn Verlagsgesellschaft 1995
[Smith] W. Smith, The Minimum Image Convention in Non-Cubic MD Cells,
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.57.1696
1989
"""
# Theory
# ======
#
# 1) N equal particles (atoms) in a volume V.
#
# Below, "density" always means number density, i.e. (N atoms in the unit
# cell) / (unit cell volume V).
#
# g(r) is (a) the average atom density in a shell [r,r+dr] around an atom at
# r=0, relative to an "ideal gas" (random distribution) of density N/V.
# (b) Equivalent: The average number of atom pairs with distance r relative
# to the number of pairs with distance r in a random distribution.
#
# For each atom i=1,N, count the number of atoms j around it in the shell
# [r,r+dr] with r_ij = r_i - r_j, r < r_ij <= r+dr
#
# s(r) = sum(i=1,N) sum(j=1,N, j!=i) delta(r - r_ij)
#
# In practice, this is done by calculating all distances r_ij and bin them
# into a histogram s(k) with k = r_ij / dr the histogram index.
#
# g(r) = s(r) / [N * (N/V) * V(r)]
# = s(r) / [N**2/V * V(r)]
# V(r) = 4*pi*r**2*dr = 4/3*pi*[(r+dr)**3 - r**3]
#
# where V(r) the volume of the shell. Normalization to V(r) is necessary
# b/c the shell [r, r+dr] has on average more atoms for increasing "r".
#
# We sum over N atoms, so we have to divide by N : s(r) / N -- that's why
# g(r) is an average.
#
# Interpretation (a): Since we further divide by the shell volume V(r), the
# average shell atom count s(r) / N becomes the average shell atom density
# s(r) / [N * V(r)]. We finally normalize by the ideal gas density N/V to
# get a the dimensionless g(r).
#
# Interpretation (b): The average shell atom count is s(r) / N. It has no
# unit. The denominator is then (N/V) * V(r), which is the shell atom count
# for a random distribution of density N/V.
#
# g(r) -> 1 for r -> inf in liquids, i.e. long distances are not
# correlated. Their distribution is random. In a crystal, we get an
# infinite series of delta peaks at the distances of the 1st, 2nd, ...
# nearest neighbor shell.
#
# The number integral is
#
# I(r1,r2) = int(r=r1,r2) N/V*g(r)*4*pi*r**2*dr
# = int(r=r1,r2) N/V*g(r)*V(r)*dr
# = int(r=r1,r2) 1/N*s(r)*dr
#
# This can be used to calculate coordination numbers, i.e. it counts the
# average (that's why 1/N) number of atoms around an atom in a shell
# [r1,r2].
#
# Integrating to infinity
#
# I(0,inf) = N-1
#
# gives the average number of *all* atoms around an atom, *excluding* the
# central one. This integral will converge to N-1 with or without PBC, but
# w/o PBC, the nearest neigbor numbers I(r1,r2) will be wrong! Always use
# PBC (minimum image convention). Have a look at the following table.
# rmax_auto is the rmax value for the given unit cell by the method of
# [Smith], which is L/2 for a cubic box of side length L. It is the radius
# of the biggest sphere which still fits entirely into the cell. In the
# table: "+" = OK, "-" = wrong.
#
# nearest neighb. I(0,rmax) = N-1
# 1.) pbc=Tue, rmax < rmax_auto + -
# 2.) pbc=Tue, rmax >> rmax_auto + (< rmax_auto) +
# 3.) pbc=False, rmax < rmax_auto - -
# 4.) pbc=False, rmax >> rmax_auto - +
#
# (1) is the use case in [Smith]. Always use this.
#
# (2) appears to be also useful. However, it can be shown that nearest
# neigbors are correct only up to rmax_auto! See examples/rpdf/rpdf_aln.py.
# This is because if rmax > rmax_auto (say > L/2), then the shell is empty
# for all r outside of the box, which means that the counted number of
# surrounding atoms will be to small.
#
# For a crystal, integrating over a peak [r-dr/2, r+dr/2] gives *exactly*
# the number of nearest neighbor atoms for that distance r b/c the
# normalization factor -- the number of atoms in an ideal gas for a narrow
# shell of width dr -- is 1.
#
# 2) 2 selections
#
# Lets say you have 10 waters -> 10 x O (atom type A), 20 x H (type B),
# then let A = 10, B = 20.
#
# s(r) = sum(i=1,A) sum(j=1,B) delta(r - r_ij)
# = s_AB(r) + s_BA(r)
#
# where s_AB(r) is the number of B's around A's and vice versa. With the
# densities A/V and B/V, we get
#
# g(r) = g_AB(r) + g_BA(r) =
# s_AB(r) / [A * (B/V) * V(r)] +
# s_BA(r) / [B * (A/V) * V(r)]
#
# Note that the density used is always the density of the *sourrounding*
# atom type. g_AB(r) or g_BA(r) is the result that you want. Finally, we
# can also write g(r) for the all-all case, i.e. 1 atom type.
#
# g(r) = [s_AB(r) + s_BA(r)] / [A*B/V * V(r)]
#
# Note the similarity to the case of one atom type:
#
# g(r) = s(r) / [N**2/V * V(r)]
#
# The integrals are:
#
# I_AB(r1,r2) = int(r=r1,r2) (B/V)*g_AB(r)*4*pi*r**2*dr
# int(r=r1,r2) 1/A*s_AB(r)*dr
# I_BA(r1,r2) = int(r=r1,r2) (A/V)*g_BA(r)*4*pi*r**2*dr
# int(r=r1,r2) 1/B*s_BA(r)*dr
#
# Note the similarity to the one-atom case:
#
# I(r1,r2) = int(r=r1,r2) 1/N*s(r)*dr
#
# These integrals converge to the total number of *sourrounding*
# atoms of the other type:
#
# I_AB(0,inf) = B (not B-1 !)
# I_BA(0,inf) = A (not A-1 !)
#
# Verification
# ============
#
# This function was tested against VMD's "measure gofr" command. VMD can
# only handle orthorhombic boxes. To test non-orthorhombic boxes, see
# examples/rpdf/.
#
# Make sure to convert all length to Angstrom of you compare with VMD.
#
# Implementation details
# ======================
#
# Number integral mehod
# ---------------------
#
# To match with VMD results, we use the "rectangle rule", i.e. just y_i*dx.
# This is even cheaper than the trapezoidal rule, but by far accurate
# enough for small ``dr``. Try yourself by using a more sophisticated
# method like
# >>> num_int2 = scipy.integrate.cumtrapz(hist, rad)
# >>> plot(rad[:-1]+0.5*dr, num_int2)
#
# distance calculation
# --------------------
# sij : "matrix" of distance vectors in crystal coords
# rij : in cartesian coords, same unit as `cell`, e.g. Angstrom
#
# sij: (natoms0, natoms1, 3) # coords 2d
# sij: (nstep, natoms0, natoms1, 3) # coords 3d
#
# broadcasting 2d:
#
# coords0: (natoms0, 1, 3)
# coords1: (1, natoms1, 3)
# sij: (natoms0, natoms1, 3)
# >>> coords0[:,None,:] - coords1[None,:,:]
#
# broadcasting 3d:
#
# coords0: (nstep, natoms0, 1, 3)
# coords1: (nstep, 1, natoms1, 3)
# sij: (nstep, natoms0, natoms1, 3)
# >>> coords0[:,:,None,:] - coords1[:,None,:,:]
#
# If we have arbitrary selections, we cannot use np.tri() to select only
# the upper (or lower) triangle of this "matrix" to skip duplicates (zero
# distance on the main diagonal). Note that if we used tri(), we'd have to
# multiply the histogram by two, b/c now, we always double-count ij and ji
# distances, which seems to be correct (compare w/ VMD).
#
# We can easily create a MemoryError b/c of the temp arrays that numpy
# creates. But even w/ numexpr, which avoids big temp arrays, we store the
# result sij, which is a 4d array. For natoms=100, nstep=1e5, we already
# have a 24 GB array in RAM! The only solution is to code this section
# using Fortran/Cython/whatever in loops:
# * distances
# * apply min_image_convention() (optional)
# * sij -> rij transform
# * redcution to distances
#
# Variable cell
# -------------
# Currently, we allow only fixed cell data b/c then we can use numpy
# broadcasting to convert fractional to cartesian coords. But if we
# implement the distance calculation in Fortran, we can easily allow
# variable cell b/c then, we explicitely loop over time steps and can
# perform the conversion at every step.
#
# Differences to VMD's measure gofr
# =================================
#
# duplicates
# ----------
# In vmd/src/Measure.C, they count the number of identical atoms in both
# selections (variable ``duplicates``). These atoms lead to an r=0 peak in
# the histogram, which is bogus and must be corrected. VMD subtracts these
# number from the first histogram bin, while we simply set it to zero and
# don't count ``duplicates`` at all.
#
# normalization
# -------------
# For normalizing g(r) to account for growing shell volumes around the
# central atom for increasing r, we use the textbook formulas, which lead
# to
#
# norm_fac = volume / volume_shells / (natoms0 * natoms1)
#
# while VMD uses smth similar to
#
# norm_fac = volume / volume_shells / (natoms0 * natoms1 - duplicates)
#
# VMD calculates g(r) using this norm_fac, but the num_int is always
# calculated using the textbook result
#
# I_AB(r1,r2) = int(r=r1,r2) 1/A*s_AB(r)
#
# which is what we do, i.e. just integrate the histogram. That means VMD's
# results are inconsistent if duplicates != 0. In that case g(r) is
# slightly wrong, but num_int is still correct. This is only the case for
# simple all-all correlation (i.e. all atoms are considered the same),
# duplicates = natoms0 = natoms1 = the number of zeros on the distance
# matrix' main diagonal. Then we have a small difference in g(r), where
# VMD's is always a little higher b/c norm_fac is smaller then it should.
#
# As a result, VMD's g(r) -> 1.0 for random points (= ideal gas) and
# num_int -> N (would VMD's g(r) be integrated directly), while our g(r) ->
# < 1.0 (e.g. 0.97) and num_int -> N-1.
#
# rmax
# ----
# VMD has a unique feature that lets you use a higher rmax. VMD extends the
# range of rmax over rmax_auto, up to rmax_vmd=2*sqrt(0.5)*rmax_auto (~
# 14.14 for rmax_auto=10) which is just the length of the vector
# [rmax_auto, rmax_auto], i.e. the radius of a sphere which touches one
# vertice of the box. Then, we have a spherical cap, which partly covers
# the smallest box side (remember that VMD can do orthorhombic boxes only).
# VMD corrects the volume of the shells for normalization in that case for
# rmax_auto < r < rmax_vmd.
#
# For distinct selections, our g(r) and VMD's are exactly the same up to
# rmax_auto. After that, VMD's are correct up to rmax_vmd. At that value,
# VMD sets g(r) and num_int to 0.0.
#
# The problem is: Even if g(r) is normalized correctly for rmax_auto < r <
# rmax_vmd, the num_int in that region will be wrong b/c the integral
# formula must be changed for that region to account for the changed
# normalization factor, which VMD doesn't do, as far as I read the code. If
# I'm wrong, send me an email. All in all, VMD's num_int sould be trusted
# up to rmax_auto, just as in our case. The only advantage is a correctly
# normalized g(r) for rmax_auto < r < rmax_vmd, which is however of little
# use, if the num_int doesn't match.
dup_trajs = False
if amask is None:
amask = [slice(None)]
if tmask is None:
tmask = slice(None)
if type(trajs) != type([]):
trajs = [trajs]
if len(trajs) == 1:
trajs *= 2
dup_trajs = True
if len(amask) == 1:
amask *= 2
trajs = list(map(struct2traj, trajs))
assert len(trajs) == 2, "len(trajs) != 2"
assert len(amask) == 2, "len(amask) != 2"
if not dup_trajs:
assert trajs[0].symbols == trajs[1].symbols, ("symbols differ")
assert trajs[0].coords_frac.ndim == trajs[1].coords_frac.ndim == 3, \
("coords do not both have ndim=3")
assert trajs[0].nstep == trajs[1].nstep, ("nstep differs")
assert (trajs[0].cell == trajs[1].cell).all(), ("cells are not the same")
# special case: amask is string: 'Ca' -> sy=='Ca' bool array
sy = np.array(trajs[0].symbols)
for ii in range(len(amask)):
if type(amask[ii]) == type('x'):
amask[ii] = sy==amask[ii]
clst = [trajs[0].coords_frac[tmask,amask[0],:],
trajs[1].coords_frac[tmask,amask[1],:]]
# Add time axis back if removed after time slice, e.g. if tmask=np.s_[-1]
# (only one step). One could also slice ararys and put them thru the
# Trajectory() machinery again to assert 3d arrays.
for ii in range(len(clst)):
if len(clst[ii].shape) == 2:
clst[ii] = clst[ii][None,...]
assert len(clst[ii].shape) == 3
assert clst[ii].shape[2] == 3
natoms0 = clst[0].shape[1]
natoms1 = clst[1].shape[1]
# assume fixed cell, 2d
cell = trajs[0].cell[0,...]
volume = trajs[0].volume[0]
nstep = clst[0].shape[0]
rmax_auto = rmax_smith(cell)
if rmax == 'auto':
rmax = rmax_auto
bins = np.arange(0, rmax+dr, dr)
rad = bins[:-1]+0.5*dr
volume_shells = 4.0/3.0*pi*(bins[1:]**3.0 - bins[:-1]**3.0)
norm_fac_pre = volume / volume_shells
if nstep * natoms0 * natoms1 * 24.0 / 1e9 > maxmem:
raise Exception("would use more than maxmem=%f GB of memory, "
"try `tmask` to reduce time steps" %maxmem)
# distances
# sij: (nstep, natoms0, natoms1, 3)
sij = clst[0][:,:,None,:] - clst[1][:,None,:,:]
assert sij.shape == (nstep, natoms0, natoms1, 3)
if pbc:
sij = min_image_convention(sij)
# sij: (nstep, atoms0 * natoms1, 3)
sij = sij.reshape(nstep, natoms0*natoms1, 3)
# rij: (nstep, natoms0 * natoms1, 3)
rij = np.dot(sij, cell)
# dists_all: (nstep, natoms0 * natoms1)
dists_all = np.sqrt((rij**2.0).sum(axis=2))
if norm_vmd:
msk = dists_all < 1e-15
dups = [len(np.nonzero(entry)[0]) for entry in msk]
else:
dups = np.zeros((nstep,))
# Not needed b/c bins[-1] == rmax, but doesn't hurt. Plus, test_rpdf.py
# would fail b/c old reference data calculated w/ that setting (difference
# 1%, only the last point differs).
dists_all[dists_all >= rmax] = 0.0
if dmask is not None:
placeholder = '{d}'
if placeholder in dmask:
_dmask = dmask.replace(placeholder, 'dists_all')
else:
_dmask = 'dists_all ' + dmask
dists_all[np.invert(eval(_dmask))] = 0.0
hist_sum = np.zeros(len(bins)-1, dtype=float)
number_integral_sum = np.zeros(len(bins)-1, dtype=float)
# Calculate hists for each time step and average them. This Python loop is
# the bottleneck if we have many timesteps.
for idx in range(int(nstep)):
# rad_hist == bins
hist, rad_hist = np.histogram(dists_all[idx,...], bins=bins)
if bins[0] == 0.0:
hist[0] = 0.0
norm_fac = norm_fac_pre / (natoms0 * natoms1 - dups[idx])
hist_sum += hist * norm_fac
number_integral_sum += 1.0 * np.cumsum(hist) / natoms0
out = np.empty((len(rad), 3))
out[:,0] = rad
out[:,1] = hist_sum / float(nstep)
out[:,2] = number_integral_sum / float(nstep)
return out
[docs]
def call_vmd_measure_gofr(trajfn, dr=None, rmax=None, sel=['all','all'],
fntype='xsf', first=0, last=-1, step=1, usepbc=1,
datafn=None, scriptfn=None, logfn=None, tmpdir=None,
verbose=False):
"""Call VMD's "measure gofr" command. This is a simple interface which does
in fact the same thing as the gofr GUI, only scriptable. Accepts a file
with trajectory data.
Only orthogonal boxes are allowed (like in VMD).
Parameters
----------
trajfn : filename of trajectory which is fed to VMD (e.g. foo.axsf)
dr : float
dr in Angstrom
rmax : float
Max. radius up to which minimum image nearest neighbors are counted.
For cubic boxes of side length L, this is L/2 [AT,MD].
sel : list of two strings, optional
string to select atoms, ["name Ca", "name O"], ["all", "all"], ...,
where sel[0] is selection 1, sel[1] is selection 2 in VMD
fntype : str, optional
file type of `fn` for the VMD "mol" command
first, last, step : int, optional
Select which MD steps are averaged. Like Python, VMD starts counting at
0. Last is -1, like in Python.
usepbc : int {1,0}, optional
Whether to use the minimum image convention.
datafn : str, optional
temp file where VMD results are written to and loaded
scriptfn : str, optional
temp file where VMD tcl input script is written to
logfn : str, optional
file where VMD output is logged
tmpdir : str, optional
dir where auto-generated tmp files are written
verbose : bool, optional
display VMD output
Returns
-------
array (len(rad), 3), colums 0,1,2:
rad : 1d array
radius (x-axis) with spacing `dr`, each value r[i] is the middle of a
histogram bin
hist : 1d array, (len(rad),)
the function values g(r)
num_int : 1d array, (len(rad),)
the (averaged) number integral ``number_density*hist*4*pi*r**2.0*dr``
"""
vmd_tcl = textwrap.dedent("""
# VMD interface script. Call "measure gofr" and write RPDF to file.
# Tested with VMD 1.8.7, 1.9
#
# Automatically generated by pwtools, XXXTIME
#
# Format of the output file (columns):
#
# radius avg(g(r)) avg(number integral)
# [Ang]
# Load molecule file with MD trajectory. Typically, foo.axsf with type=xsf
mol new XXXTRAJFN type XXXFNTYPE waitfor all
# "top" is the current top molecule (the one labeled with "T" in the GUI).
set molid top
set selstr1 "XXXSELSTR1"
set selstr2 "XXXSELSTR2"
set first XXXFIRST
set last XXXLAST
set step XXXSTEP
set delta XXXDR
set rmax XXXRMAX
set usepbc XXXUSEPBC
set sel1 [atomselect $molid "$selstr1"]
set sel2 [atomselect $molid "$selstr2"]
# $result is a list of 5 lists, we only need the first 3
set result [measure gofr $sel1 $sel2 delta $delta rmax $rmax first $first last $last step $step usepbc $usepbc]
set rad [lindex $result 0]
set hist [lindex $result 1]
set num_int [lindex $result 2]
# write to file
set fp [open "XXXDATAFN" w]
foreach r $rad h $hist i $num_int {
puts $fp "$r $h $i"
}
quit
""")
# Skip test if cell is orthogonal, VMD will complain anyway if it isn't
assert None not in [dr, rmax], "`dr` or `rmax` is None"
assert len(sel) == 2
assert fntype == 'xsf', ("only XSF files supported")
if tmpdir is None:
tmpdir = '/tmp'
if datafn is None:
datafn = tempfile.mkstemp(dir=tmpdir, prefix='vmd_data_', text=True)[1]
if scriptfn is None:
scriptfn = tempfile.mkstemp(dir=tmpdir, prefix='vmd_script_', text=True)[1]
if logfn is None:
logfn = tempfile.mkstemp(dir=tmpdir, prefix='vmd_log_', text=True)[1]
dct = {}
dct['trajfn'] = trajfn
dct['fntype'] = fntype
dct['selstr1'] = sel[0]
dct['selstr2'] = sel[1]
dct['first'] = first
dct['last'] = last
dct['step'] = step
dct['dr'] = dr
dct['rmax'] = rmax
dct['usepbc'] = usepbc
dct['datafn'] = datafn
dct['time'] = time.asctime()
for key,val in dct.items():
vmd_tcl = vmd_tcl.replace('XXX'+key.upper(), str(val))
common.file_write(scriptfn, vmd_tcl)
cmd = "vmd -dispdev none -eofexit -e %s " %scriptfn
if verbose:
cmd += "2>&1 | tee %s" %logfn
else:
cmd += " > %s 2>&1" %logfn
out = common.backtick(cmd).strip()
if out != '':
print(out)
data = np.loadtxt(datafn)
return data
[docs]
def vmd_measure_gofr(traj, dr=0.05, rmax='auto', sel=['all','all'], first=0,
last=-1, step=1, usepbc=1,
slicefirst=True, verbose=False, tmpdir=None):
"""Call call_vmd_measure_gofr(), accept Structure / Trajectory as input.
This is intended as a complementary function to rpdf() and should, of
course, produce the "same" results.
Only orthogonal boxes are allowed (like in VMD).
Parameters
----------
traj : Structure or Trajectory
dr : float
dr in Angstrom
rmax : {'auto', float}, optional
Max. radius up to which minimum image nearest neighbors are counted.
For cubic boxes of side length L, this is L/2 [AT,MD].
| 'auto' : the method of [Smith] is used to calculate the max. sphere
| raduis for any cell shape
| float : set value yourself
sel : list of two strings, optional
string to select atoms, ["name Ca", "name O"], ["all", "all"], ...,
where sel[0] is selection 1, sel[1] is selection 2 in VMD
first,last,step : int, optional
Select which MD steps are averaged. Like Python, VMD starts counting at
zero. Last is -1, like in Python.
usepbc : int {1,0}, optional
Whether to use the minimum image convention.
slicefirst : bool, optional
Whether to slice coords here in the wrapper based on first,last,step.
This will write a smaller XSF file, which can save time. In the VMD
script, we always use first=0,last=-1,step=1 in that case.
verbose : bool, optional
display VMD output
tmpdir : str, optional
dir where auto-generated tmp files are written
Returns
-------
array (len(rad), 3), colums 0,1,2:
rad : 1d array
radius (x-axis) with spacing `dr`, each value r[i] is the middle of a
histogram bin
hist : 1d array, (len(rad),)
the function values g(r)
num_int : 1d array, (len(rad),)
the (averaged) number integral ``number_density*hist*4*pi*r**2.0*dr``
"""
# Need to import here b/c of cyclic dependency crys -> io -> crys ...
from pwtools import io
traj = struct2traj(traj)
# Speed: The VMD command "measure gofr" is multithreaded and written in C.
# That's why it is faster than the pure Python rpdf() above when we have to
# average many timesteps. But the writing of the .axsf file here is
# actually the bottleneck and makes this function slower.
if tmpdir is None:
tmpdir = '/tmp'
trajfn = tempfile.mkstemp(dir=tmpdir, prefix='vmd_xsf_', text=True)[1]
cell = traj.cell[0,...]
cc = traj.cryst_const[0,...]
if np.abs(cc[3:] - 90.0).max() > 0.1:
print(cell)
raise Exception("`cell` is not orthogonal, check angles")
rmax_auto = rmax_smith(cell)
if rmax == 'auto':
rmax = rmax_auto
# Slice here and write less to xsf file (speed!). Always use first=0,
# last=-1, step=1 in vmd script.
if slicefirst:
sl = slice(first, None if last == -1 else last+1, step)
traj2 = Trajectory(coords_frac=traj.coords_frac[sl,...],
cell=cell,
symbols=traj.symbols)
first = 0
last = -1
step = 1
else:
traj2 = traj
io.write_axsf(trajfn, traj2)
ret = call_vmd_measure_gofr(trajfn, dr=dr, rmax=rmax, sel=sel,
fntype='xsf', first=first,
last=last, step=step, usepbc=usepbc,
verbose=verbose,tmpdir=tmpdir)
return ret
[docs]
def distances(struct, pbc=False, squared=False, fullout=False):
"""
Wrapper for _flib.distsq_frac(). Calculate distances of all atoms in
`struct`.
Parameters
----------
struct : Structure instance
pbc : bool, optional
Apply PBC wrapping to distances (minimum image distances)
squared : bool, optional
Return squared distances
fullout : bool
See below
Returns
-------
dists : if fullout=False
dists, distvecs, distvecs_frac : if fullout=True
dists : 2d array (natoms, natoms)
(Squared, see `squared` arg) distances. Note that ``dists[i,j] ==
dists[j,i]``.
distvecs : (natoms,natoms,3)
Cartesian distance vectors.
distvecs_frac : (natoms,natoms,3)
Fractional distance vectors.
"""
# numpy version (10x slower):
#
# cf = struct.coords_frac
# cell = struct.cell
# distvecs_frac = cf[:,None,:] - cf[None,:,:]
# if pbc:
# distvecs_frac = min_image_convention(distvecs_frac)
# distvecs = np.dot(distvecs_frac, cell)
# distsq = (distvecs**2.0).sum(axis=2)
# dists = np.sqrt(distsq)
nn = struct.natoms
distsq = fempty((nn,nn))
distvecs = fempty((nn,nn,3))
distvecs_frac = fempty((nn,nn,3))
_flib.distsq_frac(coords_frac=struct.coords_frac,
cell=struct.cell,
pbc=int(pbc),
distsq=distsq,
distvecs=distvecs,
distvecs_frac=distvecs_frac)
dists = distsq if squared else np.sqrt(distsq)
if fullout:
return dists, distvecs, distvecs_frac
else:
del distvecs
del distvecs_frac
return dists
[docs]
def distances_traj(traj, pbc=False):
"""Cartesian distances along a trajectory.
Wrapper for _flib.distances_traj().
Parameters
----------
traj : Trajectory
pbc : bool
Use minimum image distances.
Returns
-------
dists : (nstep, natoms, natoms)
"""
nn = traj.natoms
dists = fempty((traj.nstep,nn,nn))
_flib.distances_traj(coords_frac=np.asarray(traj.coords_frac, order='F'),
cell=np.asarray(traj.cell, order='F'),
pbc=int(pbc),
dists=dists)
return dists
[docs]
def angles(struct, pbc=False, mask_val=999.0, deg=True):
"""
Wrapper for _flib.angles(), which accepts a Structure.
Calculate all angles between atom triples in `struct`.
Parameters
----------
struct : Structure instance
pbc : bool, optional
Apply PBC wrapping to distances (minimum image distances)
mask_val : float
Fill value for ``anglesijk[ii,jj,kk]`` where ``ii==jj`` or ``ii==kk``
or ``jj==kk``, i.e. no angle defined. Can be used to create bool mask
arrays in numpy. Should be outside of [-1,1] (``deg=False``) or [0,180]
(``deg=True``).
deg : bool
Return angles in degree (True) or cosine values (False).
Returns
-------
anglesijk : 3d array (natoms,natoms,natoms)
All angles. See also `mask_val`.
Examples
--------
>>> natoms = struct.natoms
>>> mask_val = 999
>>> anglesijk = crys.angles(struct, mask_val=mask_val)
>>> # angleidx holds all ii,jj,kk triples which we would get from:
>>> angleidx = []
... for ii in range(natoms):
... for jj in range(natoms):
... for kk in range(natoms):
... if (ii != jj) and (ii != kk) and (jj != kk):
... angleidx.append([ii,jj,kk])
>>> # which is the same as
>>> angleidx2 = [x for x in itertools.permutations(range(natoms),3)]
>>> # or
>>> angleidx3 = np.array(zip(*(anglesijk != mask_val).nonzero()))
>>> # the number of valid angles
>>> len(angleidx) == natoms * (natoms - 1) * (natoms - 2)
>>> len(angleidx) == factorial(natoms) / factorial(natoms - 3)
>>> # angles in 1d array for histogram or whatever
>>> angles1d = anglesijk[anglesijk != mask_val]
>>> y,x = np.histogram(angles1d, bins=100)
>>> plot(x[:-1]+0.5*(x[1]-x[0]), y)
"""
if deg:
assert not (0 <= mask_val <= 180), "mask_val must be outside [0,180]"
else:
assert not (-1 <= mask_val <= 1), "mask_val must be outside [-1,1]"
nn = struct.natoms
dists, distvecs, distvecs_frac = distances(struct, pbc=pbc, squared=False,
fullout=True)
del distvecs_frac
anglesijk = fempty((nn,nn,nn))
_flib.angles(distvecs=distvecs,
dists=dists,
mask_val=mask_val,
deg=int(deg),
anglesijk=anglesijk)
return anglesijk
[docs]
def nearest_neighbors_from_dists(dists, symbols, idx=None, skip=None,
cutoff=None, num=None, pbc=True,
sort=True, fullout=False):
"""Core part of nearest_neighbors(), which accepts pre-calculated
distances.
Can be more efficient in loops where many different
nearest neighbors should be calculated from the same distances.
Parameters
----------
dists : 2d array (natoms,natoms)
Cartesian distances (see distances()).
symbols : sequence of strings (natoms,)
Atom symbols, i.e. struct.symbols
Rest see nearest_neighbors().
"""
assert idx is not None, "idx is None"
assert None in [num,cutoff], "use either num or cutoff"
# dists: distance matrix (natoms, natoms), each row or col is sorted like
# struct.symbols
#
# dist from atom `idx` to all atoms, same as dists[idx,:] b/c `dist` is
# symmetric
dist1d = dists[:,idx]
# order by distance, `idx` first with dist=0
idx_lst_sort = np.argsort(dist1d)
dist1d_sort = dist1d[idx_lst_sort]
symbols_sort = np.array(symbols)[idx_lst_sort]
skip = common.asseq(skip)
if skip != [None]:
msk = symbols_sort == skip[0]
for item in skip[1:]:
msk = msk | (symbols_sort == item)
only_msk = np.invert(msk)
else:
only_msk = np.ones((len(symbols_sort),), dtype=bool)
if cutoff is None:
# ``1:`` : central atom excluded
cut_msk = np.s_[1:num+1]
ret_idx = idx_lst_sort[only_msk][cut_msk]
else:
cut_msk = (dist1d_sort > 0) & (dist1d_sort < cutoff)
ret_idx = idx_lst_sort[cut_msk & only_msk]
if not sort:
orig_idx = np.arange(len(dist1d))
ret_idx = orig_idx[match_mask(orig_idx,ret_idx)]
if fullout:
return ret_idx, dist1d[ret_idx]
else:
return ret_idx
[docs]
def nearest_neighbors(struct, idx=None, skip=None, cutoff=None, num=None, pbc=True,
sort=True, fullout=False):
"""Indices of the nearest neighbor atoms to atom `idx`, skipping atoms
whose symbols are `skip`.
Parameters
----------
struct : Structure
idx : int
Atom index of the central atom.
skip : str or sequence of strings
Symbol(s) of the atoms to skip.
num : int
number of requested nearest neighbors
cutoff : float
Cutoff radius in unit defined in `struct`, e.g. Angstrom. Return all
neighbors within that radius. Use either `num` of `cutoff`.
pbc : bool
Apply PBC to distances.
sort : bool
Sort `nn_idx` and `nn_dist` by distance.
fullout : bool
See below.
Returns
-------
nn_idx : fullout=False
nn_idx,nn_dist : fullout=True
nn_idx : 1d array
Indices into struct.symbols / coords.
nn_dist : 1d array
Distances ordered as in `nn_idx`.
See Also
--------
num.match_mask
Notes
-----
`num` : Depending on `struct`, there may not be `num` nearest neighbors,
especially if you use `skip` to leave certain species out. Then the
number of returned indices may be less then `num`.
Ordering : If ``sort=True``, then returnd indices `nn_idx` and distances
`nn_dist` are sorted small -> high. If ``sort=False``, then they are in the
same order as the symbols in ``struct.symbols``.
For structs with high symmetry (i.e. bulk crystals) where many
nearest neighbors have the same distance from the central atom, the
ordering of depends on how ``numpy.argsort`` sorts equal values in an
array.
Examples
--------
>>> ni=nearest_neighbors(struct, idx=struct.symbols.index('Ca'), num=6, skip='H')
>>> ni=nearest_neighbors(struct, idx=23, cutoff=5.3, skip=['H','Cl'])
>>> # simple rock salt example (used ASE to build dummy struct)
>>> from ase import lattice
>>> at=lattice.bulk('AlN', a=4, crystalstructure='rocksalt')
>>> st=crys.atoms2struct(at); st=crys.scell(st,(2,2,2))
>>> ni,nd=crys.nearest_neighbors(st, idx=0, num=8, fullout=True)
>>> ni
array([ 9, 10, 11, 12, 13, 14, 1, 2])
>>> nd
[ 2. 2. 2. 2. 2. 2. 2.82842712 2.82842712]
>>> # Use `ni` or bool array created from that for indexing
>>> array(st.symbols)[ni]
array(['Al', 'Al', 'N', 'N', 'N', 'N', 'N', 'N'], dtype='|S2')
>>> msk=num.match_mask(arange(st.natoms), ni)
>>> array(st.symbols)[msk]
array(['Al', 'Al', 'N', 'N', 'N', 'N', 'N', 'N'], dtype='|S2')
>>> # If you have many different symbols to skip and you don't want to type
>>> # a longish `skip` list, then use smth like this to include only 'O'
>>> # for example
>>> symbols=['Ca', 'Cl', 'Cl'] + ['O']*10 + ['H']*20
>>> skip=filter(lambda x: x!='O', set(symbols))
>>> ['H', 'Ca', 'Cl']
"""
# Distance matrix (natoms, natoms). Each row or col is sorted like
# struct.symbols. If used in loops over trajs, the distances() call is the
# most costly part, even though coded in Fortran.
dists = distances(struct, pbc=pbc)
return nearest_neighbors_from_dists(dists=dists, symbols=struct.symbols, idx=idx,
skip=skip, cutoff=cutoff, num=num,
sort=sort, fullout=fullout)
[docs]
def nearest_neighbors_struct(struct, **kwds):
"""Return Structure with only nearest neighbors.
Calls ``nearest_neighbors()`` and takes the same arguments. The returned
Structure contains the central atom set by the `idx` keyword to
nearest_neighbors().
Examples
--------
>>> from pwtools import crys, visualize
>>> st = crys.nearest_neighbors_struct(struct, cutoff=3.3, skip='H')
>>> visualize.view_avogadro(st)
"""
ni = nearest_neighbors(struct, **kwds)
# include `idx` atom
ni = np.concatenate((ni, [kwds['idx']]))
msk = num.match_mask(np.arange(struct.natoms), ni)
new_struct = Structure(coords_frac=struct.coords_frac[msk,:],
cell=struct.cell,
symbols=np.array(struct.symbols)[msk].tolist())
return new_struct
[docs]
def center_on_atom(obj_in, idx=None, copy=True):
"""Shift all coords in `obj` such that the atom with index `idx` is at the
center of the cell: [0.5,0.5,0.5] fractional coords.
"""
assert idx is not None, ("provide atom index")
obj = obj_in.copy() if copy else obj_in
obj.coords = None
# [...,idx,:] works for (natoms,3) and (nstep,natoms,3) -- numpy rocks!
obj.coords_frac = obj.coords_frac - obj.coords_frac[...,idx,:][...,None,:] + 0.5
obj.set_all()
return obj
#-----------------------------------------------------------------------------
# Container classes for crystal structures and trajectories.
#-----------------------------------------------------------------------------
[docs]
class UnitsHandler(FlexibleGetters):
"""Base class for :class:`Structure`, providing unit conversion methods."""
[docs]
def __init__(self):
# XXX cryst_const is not in 'length' and needs to be treated specially,
# see _apply_units_raw()
# map physical quantity to variable names in Structure/Trajectory
self.units_map = \
{'length': ['cell', 'coords', 'abc'],
'energy': ['etot', 'ekin'],
'stress': ['stress'],
'forces': ['forces'],
'temperature': ['temperature'],
'velocity': ['velocity'],
'time': ['timestep'],
}
self._default_units = dict([(key, 1.0) for key in self.units_map.keys()])
self.units_applied = False
# Init all unit factors in self.units to 1.0
self.units = self._default_units.copy()
def _apply_units_raw(self):
"""Only used by derived classes. Apply unit factors to all attrs in
self.units_map."""
assert not self.units_applied, ("_apply_units_raw() already called")
# XXX special-case cryst_const for trajectory case here (ndim = 2), it
# would be better to split cryst_const into self.abc and self.angles or
# so, but that would break too much code, BUT we could just add
# backward compat get_cryst_const, which concatenates these ...
if self.is_set_attr('cryst_const'):
cc = self.cryst_const.copy()
if cc.ndim == 1:
cc[:3] *= self.units['length']
elif cc.ndim == 2:
cc[:,:3] *= self.units['length']
else:
raise Exception("self.cryst_const has ndim != [1,2]")
self.cryst_const = cc
for unit, lst in self.units_map.items():
if self.units[unit] != 1.0:
for attr_name in lst:
if self.is_set_attr(attr_name):
attr = getattr(self, attr_name)
setattr(self, attr_name, attr * self.units[unit])
self.units_applied = True
[docs]
def apply_units(self):
"""Like _apply_units_raw(), make sure that units are only applied once."""
if not self.units_applied:
self._apply_units_raw()
[docs]
def update_units(self, units):
"""Update self.units dict from `units`. All units not contained in
`units` remain at the default (1.0), see self._default_units.
Parameters
----------
units : dict, {'length': 5, 'energy': 30, ...}
"""
if units is not None:
all_units = list(self.units_map.keys())
for key in list(units.keys()):
if key not in all_units:
raise Exception("unknown unit: %s" %str(key))
self.units.update(units)
[docs]
class Structure(UnitsHandler):
"""Container class for representing a single crystal structure (unit
cell + atoms).
Derived classes may add attributes and getters but the idea is that this
class is the minimal API for how to pass an atomic structure around.
Units are supposed to be similar to ASE:
=========== ============== ===============================
what unit SI
=========== ============== ===============================
length Angstrom (1e-10 m)
energy eV (1.602176487e-19 J)
forces eV / Angstrom
stress GPa (not eV/Angstrom**3)
temperature K
velocity Angstrom / fs
time fs (1e-15 s)
mass amu (1.6605387820000001e-27 kg)
=========== ============== ===============================
Unit conversion factors, which are applied to input arguments for
conversion to the above units can be given by the `units` input keyword.
Note that we cannot verify the unit of input args to the constructor, but
all functions in this package, which use Structure / Trajectory as
container classes, assume these units.
This class is very much like ase.Atoms, but without the "calculators".
You can use :meth:`get_ase_atoms` to get an Atoms object or
:meth:`get_fake_ase_atoms` for a minimal Atoms-like object.
Examples
--------
>>> symbols=['N', 'Al', 'Al', 'Al', 'N', 'N', 'Al']
>>> coords_frac=rand(len(symbols),3)
>>> cryst_const=np.array([5,5,5,90,90,90.0])
>>> st=Structure(coords_frac=coords_frac,
... cryst_const=cryst_const,
... symbols=symbols)
>>> st.symbols
['N', 'Al', 'Al', 'Al', 'N', 'N', 'Al']
>>> st.symbols_unique
['Al', 'N']
>>> st.order
{'Al': 1, 'N': 2}
>>> st.typat
[2, 1, 1, 1, 2, 2, 1]
>>> st.znucl_unique
[13, 7]
>>> st.nspecies
{'Al': 4, 'N': 3}
>>> st.coords
array([[ 1.1016541 , 4.52833103, 0.57668453],
[ 0.18088339, 3.41219704, 4.93127985],
[ 2.98639824, 2.87207221, 2.36208784],
[ 2.89717342, 4.21088541, 3.13154023],
[ 2.28147351, 2.39398397, 1.49245281],
[ 3.16196033, 3.72534409, 3.24555934],
[ 4.90318748, 2.02974457, 2.49846847]])
>>> st.coords_frac
array([[ 0.22033082, 0.90566621, 0.11533691],
[ 0.03617668, 0.68243941, 0.98625597],
[ 0.59727965, 0.57441444, 0.47241757],
[ 0.57943468, 0.84217708, 0.62630805],
[ 0.4562947 , 0.47879679, 0.29849056],
[ 0.63239207, 0.74506882, 0.64911187],
[ 0.9806375 , 0.40594891, 0.49969369]])
>>> st.cryst_const
array([ 5., 5., 5., 90., 90., 90.])
>>> st.cell
array([[ 5.00000000e+00, 0.00000000e+00, 0.00000000e+00],
[ 3.06161700e-16, 5.00000000e+00, 0.00000000e+00],
[ 3.06161700e-16, 3.06161700e-16, 5.00000000e+00]])
>>> st.get_ase_atoms(pbc=True)
Atoms(symbols='NAl3N2Al', positions=..., cell=[[2.64588604295, 0.0, 0.0],
[1.6201379367036871e-16, 2.64588604295, 0.0], [1.6201379367036871e-16,
1.6201379367036871e-16, 2.64588604295]], pbc=[True, True, True])
"""
# attrs_nstep arrays have shape (nstep,...), i.e time along `timeaxis`
timeaxis = 0
is_traj = False
is_struct = True
[docs]
def __init__(self, set_all_auto=True, units=None, **kwds):
"""
Parameters
----------
coords : (natoms, 3) [Ang]
Cartesian coords.
Optional if `coords_frac` given.
coords_frac : (natoms, 3)
Fractional coords w.r.t. `cell`.
Optional if `coords` given.
symbols : sequence of strings (natoms,)
atom symbols
cell : (3,3)
Unit cell vectors as rows. [Ang]
Optional if `cryst_const` given.
cryst_const : (6,)
[a,b,c,alpha,beta,gamma]; a,b,c in [Ang]
Optional if `cell` given.
forces : (natoms, 3), optional
[eV/Ang]
stress : (3,3), optional
stress tensor [GPa]
etot : float, optional
total energy [eV]
units : optional, dict,
see :class:`UnitsHandler`
set_all_auto : optional, bool
Call :meth:`set_all` in :meth:`__init__`.
Only Trajectory
ekin : (nstep,)
[eV]
forces : (nstep,natoms,3)
[eV/Ang]
pressure : (nstep,)
[GPa]
stress : (nstep,3,3)
[GPa]
temperature : (nstep,)
[K]
timestep : float
[fs]
velocity : (nstep, natoms, 3)
[Ang/fs]
volume : (nstep,)
[Ang^3]
Notes
-----
cell, cryst_const : Provide either `cell` or `cryst_const`, or both
(which is redundant). If only one is given, the other is calculated
from it. See {cell2cc,cc2cell}.
coords, coords_frac : Provide either `coords` or `coords_frac`, or both
(which is redundant). If only one is given, the other is calculated
from it. See coord_trans().
"""
# accepted by input, some derived if not given
self.input_attr_lst = [
'cell',
'coords',
'coords_frac',
'cryst_const',
'ekin',
'etot',
'forces',
'pressure',
'stress',
'symbols',
'temperature',
'timestep',
'velocity',
'volume',
]
# not as input, only derived from input attrs
self.derived_attr_lst = [
'mass',
'mass_unique',
'natoms',
'nspecies',
'nstep',
'ntypat',
'order',
'symbols_unique',
'typat',
'time',
'znucl',
'znucl_unique',
]
# If these are given as input args, then they must be 3d.
self.attrs_nstep_3d = [
'coords',
'coords_frac',
'stress',
'forces',
'velocity',
'cell', # can be 2d, see _extend()
]
self.attrs_nstep_2d = [
'cryst_const', # can be 1d, see _extend()
]
self.attrs_nstep_1d = [
'pressure',
'volume',
'etot',
'ekin',
'temperature',
'time',
]
self.attrs_only_traj = [
'nstep',
'timestep',
'time',
'ekin',
'velocity',
'temperature',
]
self.attrs_nstep = self.attrs_nstep_3d + self.attrs_nstep_2d + \
self.attrs_nstep_1d
self.attrs_nstep_2d_3d = self.attrs_nstep_3d + self.attrs_nstep_2d
# init all in self.attr_lst to None
self.attr_lst = self.input_attr_lst + self.derived_attr_lst
self.init_attr_lst()
# hackish but virtually no overhead here: create Structure by deleting
# stuff in attributes lists
if self.is_struct:
del self.attrs_nstep
for name in self.attrs_only_traj:
# while: for some reason list.remove() doesn't always work
while name in self.attr_lst:
self.attr_lst.pop(self.attr_lst.index(name))
super(Structure, self).__init__()
self.np_array_t = type(np.array([1]))
# for iteration
self._index = -1
# initialize the self.units dictionary with unit conversion factors,
# used in self.apply_units()
self.update_units(units)
self.set_all_auto = set_all_auto
# assign input args, overwrite default None
# self.foo = foo
# self.bar = bar
# ...
for name in list(kwds.keys()):
assert name in self.input_attr_lst, \
"illegal input arg: '%s', allowed: %s" %(name, str(self.input_attr_lst))
# cell can be 2d and will be treated by _extend() later
if self.is_traj and (name in self.attrs_nstep_3d) and \
name != 'cell':
assert kwds[name].ndim == 3, "input '%s' is not 3d" %name
setattr(self, name, kwds[name])
# calculate all missing attrs if requested, their units are based on
# the ones set above
if self.set_all_auto:
self.set_all()
[docs]
def set_all(self):
"""Extend arrays, apply units, call all getters."""
self._extend_arrays_apply_units()
super(Structure, self).set_all()
def _extend_arrays_apply_units(self):
self.apply_units()
if self.is_traj:
self._extend()
def _extend(self):
if self.check_set_attr('nstep'):
if self.is_set_attr('cell'):
self.cell = self._extend_cell(self.cell)
if self.is_set_attr('cryst_const'):
self.cryst_const = self._extend_cc(self.cryst_const)
def _extend_array(self, arr, nstep=None):
if nstep is None:
self.assert_set_attr('nstep')
nstep = self.nstep
return num.extend_array(arr, nstep, axis=self.timeaxis)
def _extend_cell(self, cell):
if cell is None:
return cell
if cell.shape == (3,3):
return self._extend_array(cell)
elif cell.shape == (1,3,3):
return self._extend_array(cell[0,...])
else:
return cell
def _extend_cc(self, cc):
if cc is None:
return cc
if cc.shape == (6,):
return self._extend_array(cc)
elif cc.shape == (1,6):
return self._extend_array(cc[0,...])
else:
return cc
[docs]
def compress(self, forget=['forces', 'stress',
'coords','cryst_const'], dtype=np.float32):
"""Compress Trajectory by deleting unused or redundant attrs (see
`forget`). Cast float arrays to `dtype`. float32 is usually quite OK
for MD data.
Parameters
----------
forget : list
Names of attributes to delete. They will be set to None.
dtype : numpy dtype
"""
for name in self.attr_lst:
if name in forget:
setattr(self, name, None)
else:
attr = getattr(self, name)
if (type(attr) == self.np_array_t) and (attr.dtype.kind == 'f') and \
attr.dtype != dtype:
setattr(self, name, attr.astype(dtype))
[docs]
def copy(self):
"""Return a copy of the inctance."""
if self.is_struct:
obj = Structure(set_all_auto=False)
elif self.is_traj:
obj = Trajectory(set_all_auto=False)
# Copy attrs over
for name in self.attr_lst:
val = getattr(self, name)
if val is None:
setattr(obj, name, None)
# dict.copy() is shallow, use deepcopy instead
elif hasattr(val, 'copy') and not isinstance(val, dict):
setattr(obj, name, val.copy())
else:
setattr(obj, name, copy.deepcopy(val))
return obj
[docs]
def get_velocity(self):
"""Calculate `velocity` from `coords` and `timestep` if
`velocity=None`.
"""
if self.is_struct:
raise NotImplementedError("only in Trajectory")
if not self.is_set_attr('velocity'):
if self.check_set_attr_lst(['coords', 'timestep']):
return velocity_traj(self.coords, dt=self.timestep, axis=0,
endpoints=True)
else:
return None
else:
return self.velocity
[docs]
def get_ekin(self):
""" ekin [eV] """
if self.is_struct:
raise NotImplementedError("only in Trajectory")
if not self.is_set_attr('ekin'):
if self.check_set_attr_lst(['mass', 'velocity']):
# velocity [Ang/fs], mass [amu]
vv = self.velocity
mm = self.mass
amu = constants.amu # kg
fs = constants.fs
eV = constants.eV
assert self.timeaxis == 0
return ((vv**2.0).sum(axis=2)*mm[None,:]/2.0).sum(axis=1) * (Angstrom/fs)**2 * amu / eV
else:
return None
else:
return self.ekin
[docs]
def get_temperature(self):
""" [K] """
if self.is_struct:
raise NotImplementedError("only in Trajectory")
if not self.is_set_attr('temperature'):
if self.check_set_attr_lst(['ekin', 'natoms']):
return self.ekin * constants.eV / self.natoms / constants.kb * (2.0/3.0)
else:
return None
else:
return self.temperature
[docs]
def get_natoms(self):
if self.is_traj:
axis = 1
else:
axis = 0
if self.is_set_attr('symbols'):
return len(self.symbols)
elif self.is_set_attr('coords'):
return self.coords.shape[axis]
elif self.is_set_attr('coords_frac'):
return self.coords_frac.shape[axis]
else:
return None
[docs]
def get_coords(self):
if self.is_struct:
if not self.is_set_attr('coords'):
if self.is_set_attr('coords_frac') and self.check_set_attr('cell'):
return np.dot(self.coords_frac, self.cell)
else:
return None
else:
return self.coords
else:
if not self.is_set_attr('coords'):
if self.is_set_attr('coords_frac') and \
self.check_set_attr_lst(['cell', 'natoms']):
nstep = self.coords_frac.shape[self.timeaxis]
req_shape_coords_frac = (nstep,self.natoms,3)
assert self.coords_frac.shape == req_shape_coords_frac, ("shape "
"mismatch: coords_frac: %s, need: %s" %(str(self.coords_frac.shape),
str(req_shape_coords_frac)))
assert self.cell.shape == (nstep,3,3), ("shape mismatch: "
"cell: %s, coords_frac: %s" %(self.cell.shape, self.coords_frac.shape))
return _flib.frac2cart_traj(self.coords_frac, self.cell)
else:
return None
else:
return self.coords
[docs]
def get_coords_frac(self):
if self.is_struct:
if not self.is_set_attr('coords_frac'):
if self.is_set_attr('coords') and self.check_set_attr('cell'):
return _flib.cart2frac(self.coords, self.cell)
else:
return None
else:
return self.coords_frac
else:
if not self.is_set_attr('coords_frac'):
if self.is_set_attr('coords') and \
self.check_set_attr_lst(['cell', 'natoms']):
nstep = self.coords.shape[self.timeaxis]
req_shape_coords = (nstep,self.natoms,3)
assert self.coords.shape == req_shape_coords, ("shape "
"mismatch: coords: %s, need: %s" %(str(self.coords.shape),
str(req_shape_coords)))
assert self.cell.shape == (nstep,3,3), ("shape mismatch: "
"cell: %s, coords: %s" %(self.cell.shape, self.coords.shape))
return _flib.cart2frac_traj(self.coords, self.cell)
else:
return None
else:
return self.coords_frac
[docs]
def get_volume(self):
if not self.is_set_attr('volume'):
if self.check_set_attr('cell'):
if self.is_traj:
return volume_cell3d(self.cell, axis=self.timeaxis)
else:
return volume_cell(self.cell)
else:
return None
else:
return self.volume
[docs]
def get_cell(self):
if not self.is_set_attr('cell'):
if self.is_set_attr('cryst_const'):
if self.is_traj:
cc = self._extend_cc(self.cryst_const)
return cc2cell3d(cc, axis=self.timeaxis)
else:
return cc2cell(self.cryst_const)
else:
return None
else:
return self.cell
[docs]
def get_cryst_const(self):
if not self.is_set_attr('cryst_const'):
if self.is_set_attr('cell'):
if self.is_traj:
cell = self._extend_cell(self.cell)
return cell2cc3d(cell, axis=self.timeaxis)
else:
return cell2cc(self.cell)
else:
return None
else:
return self.cryst_const
[docs]
def get_pressure(self):
if not self.is_set_attr('pressure'):
if self.check_set_attr('stress'):
if self.is_traj:
assert self.timeaxis == 0
return np.trace(self.stress,axis1=1, axis2=2)/3.0
else:
return np.trace(self.stress)/3.0
else:
return None
else:
return self.pressure
[docs]
def get_time(self):
if self.is_struct:
raise NotImplementedError("only in Trajectory")
else:
if self.check_set_attr_lst(['timestep', 'nstep']):
return np.linspace(0, (self.nstep-1)*self.timestep, self.nstep)
else:
return None
[docs]
def get_timestep(self):
if self.is_struct:
raise NotImplementedError("only in Trajectory")
else:
return self.timestep
[docs]
def get_nstep(self):
if self.is_struct:
raise NotImplementedError("only in Trajectory")
if self.is_set_attr('coords'):
return self.coords.shape[self.timeaxis]
elif self.is_set_attr('coords_frac'):
return self.coords_frac.shape[self.timeaxis]
else:
return None
[docs]
def get_symbols(self):
"""List of atomic symbols."""
return self.symbols
[docs]
def get_forces(self):
"""Forces."""
return self.forces
[docs]
def get_stress(self):
"""Stress tensor"""
return self.stress
[docs]
def get_etot(self):
"""Total anergy."""
return self.etot
[docs]
def get_symbols_unique(self):
"""List of unique atom symbols.
``[Al,N]`` if ``symbols=['Al']*10 + ['N']*10``.
``len(self.symbols_unique)`` = number of atomic species"""
return np.unique(self.symbols).tolist() if \
self.check_set_attr('symbols') else None
[docs]
def get_order(self):
"""Dict which maps ``symbols_unique`` to numbers, starting at 1.
``{'Al': 1, 'N':2, 'O': 3, 'Si': 4}`` for ``symbols=['Al']*5 + ['N']*5
+ ['O']*10 + ['Si']*20``.
Can be used in mapping a atom "type" number to a symbol (e.g. in
LAMMPS)."""
if self.check_set_attr('symbols_unique'):
return dict([(sym, num+1) for num, sym in
enumerate(self.symbols_unique)])
else:
return None
[docs]
def get_typat(self):
"""List of atom type integers in ``self.order``, same length as
`symbols`.
``[1]*10 + [2]*10`` for ````symbols=['Al']*10 + ['N']*10``.
"""
if self.check_set_attr_lst(['symbols', 'order']):
return [self.order[ss] for ss in self.symbols]
else:
return None
[docs]
def get_znucl_unique(self):
"""Unique atomic numbers.
``[13,7]`` for ``symbols = ['Al','Al','N',N']``.
"""
if self.check_set_attr('symbols_unique'):
return [atomic_data.numbers[sym] for sym in self.symbols_unique]
else:
return None
[docs]
def get_znucl(self):
"""All atomic numbers.
``[13,13,7,7]`` for ``symbols = ['Al','Al','N',N']``.
"""
if self.check_set_attr('symbols'):
return [atomic_data.numbers[sym] for sym in self.symbols]
else:
return None
[docs]
def get_ntypat(self):
"""Number of atomic species.
2 for ``symbols=['Al','Al','N',N']``.
"""
if self.check_set_attr('symbols_unique'):
return len(self.symbols_unique)
else:
return None
[docs]
def get_nspecies(self):
"""Dict with number of atoms per species."""
if self.check_set_attr_lst(['order', 'typat']):
return dict([(sym, self.typat.count(idx)) for sym, idx in
self.order.items()])
else:
return None
[docs]
def get_mass(self):
"""1D array of atomic masses in amu (atomic mass unit 1.660538782e-27
kg as in periodic table). The order is the one from self.symbols."""
if self.check_set_attr('symbols'):
return np.array([atomic_data.pt[sym]['mass'] for sym in
self.symbols])
else:
return None
[docs]
def get_mass_unique(self):
if self.check_set_attr('znucl_unique'):
return np.array([atomic_data.masses[z] for z in self.znucl_unique])
else:
return None
[docs]
def get_ase_atoms(self, **kwds):
"""Return ASE Atoms object.
Obviously, you must have ASE installed. We use
``scaled_positions=self.coords_frac``, so only ``self.cell`` must be in
[Ang].
Parameters
----------
**kwds :
additional keywords passed to the Atoms() constructor.
See Also
--------
:meth:`get_fake_ase_atoms`
Notes
-----
By default, we use ``Atoms(...,pbc=False)`` to avoid pbc-wrapping
``atoms.scaled_positions`` (we don't want that for MD structures, for
instance). If you need the pbc flag in your Atoms object, then use::
>>> # Note that the `pbc` flag is passed to ase.Atoms, so you can use
>>> # whatever that accepts, like pbc=[1,1,1] etc.
>>> atoms=struct.get_ase_atoms(pbc=True)
>>> # or
>>> atoms=struct.get_ase_atoms()
>>> atoms.set_pbc(True)
but then, ``scaled_positions`` will be wrapped by ASE and I'm not sure
if ``atoms.positions`` is updated in that case. Please test that -- I
don't use ASE much.
"""
req = ['coords_frac', 'cell', 'symbols']
if self.check_set_attr_lst(req):
# We don't wanna make ase a dependency. Import only when needed.
from ase import Atoms
_kwds = {'pbc': False}
_kwds.update(kwds)
at = Atoms(symbols=self.symbols,
scaled_positions=self.coords_frac,
cell=self.cell,
**_kwds)
return at
else:
return None
[docs]
def get_fake_ase_atoms(self):
""":class:`FakeASEAtoms` instance representing this Structure."""
return FakeASEAtoms(scaled_positions=self.get_coords_frac(),
cell=self.get_cell(),
symbols=self.get_symbols())
[docs]
def get_traj(self, nstep):
"""Return a Trajectory object, where this Structure is copied `nstep`
times."""
tr = Trajectory(set_all_auto=False)
for attr_name in self.attr_lst:
attr = getattr(self, attr_name)
if attr is None:
new_attr = None
elif attr_name in tr.attrs_nstep:
if type(attr) == self.np_array_t:
new_attr = num.extend_array(attr, nstep, axis=self.timeaxis)
else:
new_attr = np.array([attr]*nstep)
else:
new_attr = copy.deepcopy(attr)
setattr(tr, attr_name, new_attr)
# re-calculate nstep
tr.nstep = None
tr.set_all()
return tr
[docs]
def get_spglib(self):
"""Return spglib input tuple (cell, coords_frac, znucl)."""
return (self.cell, self.coords_frac, self.znucl)
[docs]
class Trajectory(Structure):
"""Like :class:`Structure`, but all attrs in `attrs_nstep` have a timeaxis
along axis=0 and length `nstep`:
=========== ============ ================
attribute Structure Trajectory
=========== ============ ================
coords (nstoms,3) (nstep,natoms,3)
coords_frac (nstoms,3) (nstep,natoms,3)
forces (nstoms,3) (nstep,natoms,3)
velocity -- (nstep,natoms,3)
cryst_const (6,) (nstep,6)
cell (3,3) (nstep,3,3)
stress (3,3) (nstep,3,3)
etot scalar (nstep,)
volume scalar (nstep,)
pressure scalar (nstep,)
ekin -- (nstep,)
temperature -- (nstep,)
time -- (nstep,)
=========== ============ ================
Also, we have additional attrs which are only defined for
:class:`Trajectory`, see `attrs_only_traj`:
| nstep
| timestep
| time
| ekin
| velocity
| temperature
"""
is_traj = True
is_struct = False
[docs]
def __init__(self, *args, **kwds):
super(Trajectory, self).__init__(*args, **kwds)
def __iter__(self):
return self
[docs]
def __getitem__(self, idx):
want_traj = False
if isinstance(idx, slice):
obj = Trajectory(set_all_auto=False)
timestep_fac = idx.step if idx.step is not None else 1.0
want_traj = True
else:
obj = Structure(set_all_auto=False)
timestep_fac = None
for name in self.attr_lst:
if not want_traj and name in self.attrs_only_traj:
continue
attr = getattr(self, name)
if attr is not None:
if name in self.attrs_nstep:
# the timeaxis check may be a problem for parsed MD data
# where some arrays are 1 or two steps longer/shorter than
# coords (from which we get nstep), for example lammps:
# temperature, volume, etc can be longer if multiple runs
# are done from the same input file and the parser
# currently doesn't handle that
if name in self.attrs_nstep_2d_3d \
and attr.shape[self.timeaxis] == self.nstep:
setattr(obj, name, attr[idx,...])
elif name in self.attrs_nstep_1d \
and attr.shape[self.timeaxis] == self.nstep:
setattr(obj, name, attr[idx])
else:
setattr(obj, name, attr)
else:
setattr(obj, name, None)
# After possible slicing, calculate new nstep
if want_traj:
obj.nstep = obj.get_nstep()
if obj.is_set_attr('timestep'):
obj.timestep *= timestep_fac
return obj
def __next__(self):
self._index += 1
if self._index == self.nstep:
self._index = -1
raise StopIteration
else:
return self[self._index]
[docs]
def get_ase_atoms(self):
raise NotImplementedError("only in Structure")
[docs]
def get_fake_ase_atoms(self):
raise NotImplementedError("only in Structure")
[docs]
def get_traj(self):
raise NotImplementedError("only in Structure")
[docs]
def get_spglib(self):
raise NotImplementedError("only in Structure")
[docs]
def compress(traj, copy=True, **kwds):
"""Wrapper for :meth:`Trajectory.compress`.
Parameters
----------
copy : bool
Return compressed copy or in-place modified object.
**kwds : keywords
keywords to :meth:`Trajectory.compress`
Examples
--------
>>> trc = compress(tr, copy=True, forget=['coords'])
>>> trc.dump('very_small_file.pk')
"""
if copy:
out = traj.copy()
else:
out = traj
out.compress(**kwds)
return out
[docs]
def atoms2struct(at):
"""Transform ASE Atoms object to Structure."""
return Structure(symbols=at.get_chemical_symbols(),
cell=np.array(at.get_cell()),
coords_frac=at.get_scaled_positions())
[docs]
def struct2atoms(st, **kwds):
"""Transform Structure to ASE Atoms object."""
return st.get_ase_atoms(**kwds)
[docs]
def struct2traj(obj):
"""Transform Structure to Trajectory with nstep=1."""
if obj.is_traj:
return obj
else:
return obj.get_traj(nstep=1)
[docs]
class FakeASEAtoms(Structure):
"""Mimic the basic behavior of ``ase.Atoms``.
Used to be used as input for ``spglib`` in symmetry.py, but not anymore as
of spglib 1.9.x. Now, we use :meth:`Structure.get_spglib`.
"""
[docs]
def __init__(self, scaled_positions=None, cell=None, symbols=None):
super(FakeASEAtoms, self).__init__(coords_frac=scaled_positions,
cell=cell,
symbols=symbols)
self.get_scaled_positions = self.get_coords_frac
self.get_positions = self.get_coords
self.get_number_of_atoms = self.get_natoms
[docs]
def get_magnetic_moments(self):
return None
[docs]
def get_atomic_numbers(self):
return np.array(self.get_znucl())
[docs]
def populated_attrs(lst):
"""Set with attr names which are not None in all objects in `lst`."""
attr_lists = [[name for name in obj.attr_lst \
if getattr(obj,name) is not None] for obj in lst]
return set.intersection(*(set(x) for x in attr_lists))
[docs]
def concatenate(lst):
"""Concatenate Structure or Trajectory objects into one Trajectory.
For non-nstep attrs (symbols,...), the first item is used and no check is
made whether they are the same in the others.
Parameters
----------
lst : sequence of Structure or Trajectory instances or both
Returns
-------
tr : Trajectory
"""
trlst = [struct2traj(obj) for obj in lst]
traj = Trajectory(set_all_auto=False)
com_attrs = populated_attrs(trlst)
attr_lst = set.intersection(com_attrs, set(traj.attrs_nstep))
for name in attr_lst:
attr = np.concatenate(tuple(getattr(x,name) for x in trlst),
axis=0)
setattr(traj, name, attr)
attrs_traj = traj.attrs_nstep + traj.attrs_only_traj
for name in set.symmetric_difference(com_attrs, set(attrs_traj)):
setattr(traj, name, getattr(trlst[0], name))
traj.timestep = None
traj.time = None
traj.nstep = traj.get_nstep()
return traj
[docs]
def mean(traj):
"""Mean of Trajectory along `timeaxis`, like numpy.mean(array,axis=0).
Parameters
----------
traj : Trajectory
Returns
-------
Structure :
instance with extra velocity, temperature, ekin attrs which can hold
the mean of the input `traj`
Examples
--------
>>> # a slice of the Trajectory
>>> st = mean(tr[200:500])
>>> # Say we know that coords_frac is pbc-wrpapped for some reason but
>>> # coords is not. Make sure that we average only coords and force a
>>> # recalculation of coords_frac by setting it to None and calling
>>> # set_all() at the end.
>>> tr.coords_frac = None
>>> st = mean(tr)
>>> st.set_all()
"""
assert traj.is_traj
struct = Structure(set_all_auto=False)
# add some non-Structure attrs like velocity,ekin,temperature
attrs_only_traj = ['time', 'timestep', 'nstep']
extra = list(set.difference(set(traj.attrs_only_traj),
set(attrs_only_traj)))
struct.attr_lst += extra
for attr_name in set.difference(set(traj.attrs_nstep),
set(attrs_only_traj)):
attr = getattr(traj, attr_name)
if attr is not None:
setattr(struct, attr_name, attr.mean(axis=traj.timeaxis))
attrs_traj = traj.attrs_nstep + attrs_only_traj
for attr_name in set.difference(set(traj.attr_lst),
set(attrs_traj)):
attr = getattr(traj, attr_name)
if attr is not None:
setattr(struct, attr_name, attr)
return struct
[docs]
def smooth(traj, kern, method=1):
"""Smooth Trajectory along `timeaxis`.
Each array in `traj.attrs_nstep` is smoothed by convolution with `kern`
along `timeaxis`, i.e. coords, coords_frac, etot, ... The kernel is only
required to be a 1d array and is automatically broadcast to the shape of
each array. A similar feature can be found in VMD -> Representations ->
Trajectory.
Parameters
----------
traj : Trajectory
kern : 1d array
Convolution kernel (smoothing window, see :func:`~pwtools.signal.smooth`).
method : int
Choose how to do the convolution:
| 1 : loops over 1d convolutions, easy on memory, sometimes faster
than method=2 (default)
| 2 : up to 3d kernel by broadcasting, can be very memory hungry for
big `traj` (i.e. 1e5 timesteps, 128 atoms)
Returns
-------
tr : Trajectory
Has the same `nstep` and `timestep` as the input Trajectory.
Examples
--------
>>> kern = scipy.signal.hann(101)
>>> trs = smooth(tr, kern)
"""
assert traj.is_traj
assert kern.ndim == 1, "need 1d kernel"
if traj.timeaxis == 0:
kern1d = kern
kern2d = kern[:,None]
kern3d = kern[:,None,None]
else:
# ... but is trivial to add
raise Exception("timeaxis != 0 not implemented")
out = Trajectory(set_all_auto=False)
for attr_name in traj.attrs_nstep:
attr = getattr(traj, attr_name)
if attr is not None:
if method == 1:
# Remove that if we want to generalize to timeaxis != 0 and
# adapt code below.
if attr.ndim > 1:
assert traj.timeaxis == 0
if attr.ndim == 1:
tmp = signal.smooth(attr, kern, axis=traj.timeaxis)
elif attr.ndim == 2:
tmp = np.empty_like(attr)
for jj in range(attr.shape[1]):
tmp[:,jj] = signal.smooth(attr[:,jj], kern)
elif attr.ndim == 3:
tmp = np.empty_like(attr)
for jj in range(attr.shape[1]):
for kk in range(attr.shape[2]):
tmp[:,jj,kk] = signal.smooth(attr[:,jj,kk], kern)
else:
raise Exception("ndim != 1,2,3 not allowed")
setattr(out, attr_name, tmp)
elif method == 2:
if attr.ndim == 1:
krn = kern1d
elif attr.ndim == 2:
krn = kern2d
elif attr.ndim == 3:
krn = kern3d
else:
raise Exception("ndim != 1,2,3 not allowed")
setattr(out, attr_name, signal.smooth(attr, krn,
axis=traj.timeaxis))
else:
raise Exception("unknown method")
# nstep and timestep are the same for the smoothed traj, so we can copy all
# non-nstep attrs over
for attr_name in set.difference(set(traj.attr_lst),
set(traj.attrs_nstep)):
setattr(out, attr_name, getattr(traj, attr_name))
return out
[docs]
def mix(st1, st2, alpha):
"""Linear interpolation between two Structures based on the numbers in
`alpha`. Returns a :class:`Trajectory`.
Mix two structures as (1-alpha)*st1 + alpha*st2. `coords` and `cell` are
used, as well as `forces` if present.
Parameters
----------
st1, st2 : Structures
alpha : 1d sequence
parameter values for mixing
Returns
-------
tr : Trajectory
tr.nstep == len(alpha)
Examples
--------
>>> mix(st1, st2, linspace(0,1,50))
"""
assert st1.coords.ndim == 2
assert st1.cell.ndim == 2
assert st1.coords.shape == st2.coords.shape
assert st1.symbols == st2.symbols
coords = np.empty_like(st1.coords)
cell = np.empty_like(st1.cell)
rr = alpha[:,None,None]
coords = rr * st2.coords[None,:,:] + (1.0 - rr) * st1.coords[None,:,:]
cell = rr * st2.cell[None,:,:] + (1.0 - rr) * st1.cell[None,:,:]
if (st1.forces is not None) and (st2.forces is not None):
forces = rr * st2.forces[None,:,:] + (1.0 - rr) * st1.forces[None,:,:]
return Trajectory(coords=coords, cell=cell, symbols=st1.symbols,
forces=forces)
else:
# cannot use forces=None here, Structure.__init__ complains that it
# is None ... this is by design but seems stupid -> change input
# checking logic there
return Trajectory(coords=coords, cell=cell, symbols=st1.symbols)
[docs]
def align_cart(obj, x=None, y=None, vecs=None, indices=None, cart=None,
eps=1e-5):
"""Align obj w.r.t. a new cartesian coord sys defined by x,y and
z=cross(x,y).
The new coord sys can be defined either by `x` + `y` or `vecs` or
`indices` or `cart`. Vectors need not be normalized.
Parameters
----------
obj : Structure or Trajectory
x, y : (3,)
The two vectors spanning the x-y plane.
vecs : (3,3)
Array with 3 vectors as rows `[v0, v1, v2]` and ``x = v1 - v0``,
``y = v2 - v0``
indices : sequence (4,) or (3,)
Indices of atoms in `obj` with positions `v0,v1,v2`. Length 4 for
obj=Trajectory: ``indices=[time_step, idx0, idx1, idx2]`` and length 3
for obj=Structure: ``[idx0, idx1, idx2]`` with
| ``v0 = obj.coords[time_step, idx0, ...]`` (Trajectory)
| ``v1 = obj.coords[time_step, idx1, ...]``
| ``v2 = obj.coords[time_step, idx2, ...]``
or
| ``v0 = obj.coords[idx0, ...]`` (Structure)
| ``v1 = obj.coords[idx1, ...]``
| ``v2 = obj.coords[idx2, ...]``
cart : (3,3)
new cartesian coord sys ``[x,y,z]``, matrix must be orthogonal
eps : float
Threshold for orthogonality check. Use `eps <= 0` to disable the check.
Returns
-------
out : Structure or Trajectory
Notes
-----
In case of a :class:`Trajectory`, the same rotation is applied to all
structs, so the *relative* orientation within the Trajectory is not
changed. That is OK if each struct shall be rotated in the same way.
If however each struct has a different orientation, then you need
to loop over the Trajectory like::
>>> from pwtools.crys import align_cart, concatenate
>>> trnew = concatenate([align_cart(st, cart=...) for st in tr])
"""
if cart is None:
if x is None and y is None:
if indices is None:
v0 = vecs[0,:]
v1 = vecs[1,:]
v2 = vecs[2,:]
else:
if len(indices) == 4:
v0 = obj.coords[indices[0], indices[1], ...]
v1 = obj.coords[indices[0], indices[2], ...]
v2 = obj.coords[indices[0], indices[3], ...]
else:
v0 = obj.coords[indices[0], ...]
v1 = obj.coords[indices[1], ...]
v2 = obj.coords[indices[2], ...]
x = v1 - v0
y = v2 - v0
xx = x.copy() / norm(x)
yy = y.copy() / norm(y)
cart = np.array([xx, yy, np.cross(xx, yy)])
if eps > 0:
assert np.allclose(inv(cart), cart.T, atol=eps)
if obj.is_traj:
container = Trajectory
else:
container = Structure
obj_new = container(coords_frac=obj.coords_frac.copy(),
symbols=obj.symbols,
cell=np.dot(obj.cell, cart.T),
)
return obj_new
[docs]
def tensor2voigt(tensor):
"""Convert stress tensor to Voigt notation.
Parameters
----------
tensor : (3,3)
Returns
-------
voigt: 1d array
[xx,yy,zz,yz,xz,xy]
"""
assert tensor.shape == (3,3), "tensor must be (3,3)"
voigt = np.empty(6)
voigt[0] = tensor[0,0]
voigt[1] = tensor[1,1]
voigt[2] = tensor[2,2]
voigt[3] = tensor[1,2]
voigt[4] = tensor[0,2]
voigt[5] = tensor[0,1]
return voigt
[docs]
def voigt2tensor(voigt):
"""Convert Voigt stress array to stress tensor.
Parameters
----------
voigt: 1d array
[xx,yy,zz,yz,xz,xy]
Returns
-------
tensor : (3,3)
"""
assert len(voigt) == 6, "voigt must be length 6 vector"
tensor = np.empty((3,3))
tensor[0,0] = voigt[0]
tensor[1,1] = voigt[1]
tensor[2,2] = voigt[2]
tensor[1,2] = voigt[3]
tensor[0,2] = voigt[4]
tensor[0,1] = voigt[5]
tensor[2,1] = tensor[1,2]
tensor[2,0] = tensor[0,2]
tensor[1,0] = tensor[0,1]
return tensor
[docs]
def voigt2tensor3d(voigt):
"""Same as :func:`voigt2tensor` for trajectories.
Parameters
----------
voigt: (nstep,6)
Returns
-------
tensor : (nstep,3,3)
"""
nstep = voigt.shape[0]
assert voigt.ndim == 2, "voigt must be (nstep,6)"
assert voigt.shape[1] == 6, "voigt must be (nstep,6)"
tensor = np.empty((nstep,3,3))
tensor[:,0,0] = voigt[:,0]
tensor[:,1,1] = voigt[:,1]
tensor[:,2,2] = voigt[:,2]
tensor[:,1,2] = voigt[:,3]
tensor[:,0,2] = voigt[:,4]
tensor[:,0,1] = voigt[:,5]
tensor[:,2,1] = tensor[:,1,2]
tensor[:,2,0] = tensor[:,0,2]
tensor[:,1,0] = tensor[:,0,1]
return tensor
[docs]
def tensor2voigt3d(tensor):
"""Same as :func:`tensor2voigt` for trajectories.
Parameters
----------
tensor : (nstep,3,3)
Returns
-------
voigt: (nstep,6)
"""
assert tensor.ndim == 3, "tensor must be (nstep,3,3)"
assert tensor.shape[1:] == (3,3), "tensor must be (nstep,3,3)"
nstep = tensor.shape[0]
voigt = np.empty((nstep,6))
voigt[:,0] = tensor[:,0,0]
voigt[:,1] = tensor[:,1,1]
voigt[:,2] = tensor[:,2,2]
voigt[:,3] = tensor[:,1,2]
voigt[:,4] = tensor[:,0,2]
voigt[:,5] = tensor[:,0,1]
return voigt