# coding: utf8
"""Read dcd files. Some timings (in ipython). Reading lammps files, not using
`convang` here, so angles are not converted but this is only a speed test.
::
>>> %timeit cc,co=dcd.read_dcd_data_ref('lmp.out.dcd')
100 loops, best of 3: 3.35 ms per loop
>>> %timeit cc,co=dcd.read_dcd_data_f('lmp.out.dcd')
1000 loops, best of 3: 455 µs per loop
>>> %timeit cc,co=dcd.read_dcd_data_f('lmp.out.dcd', nstephdr=True)
1000 loops, best of 3: 241 µs per loop
>>> # pure numpy wins!
>>> %timeit cc,co=dcd.read_dcd_data('lmp.out.dcd')
10000 loops, best of 3: 114 µs per loop
"""
import numpy as np
import os
# DCD file header. Define structured dtype with field specs
# (name, dtype, shape)
# numpy dtypes:
# i4 = int32
# f4 = float32 (single precision)
# f8 = float64 (double precision)
# S80 = string of length 80 (80 chars)
# shape:
# old syntax:
# ('foo', 'i4', 1) -> scalar int32
# new syntax: don't use just a blank "1"
# ('foo', 'i4', (1,)) -> array([int32])
# ('foo', 'i4') -> scalar int32
# for array fields, both work:
# ('foo', 'i4', 5) -> array([int32, int32, ...])
# ('foo', 'i4', (5,)) -> array([int32, int32, ...])
# but to be contistent better use tuple syntax
HEADER_TYPES = [
('blk0-0', 'i4' ), # 84 (start of first block, size=84 bytes)
('hdr', 'S4' ), # 'CORD'
('9int', 'i4', (9,)), # 9 ints, mostly 0
('timestep','f4' ), # timestep (float32)
('10int', 'i4', (10,)), # 10 ints, mostly 0, last is 24
('blk0-1', 'i4' ), # 84
('blk1-0', 'i4' ), # 164
('ntitle', 'i4' ), # 2
('remark1', 'S80' ), # remark1
('remark2', 'S80' ), # remark2
('blk1-1', 'i4' ), # 164
('blk2-0', 'i4' ), # 4 (4 bytes = int32)
('natoms', 'i4' ), # natoms (int32)
('blk2-1', 'i4' ), # 4
]
HEADER_DTYPE = np.dtype(HEADER_TYPES)
[docs]
def read_dcd_data_ref(fn, convang=False):
"""Read dcd file. Pure Python version. Slow, only reference implementation.
Use :func:`read_dcd_data` instead.
Parameters
----------
fn : str
filename
convang : bool
convert angles from cosine to degree (only useful for lammps style dcd
files)
Returns
-------
ret : (cryst_const, coords)
| cryst_const : (nstep,6) float64 array, (a,b,c,alpha,beta,gamma),
| Angstrom, degrees
| coords : (nstep, natoms, 3) float32 array, cartesian coords Angstrom
Examples
--------
>>> # default settings read cp2k files
>>> cc,co = read_dcd_data_ref('cp2k.dcd')
>>> cc,co = read_dcd_data_ref('cp2k.dcd', convang=False)
>>> cc,co = read_dcd_data_ref('lammps.dcd', convang=True)
"""
fd = open(fn, 'rb')
natoms = np.fromfile(fd, HEADER_DTYPE, 1)[0]['natoms']
# data per timestep
data_dtype = np.dtype([
('blk0-0', 'i4' ), # 48 = 6*8 bytes = 6*float64
('cryst_const_dcd', 'f8', (6,)), # unit cell (6*float64)
('bkl0-1', 'i4' ), # 48
('blkx-0', 'i4' ), # natoms*4 = natoms*float32
('x', 'f4', (natoms,)), # x (natoms*float32)
('blkx-1', 'i4' ), # natoms*4
('blky-0', 'i4' ), # natoms*4
('y', 'f4', (natoms,)), # y
('blky-1', 'i4' ), # natoms*4
('blkz-0', 'i4' ), # natoms*4
('z', 'f4', (natoms,)), # z
('blkz-1', 'i4' ), # natoms*4
])
cryst_const = []
coords = []
tmp_coords = np.empty((natoms,3), dtype=np.float32)
tmp_cryst_const = np.empty((6,), dtype=np.float64)
while True:
_data = np.fromfile(fd, data_dtype, 1)
if len(_data) == 0:
break
else:
data = _data[0]
tmp_coords[:,0] = data['x']
tmp_coords[:,1] = data['y']
tmp_coords[:,2] = data['z']
tmp_cryst_const[0] = data['cryst_const_dcd'][0]
tmp_cryst_const[1] = data['cryst_const_dcd'][2]
tmp_cryst_const[2] = data['cryst_const_dcd'][5]
tmp_cryst_const[3] = data['cryst_const_dcd'][4]
tmp_cryst_const[4] = data['cryst_const_dcd'][3]
tmp_cryst_const[5] = data['cryst_const_dcd'][1]
coords.append(tmp_coords.copy())
cryst_const.append(tmp_cryst_const.copy())
fd.close()
coords = np.array(coords, dtype=np.float32)
cryst_const = np.array(cryst_const, dtype=np.float64)
if convang:
cryst_const[:,3:] = np.arccos(cryst_const[:,3:])*180.0/np.pi
return cryst_const, coords
[docs]
def read_dcd_data(fn, convang=False):
"""Read dcd file. Fastest version. Calculates nstep from bytes between
end-of-header and EOF.
Parameters
----------
fn : str
filename
convang : bool
convert angles from cosine to degree (only useful for lammps style dcd
files)
Returns
-------
ret : (cryst_const, coords)
| cryst_const : (nstep,6) float64 array, (a,b,c,alpha,beta,gamma),
| Angstrom, degrees
| coords : (nstep, natoms, 3) float32 array, cartesian coords Angstrom
Examples
--------
>>> # default settings read cp2k files
>>> cc,co = read_dcd_data('cp2k.dcd')
>>> cc,co = read_dcd_data('cp2k.dcd', convang=False)
>>> cc,co = read_dcd_data('lammps.dcd', convang=True)
"""
fd = open(fn, 'rb')
natoms = np.fromfile(fd, HEADER_DTYPE, 1)[0]['natoms']
fd_pos = fd.tell()
# seek to end
fd.seek(0, os.SEEK_END)
# number of bytes between fd_pos and end
fd_rest = fd.tell() - fd_pos
# reset to pos after header
fd.seek(fd_pos)
# calculate nstep: fd_rest / bytes_per_timestep
# 4 - initial 48
# 6*8 - cryst_const_dcd
# 7*4 - markers between x,y,z and at the end of the block
# 3*4*natoms - float32 cartesian coords
nstep = fd_rest / (4 + 6*8 + 7*4 + 3*4*natoms*1.0)
assert nstep % 1.0 == 0.0, ("calculated nstep is not int, cannot "
"read file '{}'".format(fn))
nstep = int(nstep)
# dtype for fromfile: nstep times dtype of a timestep data block
dtype = \
np.dtype(([('x0', 'i4'),
('x1', 'f8', (6,)),
('x2', 'i4', (2,)),
('x3', 'f4', (natoms,)),
('x4', 'i4', (2,)),
('x5', 'f4', (natoms,)),
('x6', 'i4', (2,)),
('x7', 'f4', (natoms,)),
('x8', 'i4')],
(nstep,)))
arr = np.fromfile(fd, dtype, 1)
fd.close()
cryst_const = np.empty((nstep,6), dtype=np.float64)
cryst_const[:,0] = arr['x1'][0,:,0]
cryst_const[:,1] = arr['x1'][0,:,2]
cryst_const[:,2] = arr['x1'][0,:,5]
cryst_const[:,3] = arr['x1'][0,:,4]
cryst_const[:,4] = arr['x1'][0,:,3]
cryst_const[:,5] = arr['x1'][0,:,1]
coords = np.empty((nstep,natoms,3), dtype=np.float32)
coords[...,0] = arr['x3'][0,...]
coords[...,1] = arr['x5'][0,...]
coords[...,2] = arr['x7'][0,...]
if convang:
cryst_const[:,3:] = np.arccos(cryst_const[:,3:])*180.0/np.pi
return cryst_const, coords
[docs]
def read_dcd_data_f(fn, convang=False, nstephdr=False):
"""Read dcd file. Wrapper for the Fortran version in ``dcd.f90``.
Deprecated, use :func:`read_dcd_data` instead.
Parameters
----------
fn : str
filename
convang : bool
See :func:`read_dcd_data`
nstephdr : bool
read nstep from header (lammps) instead of walking the file twice (more
safe but slower, works for all dcd flavors)
Returns
-------
ret : See :func:`read_dcd_data`
Examples
--------
>>> # default settings read cp2k files
>>> cc,co = read_dcd_data_f('cp2k.dcd')
>>> cc,co = read_dcd_data_f('cp2k.dcd', convang=False, nstephdr=False)
>>> cc,co = read_dcd_data_f('lammps.dcd', convang=True, nstephdr=True)
>>> # more safe if you don't trust nstep from the header
>>> cc,co = read_dcd_data_f('lammps.dcd', convang=True, nstephdr=False)
"""
from pwtools import _dcd
nstep, natoms, timestep = _dcd.get_dcd_file_info(fn, nstephdr)
return _dcd.read_dcd_data(fn, nstep, natoms, convang)