r"""
Parser classes for different file formats. Input- and output files.
===================================================================
We need the following basic Unix tools installed:
| grep/egrep
| sed
| awk (better mawk)
| tail
| wc
| ...
The tested egrep versions don't know the ``\s`` character class for
whitespace as sed, Perl, Python or any other sane regex implementation
does. Use ``[ ]`` instead.
Using Parsing classes
---------------------
All parsing classes::
Pw*OutputFile
Cpmd*OutputFile
Cp2k*OutputFile
Lammps*OutputFile
are derived from FlexibleGetters -> UnitsHandler -> {Structure,Trajectory}FileParser
As a general rule: If a getter (self.get_<attr>() or self._get_<attr>_raw()
cannot find anything in the file, it returns None. All getters which depend
on it will also return None.
* After initialization
pp = SomeParsingClass(<filename>)
all attrs whoose name is in pp.attr_lst will be set to None.
* parse() will invoke self.try_set_attr(<attr>), which does
self.<attr> = self.get_<attr>()
for each <attr> in self.attr_lst, thus setting self.<attr> to a defined
value: None if nothing was found in the file or not None else
* All getters get_<attr>() will do their parsing, possibly
looking for a file self.filename, regardless of the fact that the attribute
self.<attr> may already be defined (e.g. if parse() has been called before).
* For interactive use (you need <attr> only once), prefer get_<attr>() over
parse().
* Use dump('foo.pk') only for temporary storage and fast re-reading. Use
pwtool.io.read_pickle('foo.pk'). See also the *FileParser.load() docstring.
* Use relative paths in <filename>.
* If loading a dump()'ed pickle file from disk,
pp=io.read_pickle(...)
then use direct attr access
pp.<attr>
instead of
pp.get_<attr>()
b/c latter would simply parse self.filname again.
For debugging, we still have many getters which produce redundant
information, e.g.
| cell + cryst_const
| _<attr>_raw + <attr> (where <attr> = cell, forces, ...)
| ...
especially in MD parsers, not so much in StructureFileParser drived
classes. If parse() is used, all this information retrieved and stored.
* All parsers try to return the default units of the program output, e.g. Ry,
Bohr, tryd for PWscf; Ha, Bohr, thart for Abinit and CPMD.
* Use get_struct() / get_traj() to get a Structure / Trajectory object with
pwtools standard units (eV, Ang, fs).
Using parse():
Pro:
* Simplicity. *All* getters are called when parse() is
invoked. You get it all.
* In theory, you can delete the file pointed to by self.filename, assuming
all getters have extracted all information that you need.
Con:
* The object is full of (potentially big) arrays holding redundant
information. Thus, the dump()'ed file may be large. Use the compress()
method.
* Parsing may be slow if each getter (of possibly many) is called.
Using get_<attr>():
Pro:
* You only parse what you really need.
Con:
* self.<attr> will NOT be set, since get_<attr>() only returns <attr> but
doesn't set self.<attr> = self.get_<attr>(), so dump() would save an
"empty" file.
"""
import re, sys, os
from math import acos, pi, sin, cos, sqrt
from io import StringIO
import types
import warnings
import numpy as np
# fail if missing, functionas which use that will fail later
try:
import CifFile as pycifrw_CifFile
except ImportError:
pass
from pwtools import (common, constants, regex, crys, atomic_data, num,
arrayio, dcd)
from pwtools.verbose import verbose
from pwtools.base import FlexibleGetters
from pwtools.constants import Ry, Ha, eV, Bohr, Angstrom, thart, Ang, fs, ps
from pwtools.crys import UnitsHandler
com = common
pj = os.path.join
# mawk is _much_ faster than GNU awk, which is ususlly /usr/bin/awk on linux.
# This test is pretty simple-minded but very fast. Maybe add more paths if
# needed (e.g. /usr/local/bin') or use a more elaborate thing such as
#
# 1) distutils.spawn.find_executable
# 2) shutil.which # python 3.3
# 3) try:
# subprocess.call(...) # or subprocess.Popen(...)
# except OSError:
# ...
# See
# http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
# http://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
if os.path.exists('/usr/bin/mawk'):
AWK = 'mawk'
else:
AWK = 'awk'
#-----------------------------------------------------------------------------
# General helpers
#-----------------------------------------------------------------------------
[docs]
def int_from_txt(txt):
if txt.strip() == '':
return None
else:
return int(txt)
[docs]
def float_from_txt(txt):
if txt.strip() == '':
return None
else:
return float(txt)
[docs]
def nstep_from_txt(txt):
if txt.strip() == '':
return 0
else:
return int(txt)
[docs]
def traj_from_txt(txt, shape, axis=0, dtype=np.float64, sep=' '):
"""Used for 3d trajectories where the exact shape of the array as written
by the MD code must be known, e.g. (nstep,N,3) where N=3 (cell, stress) or
N=natoms (coords, forces, ...).
We use np.fromstring for speed, so `txt` can only contain numbers and
separators (white space), no comments (like "# this is the header"), which
numpy.loadtxt() would handle.
Parameters
----------
txt : string
Text containing numbers (e.g. from ``common.backtick("grep ...")``)
shape : tuple
The 3d shape of the array when written along `axis` (see also
arrayio.writetxt()) to text as 2d chunks and read back in as 3d array.
Used to reconstruct the array.
axis : int
Axis along which the array was written in 2d chunks to text. (see
also `axis` in arrayio.writetxt()).
Used to reconstruct the array.
Only axis=0 implemented.
dtype, sep : passed to np.fromstring
"""
if txt.strip() == '':
return None
else:
assert len(shape) == 3, ("only 3d arrays supported")
assert axis == 0, ("only axis=0 implemented")
# Works only for axis = 0, but this is the only case we have when
# parsing MD code output. Else, some rollaxis would be needed. E.g. if
# shape=(natoms,nstep,3) and therefore axis=1, then we would do
# np.rollaxis(fromstring(...).reshape(shape), axis=1, start=0) ->
# (nstep,natoms,3)
return np.fromstring(txt, sep=sep, dtype=dtype).reshape(shape)
[docs]
def arr1d_from_txt(txt, dtype=np.float64):
if txt.strip() == '':
return None
else:
ret = np.atleast_1d(np.loadtxt(StringIO(txt), dtype=dtype))
return ret
[docs]
def arr2d_from_txt(txt, dtype=np.float64):
if txt.strip() == '':
return None
else:
ret = np.atleast_2d(np.loadtxt(StringIO(txt), dtype=dtype))
return ret
[docs]
def axis_lens(seq, axis=0):
"""Return length of `axis` of all arrays in `seq`.
If an entry in `seq` is None instead of an array, return 0. All arrays must
have at least axis+1 dimensions, of course (i.e. axis=1 -> all arrays at
least 2d).
Examples
--------
>>> axis_lens([arange(100), np.array([1,2,3]), None, rand(5,3)])
[100, 3, 0, 5]
"""
ret = []
for xx in seq:
try:
# handle None
ret.append(xx.shape[axis])
except AttributeError:
ret.append(0)
return ret
#-----------------------------------------------------------------------------
# Parsers
#-----------------------------------------------------------------------------
[docs]
class StructureFileParser(UnitsHandler):
"""Base class for single-structure parsers.
"""
Container = crys.Structure
default_units = {}
[docs]
def __init__(self, filename=None, units=None):
self.parse_called = False
self.filename = filename
# Some parsers do
# self._foo_file = os.path.join(self.basedir,'foo')
# in their __init__. That should not fail if we create an instance
# without passing a filename, like parser=SomeParsingClass(). So
# instead of setting basedir=None by default, we set it to '.' .
self.basedir = os.path.dirname(self.filename) if (self.filename is not
None) else '.'
UnitsHandler.__init__(self)
# Each derived parser class has `default_units`, with which self.units
# is updated. Then, self.units is updated from user input if we are
# called as ``SomeParser(...,units=...)``. Last, self.units is passed
# to Container(..., units=self.units) and units are applied there.
# Clear? :)
self.update_units(self.default_units)
self.update_units(units)
self.cont = self.Container(set_all_auto=False, units=self.units)
self.init_attr_lst(self.cont.attr_lst)
[docs]
def parse(self):
self.set_all()
self.parse_called = True
[docs]
def get_cont(self, auto_calc=True):
"""Populate and return a Container object.
Parameters
----------
auto_calc : bool
Auto-calculation of missing attributes in Container after passing
parsed data by calling ``set_all()``.
| True: call ``Container.set_all()`` =
| ``Container._extend_arrays_apply_units()`` +
| ``FlexibleGetters.set_all()``
| False: call only ``Container._extend_arrays_apply_units()``
"""
if not self.parse_called:
self.parse()
for attr_name in self.cont.attr_lst:
setattr(self.cont, attr_name, getattr(self, attr_name))
if auto_calc:
self.cont.set_all()
else:
self.cont._extend_arrays_apply_units()
assert self.cont.units_applied, "Container units not applied"
return self.cont
[docs]
def apply_units(self):
raise NotImplementedError("don't use that in parsers")
[docs]
def get_struct(self, **kwds):
return self.get_cont(**kwds)
[docs]
class TrajectoryFileParser(StructureFileParser):
"""Base class for MD-like parsers."""
Container = crys.Trajectory
# timeaxis in Trajectory defined before __init__, so we don't need to
# instantiate the object
timeaxis = Container.timeaxis
[docs]
def get_struct(self, **kwds):
raise NotImplementedError("use get_traj()")
[docs]
def get_traj(self, **kwds):
return self.get_cont(**kwds)
[docs]
class CifFile(StructureFileParser):
"""Parse Cif file. Uses PyCifRW [1]_.
References
----------
.. [1] https://bitbucket.org/jamesrhester/pycifrw
"""
[docs]
def __init__(self, filename=None, block=None, *args, **kwds):
"""
Parameters
----------
filename : name of the input file
block : data block name (i.e. 'data_foo' in the Cif file -> 'foo'
here). If None then the first data block in the file is used.
"""
self.block = block
StructureFileParser.__init__(self, filename=filename, *args, **kwds)
# only the ones for which we have getters
self.attr_lst = [\
'coords_frac',
'coords',
'symbols',
'cryst_const',
]
self.init_attr_lst()
[docs]
def cif_str2float(self, st):
"""'7.3782(7)' -> 7.3782"""
if '(' in st:
st = re.match(r'(' + regex.float_re + r')(\(.*)', st).group(1)
return float(st)
[docs]
def cif_clear_atom_symbol(self, st, rex=re.compile(r'([a-zA-Z]+)([0-9+-]*)')):
"""Remove digits and "+,-" from atom names.
Examples
--------
>>> cif_clear_atom_symbol('Al1')
'Al'
"""
return rex.match(st).group(1)
def _get_cif_dct(self):
# celldm from a,b,c and alpha,beta,gamma
# alpha = angbe between b,c
# beta = angbe between a,c
# gamma = angbe between a,b
self.try_set_attr('_cif_block')
cif_dct = {}
for x in ['a', 'b', 'c']:
what = '_cell_length_' + x
cif_dct[x] = self.cif_str2float(self._cif_block[what])
for x in ['alpha', 'beta', 'gamma']:
what = '_cell_angle_' + x
cif_dct[x] = self.cif_str2float(self._cif_block[what])
return cif_dct
def _get_cif_block(self):
cf = pycifrw_CifFile.ReadCif(self.filename)
if self.block is None:
cif_block = cf.first_block()
else:
cif_block = cf['data_' + self.block]
return cif_block
[docs]
def get_coords_frac(self):
if self.check_set_attr('_cif_block'):
if '_atom_site_fract_x' in self._cif_block:
arr = np.array([list(map(self.cif_str2float, [x,y,z])) for x,y,z in zip(
self._cif_block['_atom_site_fract_x'],
self._cif_block['_atom_site_fract_y'],
self._cif_block['_atom_site_fract_z'])])
return arr
else:
return None
else:
return None
[docs]
def get_coords(self):
if self.check_set_attr('_cif_block'):
if '_atom_site_Cartn_x' in self._cif_block:
arr = np.array([list(map(self.cif_str2float, [x,y,z])) for x,y,z in zip(
self._cif_block['_atom_site_Cartn_x'],
self._cif_block['_atom_site_Cartn_y'],
self._cif_block['_atom_site_Cartn_z'])])
return arr
else:
return None
else:
return None
[docs]
def get_symbols(self):
self.try_set_attr('_cif_block')
try_lst = ['_atom_site_type_symbol', '_atom_site_label']
for entry in try_lst:
if entry in self._cif_block:
return list(map(self.cif_clear_atom_symbol, self._cif_block[entry]))
return None
[docs]
def get_cryst_const(self):
self.try_set_attr('_cif_dct')
return np.array([self._cif_dct[key] for key in \
['a', 'b', 'c', 'alpha', 'beta', 'gamma']])
[docs]
class PDBFile(StructureFileParser):
"""Very very simple pdb file parser.
Extract only ATOM/HETATM and CRYST1 (if present) records. If you want smth
serious, check biopython or openbabel.
Notes
-----
self.cryst_const :
If no CRYST1 record is found, this is None.
parsing:
We use regexes which may not work for more complicated ATOM records. We
don't use the strict column numbers for each field as stated in the PDB
spec.
"""
# Notes:
#
# Grep atom symbols and coordinates in Angstrom ([A]) from PDB file.
# Note that for the atom symbols, we do NOT use the columns 77-78
# ("Element symbol"), b/c that is apparently not present in all the
# files which we currently use. Instead, we use the columns 13-16, i.e.
# "Atom name". Note that in general this is not the element symbol.
#
# From the PDB spec v3.20:
#
# ATOM record:
#
# COLUMNS DATA TYPE FIELD DEFINITION
# -------------------------------------------------------------------------------------
# 1 - 6 Record name "ATOM "
# 7 - 11 Integer serial Atom serial number.
# 13 - 16 Atom name Atom name.
# 17 Character altLoc Alternate location indicator.
# 18 - 20 Residue name resName Residue name.
# 22 Character chainID Chain identifier.
# 23 - 26 Integer resSeq Residue sequence number.
# 27 AChar iCode Code for insertion of residues.
# 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
# 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
# 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
# 55 - 60 Real(6.2) occupancy Occupancy.
# 61 - 66 Real(6.2) tempFactor Temperature factor.
# 77 - 78 LString(2) element Element symbol, right-justified.
# 79 - 80 LString(2) charge Charge on the atom.
#
# CRYST1 record:
#
# COLUMNS DATA TYPE FIELD DEFINITION
# -------------------------------------------------------------
# 1 - 6 Record name "CRYST1"
# 7 - 15 Real(9.3) a a (Angstroms).
# 16 - 24 Real(9.3) b b (Angstroms).
# 25 - 33 Real(9.3) c c (Angstroms).
# 34 - 40 Real(7.2) alpha alpha (degrees).
# 41 - 47 Real(7.2) beta beta (degrees).
# 48 - 54 Real(7.2) gamma gamma (degrees).
# 56 - 66 LString sGroup Space group.
# 67 - 70 Integer z Z value.
#
[docs]
def __init__(self, filename=None, *args, **kwds):
StructureFileParser.__init__(self, filename=filename, *args, **kwds)
# only the ones for which we have getters
self.attr_lst = [\
'coords',
'symbols',
'cryst_const',
]
self.init_attr_lst()
if self.filename is not None:
self.txt = common.file_read(self.filename)
def _get_coords_data(self):
pat = r'(ATOM|HETATM)[\s0-9]+([A-Za-z]+)[\sa-zA-Z0-9]*' + \
r'[\s0-9]+((\s+'+ regex.float_re + r'){3}?)'
# array of string type
return np.array([[m.group(2)] + m.group(3).split() for m in \
re.finditer(pat,self.txt)])
[docs]
def get_symbols(self):
# list of strings (system:nat,)
# Fix atom names, e.g. "AL" -> Al. Note that this is only needed b/c we
# use the "wrong" column "Atom name".
self.try_set_attr('_coords_data')
symbols = []
for sym in self._coords_data[:,0]:
if len(sym) == 2:
symbols.append(sym[0] + sym[1].lower())
else:
symbols.append(sym)
return symbols
[docs]
def get_coords(self):
self.try_set_attr('_coords_data')
# float array, (system:nat, 3)
return self._coords_data[:,1:].astype(float)
[docs]
def get_cryst_const(self):
# grep CRYST1 record, extract only crystallographic constants
# example:
# CRYST1 52.000 58.600 61.900 90.00 90.00 90.00 P 21 21 21 8
# a b c alpha beta gamma |space grp| z-value
pat = r'CRYST1\s+((\s+' + regex.float_re + r'){6}).*'
match = re.search(pat, self.txt)
return np.array(match.group(1).split()).astype(float)
[docs]
class PwSCFOutputFile(StructureFileParser):
r"""Parse a pw.x SCF output file (calculation='scf').
Some getters (_get_<attr>_raw) work for MD-like output, too. Here in the
SCF case, only the first item along the time axis is returned and should
only be used on calculation='scf' output.
SCF output files don't have an ATOMIC_POSITIONS block. We need to parse the
block below, which can be found at the top the file (cartesian, divided by
alat). From that, we also get symbols::
Cartesian axes
site n. atom positions (a_0 units)
1 Al tau( 1) = ( -0.0000050 0.5773532 0.0000000 )
2 Al tau( 2) = ( 0.5000050 0.2886722 0.8000643 )
3 N tau( 3) = ( -0.0000050 0.5773532 0.6208499 )
4 N tau( 4) = ( 0.5000050 0.2886722 1.4209142 )
Many quantities in PWscf's output files are always in units of the lattice
vector "a" (= a_0 = celldm1 = "alat" [Bohr]), i.e. divided by that value,
which is usually printed in the output in low precision::
lattice parameter (a_0) = 5.8789 a.u.
You can parse that value with ``get_alat(use_alat=True)``. We do that by
default: ``PwSCFOutputFile(filename, use_alat=True)`` b/c this is what most
people will expect if they just call the parser on some file. Then, we
multiply all relevent quantities with dimension length with the alat value
from pw.out automatically.
If ``use_alat=False``, we use ``alat=1.0``, i.e. all length quantities
which are "in alat units" are returned exactly as found in the file, which
is the same behavior as in all other parsers. Unit conversion happens only
when we pass things to Structure / Trajectory using self.units.
If you need/want to use another alat (i.e. a value with more precision),
then you need to explicitly provide that value and use ``use_alat=False``::
>>> alat = 1.23456789 # high precision value in Bohr
>>> pp = PwSCFOutputFile('pw.out', use_alat=False, units={'length': alat*Bohr/Ang})
>>> st = pp.get_struct()
``use_alat=False`` will prevent parsing the low precision value from
'pw.out'. The option ``units=...`` will overwrite ``default_units['length']
= Bohr/Ang``, which is used to convert all PWscf length [Bohr] to [Ang]
when passing things to Trajectory.
In either case, all quantities with a length unit or derived from such a
quantitiy, e.g.
| cell
| cryst_const
| coords
| coords_frac
| volume
| ...
will be correct (up to alat's precision).
All getters return PWscf standard units (Ry, Bohr, ...).
It is a special case for PWscf that a parser class may modify values parsed
from a file (multiply by alat if use_alat=True, etc) *before* they are
passed over to Structure / Trajectory b/c otherwise the numbers would be
pretty useless, unless you use `units` explicitely. To get an object with
pwtools standard units (eV, Angstrom, ...), use :meth:`get_struct`.
Notes
-----
Total force: Pwscf writes a "Total Force" after the "Forces acting on
atoms" section . This value a UNnormalized RMS of the force matrix
(f_ij, i=1,natoms j=1,2,3) printed. According to .../PW/forces.f90,
variable "sumfor", the "Total Force" is ``sqrt(sum_ij f_ij^2)``.
Use ``crys.rms(self.forces)`` (for PwSCFOutputFile) or
``crys.rms3d(self.forces, axis=self.timeaxis)`` (for PwMDOutputFile)
instead.
Verbose force printing: When using van der Waals (``london=.true.``) or
``verbosity='high'``, then more than one force block (natoms,3) is printed.
In that case, we assume the first block to be the sum of all force
contributions and that will end up in ``self.forces``. Each subsequent
block is discarded from ``self.forces``. However, you may use
``self._forces_raw`` (see ``self._get_forces_raw()``) to obtain all forces,
which will have the shape (N*natoms). The forces blocks will be in the
following order:
===================== ===================== =======================
``london=.true.`` ``verbosity='high'`` ``verbosity='high'`` +
``london=.true.``
===================== ===================== =======================
sum sum sum
vdw non-local non-local
\ ionic ionic
\ local local
\ core core
\ Hubbard Hubbard
\ SCF correction SCF correction
\ \ vdw
===================== ===================== =======================
Note that this order may change with QE versions, check your output file!
Tested w/ QE 4.3.2 .
"""
# self.timeaxis: This is the hardcoded time axis. It must be done
# this way b/c getters returning a >2d array cannot determine the shape
# of the returned array auttomatically based on the self.timeaxis
# setting alone. If you want to change this, then manually fix the
# "shape" kwarg to arrayio.readtxt() in all getters which return a 3d array.
default_units = \
{'energy': Ry / eV, # Ry -> eV
'length': Bohr / Angstrom, # Bohr -> Angstrom
'forces': Ry / eV * Angstrom / Bohr, # Ry / Bohr -> eV / Angstrom
'stress': 0.1, # kbar -> GPa
}
[docs]
def __init__(self, filename=None, use_alat=True, **kwds):
StructureFileParser.__init__(self, filename=filename, **kwds)
self.timeaxis = crys.Trajectory(set_all_auto=False).timeaxis
self.attr_lst = [\
'coords',
'symbols',
'stress',
'etot',
'forces',
'nstep_scf',
'cell',
'natoms',
'nkpoints',
'scf_converged',
]
self.use_alat = use_alat
self.init_attr_lst()
def _get_stress_raw(self):
verbose("getting _stress_raw")
key = 'P='
cmd = "grep -c %s %s" %(key, self.filename)
nstep = nstep_from_txt(com.backtick(cmd))
if nstep > 0:
cmd = "grep -A3 '%s' %s | grep -v -e %s -e '--'| \
%s '{print $4\" \"$5\" \"$6}'" \
%(key, self.filename, key, AWK)
return traj_from_txt(com.backtick(cmd),
shape=(nstep,3,3),
axis=self.timeaxis)
else:
return None
def _get_etot_raw(self):
verbose("getting _etot_raw")
cmd = r"grep '^!' %s | %s '{print $5}'" %(self.filename, AWK)
return arr1d_from_txt(com.backtick(cmd))
def _get_forces_raw(self):
verbose("getting _forces_raw")
if self.check_set_attr('natoms'):
# nstep: get it from outfile b/c the value in any input file will be
# wrong if the output file is a concatenation of multiple smaller files
key = r'Forces\s+acting\s+on\s+atoms.*$'
cmd = r"egrep -c '%s' %s" %(key.replace(r'\s', r'[ ]'), self.filename)
nstep = nstep_from_txt(com.backtick(cmd))
if nstep > 0:
# Need to split traj_from_txt() up into loadtxt() + arr2d_to_3d() b/c
# we need to get `nlines` first without an additional "grep -c
# ..."
cmd = "grep 'atom.*type.*force' %s \
| %s '{print $7\" \"$8\" \"$9}'" %(self.filename, AWK)
arr2d = arr2d_from_txt(com.backtick(cmd))
nlines = arr2d.shape[0]
# nlines_block = number of force lines per step = N*natoms
nlines_block = nlines // nstep
assert nlines_block % self.natoms == 0, ("nlines_block forces doesn't "
"match natoms")
return arrayio.arr2d_to_3d(arr2d,
shape=(nstep,nlines_block,3),
axis=self.timeaxis)
else:
return None
else:
return None
def _get_nstep_scf_raw(self):
verbose("getting _nstep_scf_raw")
cmd = r"grep 'convergence has been achieved in' %s | %s '{print $6}'" \
%(self.filename, AWK)
return arr1d_from_txt(com.backtick(cmd), dtype=int)
def _get_coords_symbols(self):
"""Grep start coords and symbols from pw.out header. This is always in
cartesian alat units (i.e. divided by alat) and printed with low
precision.
"""
verbose("getting start coords")
self.try_set_attr('natoms')
natoms = self.natoms
# coords
cmd = r"egrep -m1 -A%i 'site.*atom.*positions.*units.*\)' %s | tail -n%i | \
sed -re 's/.*\((.*)\)/\1/g'" \
%(natoms, self.filename, natoms)
coords = arr2d_from_txt(com.backtick(cmd))
cmd = r"egrep -m1 -A%i 'site.*atom.*positions.*units.*\)' %s | tail -n%i | \
%s '{print $2}'" \
%(natoms, self.filename, natoms, AWK)
symbols = com.backtick(cmd).strip().split()
return {'coords': coords, 'symbols': symbols}
def _get_cell_2d(self):
"""Start 2d cell in alat units.
Grep start cell from pw.out. Multiplication by alat in
:meth:`get_cell`.
The cell in pw.out is always in alat units (divided by alat) but
printed with much less precision compared to the input file. If you
need this information for further calculations, use the input file
value."""
cmd = "egrep -m1 -A3 'crystal.*axes.*units.*(a_0|alat)' %s | tail -n3 | \
%s '{print $4\" \"$5\" \"$6}'" %(self.filename, AWK)
return arr2d_from_txt(com.backtick(cmd))
[docs]
def get_alat(self, use_alat=None):
"""Lattice parameter "alat" [Bohr]. If use_alat or self.use_alat is
False, return 1.0, i.e. disbale alat.
Parameters
----------
use_alat : bool
Set use_alat and override self.use_alat.
"""
use_alat = self.use_alat if use_alat is None else use_alat
if use_alat:
cmd = r"grep -m1 'lattice parameter' %s | \
sed -re 's/.*=(.*)\s+a\.u\./\1/'" %self.filename
return float_from_txt(com.backtick(cmd))
else:
return 1.0
[docs]
def get_coords(self):
"""Cartesian start coords [Bohr] if self.alat in [Bohr]."""
verbose("getting coords")
if self.check_set_attr_lst(['_coords_symbols', 'alat']):
return self._coords_symbols['coords'] * self.alat
else:
return None
[docs]
def get_symbols(self):
verbose("getting symbols")
if self.check_set_attr('_coords_symbols'):
return self._coords_symbols['symbols']
else:
return None
[docs]
def get_stress(self):
"""Stress tensor [kbar]."""
return self.raw_slice_get('stress', sl=0, axis=self.timeaxis)
[docs]
def get_etot(self):
"""Total enery [Ry]."""
return self.raw_slice_get('etot', sl=0, axis=0)
[docs]
def get_forces(self):
"""Forces [Ry / Bohr]."""
if self.check_set_attr('natoms'):
# Assume that the first forces block printed are the forces on the
# ions. Skip vdw forces or whatever else is printed after that.
# Users can use self._forces_raw if they want and know what is
# printed in which order in the output file.
forces = self.raw_slice_get('forces', sl=0, axis=self.timeaxis)
return None if forces is None else forces[:self.natoms,:]
else:
return None
[docs]
def get_nstep_scf(self):
return self.raw_slice_get('nstep_scf', sl=0, axis=0)
[docs]
def get_cell(self):
"""Start cell [Bohr].
Apply self.alat unit to _cell_2d."""
if self.check_set_attr_lst(['_cell_2d', 'alat']):
return self._cell_2d * self.alat
else:
return None
[docs]
def get_natoms(self):
verbose("getting natoms")
cmd = r"grep -m 1 'number.*atoms/cell' %s | \
sed -re 's/.*=\s+([0-9]+).*/\1/'" %self.filename
return int_from_txt(com.backtick(cmd))
[docs]
def get_nkpoints(self):
verbose("getting nkpoints")
cmd = r"grep -m 1 'number of k points=' %s | \
sed -re 's/.*points=\s*([0-9]+)\s*.*/\1/'" %self.filename
return int_from_txt(com.backtick(cmd))
[docs]
def get_scf_converged(self):
verbose("getting scf_converged")
cmd = "grep 'convergence has been achieved in.*iterations' %s" %self.filename
if com.backtick(cmd).strip() != "":
return True
else:
return False
[docs]
class PwMDOutputFile(TrajectoryFileParser, PwSCFOutputFile):
"""Parse pw.x MD-like output.
Tested so far: md, relax, vc-relax. For vc-md, see PwVCMDOutputFile.
Notes
-----
Units: Notes on units for PwSCFOutputFile, esp. alat, apply here as well.
Additionally, the ATOMIC_POSITIONS and CELL_PARAMETERS blocks can have an
optional unit, which we account for. See get_cell(), get_coords() and
methods called in there.
| ATOMIC_POSITIONS <empty> | bohr | angstrom | alat | crystal
| CELL_PARAMETERS <empty> | (alat=...) | bohr | angstrom | alat
In each case, the quantity is multiplied by alat if applicable and
converted to Bohr, which is PWscf's default length, and later to Ang by
default (or whatever self.units['length'] does).
Initial SCF run: A special "feature" of pwscf is that SCF coords+cell
output is printed differently from MD-like output (where we have
ATOMIC_POSITIONS and CELL_PARAMETERS blocks). Since this parser uses only
the latter, the first etot+coords+cell+stress+... is skipped, i.e. the
complete iteration=0 = initial SCF run. Therefore, if you use
``tr=io.read_pw_md('pw.out')``, ``tr[0]`` is actually NOT your start input
structure! It is the first structure of the MD/relax. This may be a problem
if you need to accurately calculate differences between initial and final
relax structs, for instance. Then use::
>>> st = io.read_pw_scf('pw.out') # parse initial SCF output only: step=0
>>> tr_md = io.read_pw_md('pw.out') # parse MD-like output: step=1...end
>>> tr = crys.concatenate((st, tr_md))
"""
[docs]
def __init__(self, filename=None, use_alat=True, **kwds):
# update default_units *before* calling base class' __init__, where
# self.units is assembled from default_units
self.default_units.update({'time': constants.tryd / constants.fs})
TrajectoryFileParser.__init__(self, filename=filename, **kwds)
self.attr_lst = [\
'coords',
'coords_frac',
'symbols',
'stress',
'etot',
'ekin',
'temperature',
'forces',
'nstep_scf',
'cell',
'natoms',
'nkpoints',
'timestep',
]
self.init_attr_lst()
self.use_alat = use_alat
def _get_block_header_unit(self, key):
"""Parse things like
ATOMIC_POSITIONS -> None
ATOMIC_POSITIONS unit -> unit
ATOMIC_POSITIONS (unit) -> unit
ATOMIC_POSITIONS {unit} -> unit
CELL_PARAMETERS (alat=1.23) -> alat
Parameters
----------
key : str (e.g. 'ATOMIC_POSITIONS')
Returns
-------
str : unit
"""
assert key not in ['', None], "got illegal string"
cmd = 'grep -m1 %s %s' %(key, self.filename)
tmp = com.backtick(cmd).strip()
for sym in ['(', ')', '{', '}']:
tmp = tmp.replace(sym, '')
tmp = tmp.split()
if len(tmp) < 2:
return None
else:
return tmp[1].split('=')[0]
def _get_coords(self):
"""Parse ATOMIC_POSITIONS block. Unit is handled by get_coords_unit()."""
verbose("getting _coords")
if self.check_set_attr('natoms'):
self.try_set_attr('natoms')
natoms = self.natoms
# nstep: get it from outfile b/c the value in any input file will be
# wrong if the output file is a concatenation of multiple smaller files
key = 'ATOMIC_POSITIONS'
cmd = 'grep -c %s %s' %(key, self.filename)
nstep = nstep_from_txt(com.backtick(cmd))
# coords
cmd = "grep -A%i '%s' %s | grep -v -e %s -e '--' | \
%s '{print $2\" \"$3\" \"$4}'" \
%(natoms, key, self.filename, key, AWK)
return traj_from_txt(com.backtick(cmd),
shape=(nstep,natoms,3),
axis=self.timeaxis)
else:
return None
def _get_cell_3d(self):
"""Parse CELL_PARAMETERS block.
The block unit is ignored here. Only the content of the block is parsed
(i.e. the CELL_PARAMETERS as they are in the file). See also
``_get_block_header_unit()`` and ``get_cell``.
"""
verbose("getting _cell_3d")
# nstep
key = 'CELL_PARAMETERS'
cmd = 'grep -c %s %s' %(key, self.filename)
nstep = nstep_from_txt(com.backtick(cmd))
# cell
cmd = "grep -A3 %s %s | grep -v -e %s -e '--'" %(key, self.filename, key)
return traj_from_txt(com.backtick(cmd),
shape=(nstep,3,3),
axis=self.timeaxis)
def _get_cell_3d_factors(self):
"""Parse CELL_PARAMETERS unit factor printed at each time step.
::
CELL_PARAMETERS (alat= 22.75306514)
Returns
-------
alat_values : 1d array (nstep,) or None
1d array with alat for each time step if 'CELL_PARAMETERS.*alat' is
found; None if not found or if use_alat=False.
Notes
-----
During one run, that alat value doesn't change and is the same as alat
returned by ``get_alat()``. But it is needed if you parse an output
file which is a concatenation of multiple smaller files from vc-md runs
where that alat value has changed. This happens if a vc-md is
restarted. Then the new alat in the restart run is that of the last
cell of the old run.
"""
cmd = 'grep -m1 CELL_PARAMETERS.*alat.*= %s' %self.filename
if com.backtick(cmd).strip() == '':
return None
else:
if self.use_alat:
cmd = r"grep CELL_PARAMETERS %s | sed -re 's/.*alat.*=\s*(" \
%self.filename + regex.float_re + r")\)*.*/\1/g'"
return arr1d_from_txt(com.backtick(cmd))
else:
return None
def _match_nstep(self, arr):
"""Get nstep from _coords.shape[0] and return the last nstep steps from
the array `arr` along self.timeaxis.
Used to match forces,stress,... etc to coords b/c for MDs, the former
ones are always one step longer b/c of stuff printed in the first SCF
loop before the MD starts."""
if arr is not None:
if self.check_set_attr('_coords'):
nstep = self._coords.shape[self.timeaxis]
if arr.shape[self.timeaxis] > nstep:
return num.slicetake(arr, slice(-nstep,None,None),
axis=self.timeaxis)
else:
return arr
else:
return None
else:
return None
[docs]
def get_coords_unit(self):
verbose("getting coords_unit")
return self._get_block_header_unit('ATOMIC_POSITIONS')
[docs]
def get_cell_unit(self):
verbose("getting cell_unit")
return self._get_block_header_unit('CELL_PARAMETERS')
[docs]
def get_coords(self):
"""Cartesian coords [Bohr]. If coords_unit='alat', then [Bohr] if
self.alat in [Bohr]."""
if self.check_set_attr_lst(['_coords', 'coords_unit', 'alat']):
if self.coords_unit in ['bohr', None]:
return self._coords
elif self.coords_unit == 'alat':
return self._coords * self.alat
elif self.coords_unit == 'angstrom':
return self._coords * Angstrom / Bohr
else:
return None
else:
return None
[docs]
def get_cell(self):
"""Cell [Bohr]. Return 3d array from CELL_PARAMETERS or 2d array
self._cell_2d. Beware: complicated units logic ahead!"""
# From the manual, regarding the unit of CELL_PARAMETERS in the input
# file:
#
# bohr / angstrom: lattice vectors in bohr radii / angstrom.
# alat or nothing specified: if a lattice constant (celldm(1)
# or a) is present, lattice vectors are in units of the lattice
# constant; otherwise, in bohr radii or angstrom, as specified.
#
# .. yo!
# MD-like case
if self.check_set_attr('_cell_3d'):
# CELL_PARAMETERS (alat=...) | bohr | angstrom | alat
if self.check_set_attr('cell_unit'):
if self.cell_unit == 'bohr':
return self._cell_3d
elif self.cell_unit == 'alat':
if self.check_set_attr('_cell_3d_factors'):
return self._cell_3d * self._cell_3d_factors[:,None,None]
elif self.check_set_attr('alat'):
return self._cell_3d * self.alat
else:
return None
elif self.cell_unit == 'angstrom':
return self._cell_3d * Angstrom / Bohr
else:
return None
# CELL_PARAMETERS <empty>
else:
if self.check_set_attr('alat'):
return self._cell_3d * self.alat
else:
return None
# return start cell 2d, will be broadcast to 3d in Trajectory
elif self.check_set_attr_lst(['_cell_2d', 'alat']):
return self._cell_2d * self.alat
else:
return None
[docs]
def get_coords_frac(self):
"""Fractional coords."""
if self.check_set_attr_lst(['_coords', 'coords_unit']):
if self.coords_unit == 'crystal':
return self._coords
else:
return None
else:
return None
[docs]
def get_ekin(self):
"""Ion kinetic energy [Ry]."""
verbose("getting ekin")
cmd = r"grep 'kinetic energy' %s | %s '{print $5}'" %(self.filename,
AWK)
return arr1d_from_txt(com.backtick(cmd))
[docs]
def get_temperature(self):
"""Temperature [K]"""
verbose("getting temperature")
cmd = r"egrep 'temperature[ ]*=' %s " %self.filename + \
r"| sed -re 's/.*temp.*=\s*(" + regex.float_re + \
r")\s*K/\1/'"
return arr1d_from_txt(com.backtick(cmd))
[docs]
def get_timestep(self):
"""Time step [tryd]."""
cmd = r"grep -m1 'Time.*step' %s | sed -re \
's/.*step\s+=\s+(.*)a.u..*/\1/'" %self.filename
return float_from_txt(com.backtick(cmd))
[docs]
def get_stress(self):
"""Stress tensor [kbar]."""
return self._match_nstep(self.raw_return('stress'))
[docs]
def get_etot(self):
"""[Ry] """
return self._match_nstep(self.raw_return('etot'))
[docs]
def get_forces(self):
"""[Ry / Bohr] """
if self.check_set_attr('natoms'):
forces = self._match_nstep(self.raw_return('forces'))
return forces[:,:self.natoms,:]
else:
return None
[docs]
def get_nstep_scf(self):
return self.raw_return('nstep_scf')
[docs]
class PwVCMDOutputFile(PwMDOutputFile):
"""Parse only calculation='vc-md'."""
[docs]
def __init__(self, *args, **kwds):
PwMDOutputFile.__init__(self, *args, **kwds)
self.set_attr_lst(self.attr_lst + ['econst'])
def _get_datadct(self):
verbose("getting _datadct")
cmd = "grep 'Ekin.*T.*Etot' %s \
| %s '{print $3\" \"$7\" \"$11}'" %(self.filename, AWK)
ret_str = com.backtick(cmd)
if ret_str.strip() == '':
return None
else:
data = np.atleast_2d(np.loadtxt(StringIO(ret_str)))
return {'ekin': data[:,0],
'temperature': data[:,1],
'econst': data[:,2]}
[docs]
def get_ekin(self):
verbose("getting ekin")
self.try_set_attr('_datadct')
return self._datadct['ekin']
[docs]
def get_econst(self):
verbose("getting econst")
self.try_set_attr('_datadct')
return self._datadct['econst']
[docs]
def get_temperature(self):
verbose("getting temperature")
self.try_set_attr('_datadct')
return self._datadct['temperature']
[docs]
class CpmdSCFOutputFile(StructureFileParser):
"""Parse output from a CPMD "single point calculation" (wave function
optimization).
Some extra files are assumed to be in the same directory as self.filename.
extra files::
GEOMETRY.scale
Notes
-----
* The SYSTEM section must have SCALE such that a file GEOMETRY.scale is
written.
* To have forces in the output, use PRINT ON FORCES COORDINATES in the CPMD
section::
&CPMD
OPTIMIZE WAVEFUNCTION
CONVERGENCE ORBITALS
1.0d-7
PRINT ON FORCES COORDINATES
STRESS TENSOR
1
ODIIS NO_RESET=10
20
MAXITER
100
FILEPATH
/tmp
&END
&SYSTEM
SCALE
....
&END
"""
default_units = \
{'energy': Ha / eV, # Ha -> eV
'length': Bohr / Angstrom, # Bohr -> Angstrom
'forces': Ha / eV * Angstrom / Bohr, # Ha / Bohr -> eV / Angstrom
'stress': 0.1, # kbar -> GPa
}
[docs]
def __init__(self, *args, **kwds):
StructureFileParser.__init__(self, *args, **kwds)
self.attr_lst = [\
'cell',
'stress',
'etot',
'coords_frac',
'symbols',
'forces',
'natoms',
'nkpoints',
'nstep_scf',
'scf_converged',
]
self.init_attr_lst()
def _get_coords_forces(self):
"""Low precision cartesian coords [Bohr] + forces [Ha / Bohr] I guess.
Only printed in this form if we use
&CPMD
PRINT ON COORDINATES FORCES
&END
Forces with more precision are printed in the files TRAJECTORY or
FTRAJECTORY, but only for MD runs.
"""
verbose("getting _coords_forces")
self.try_set_attr('natoms')
if self.is_set_attr('natoms'):
cmd = "egrep -A%i 'ATOM[ ]+COORDINATES[ ]+GRADIENTS' %s \
| tail -n%i \
| %s '{print $3\" \"$4\" \"$5\" \"$6\" \"$7\" \"$8}'" \
%(self.natoms, self.filename, self.natoms, AWK)
return arr2d_from_txt(com.backtick(cmd))
else:
return None
def _get_scale_file(self):
"""Read GEOMETRY.scale file with fractional coords."""
fn = os.path.join(self.basedir, 'GEOMETRY.scale')
if os.path.exists(fn):
cmd = "grep -A3 'CELL MATRIX (BOHR)' %s | tail -n3" %fn
cell = arr2d_from_txt(com.backtick(cmd))
self.assert_set_attr('natoms')
cmd = "grep -A%i 'SCALED ATOMIC COORDINATES' %s | tail -n%i" \
%(self.natoms, fn, self.natoms)
arr = arr2d_from_txt(com.backtick(cmd), dtype=str)
coords_frac = arr[:,:3].astype(np.float64)
symbols = arr[:,3].tolist()
return {'coords_frac': coords_frac,
'symbols': symbols,
'cell': cell}
else:
return None
def _get_cell_2d(self):
"""2d array `cell` [Bohr] for fixed-cell MD or SCF from GEOMETRY.scale
file."""
verbose("getting _cell_2d")
if self.check_set_attr('_scale_file'):
return self._scale_file['cell']
else:
return None
[docs]
def get_cell(self):
"""2d cell [Bohr]"""
verbose("getting cell")
return self.get_return_attr('_cell_2d')
[docs]
def get_stress(self):
"""[kbar]"""
verbose("getting stress")
cmd = "grep -A3 'TOTAL STRESS TENSOR' %s | tail -n3" %self.filename
return arr2d_from_txt(com.backtick(cmd))
[docs]
def get_etot(self):
"""[Ha]"""
verbose("getting etot")
cmd = r"grep 'TOTAL ENERGY =' %s | tail -n1 | %s '{print $5}'" \
%(self.filename, AWK)
return float_from_txt(com.backtick(cmd))
[docs]
def get_coords_frac(self):
verbose("getting coords_frac")
if self.check_set_attr('_scale_file'):
return self._scale_file['coords_frac']
else:
return None
[docs]
def get_symbols(self):
verbose("getting symbols")
if self.check_set_attr('_scale_file'):
return self._scale_file['symbols']
else:
return None
[docs]
def get_forces(self):
"""[Ha / Bohr]"""
verbose("getting forces")
if self.check_set_attr('_coords_forces'):
return self._coords_forces[:,3:]
else:
return None
[docs]
def get_natoms(self):
"""Number of atoms. Apparently only printed as "NUMBER OF ATOMS ..." in
the SCF case, not in MD. So we use "grep -c" on the GEOMETRY file, which
has `natoms` lines (normally) and 6 colunms. Sometimes (so far seen in
variable cell calcs) there are some additional lines w/ 3 columns,
which we skip."""
verbose("getting natoms")
fn = os.path.join(self.basedir, 'GEOMETRY')
if os.path.exists(fn):
cmd = "egrep -c '([0-9][ ]+.*){5,}' %s" %fn
return int_from_txt(com.backtick(cmd))
else:
return None
[docs]
def get_nkpoints(self):
verbose("getting nkpoints")
cmd = r"grep 'NUMBER OF SPECIAL K POINTS' %s | \
sed -re 's/.*COORDINATES\):\s*([0-9]+)\s*.*/\1/'" %self.filename
return int_from_txt(com.backtick(cmd))
[docs]
def get_nstep_scf(self):
verbose("getting nstep_scf")
cmd = r"grep -B2 'RESTART INFORMATION WRITTEN' %s | head -n1 \
| %s '{print $1}'" %(self.filename, AWK)
return int_from_txt(com.backtick(cmd))
[docs]
def get_scf_converged(self):
verbose("getting scf_converged")
cmd = "grep 'BUT NO CONVERGENCE' %s" %self.filename
if com.backtick(cmd).strip() == "":
return True
else:
return False
[docs]
class CpmdMDOutputFile(TrajectoryFileParser, CpmdSCFOutputFile):
"""Parse CPMD MD output.
Works with BO-MD and CP-MD, fixed and variable cell. Some attrs may be None
or have different shapes (2d va 3d arrays) depending on what type of MD is
parsed and what info/files are available.
Notes for the comments below::
{A,B,C} = A or B or C
(A) = A is optional
(A (B)) = A is optional, but only if present, B is optional
Extra files which will be parsed and MUST be present::
GEOMETRY.scale
GEOMETRY
TRAJECTORY
ENERGIES
Extra files which will be parsed and MAY be present depending on the type
of MD::
(FTRAJECTORY)
(CELL)
(STRESS)
Notes
-----
The input should look like that::
&CPMD
MOLECULAR DYNAMICS {BO,CP}
(PARRINELLO-RAHMAN (NPT))
PRINT ON FORCES COORDINATES
TRAJECTORY XYZ FORCES
STRESS TENSOR
<step>
...
&END
&SYSTEM
SCALE
...
&END
Tested with CPMD 3.15.1, the following extra files are always written::
GEOMETRY.scale
GEOMETRY
TRAJECTORY
ENERGIES
In the listing below, we show which extra files are written (+) or not (-)
if the input follows the example above.
Also, the order of columns in the ENERGIES file depends on what type of MD
we are running. In case of BO-MD it depends on the kind of wavefunction
optimizer, too! This is most unpleasant. Currently we rely on the fact that
each tested case has a different number of columns, but this is very
hackish b/c it is not guaranteed to be unique! Maybe, we should let the
user set self.energies_order or a keywords mdtype={'cp-npt', 'bo', etc}
instead of subclassed for each case.
This is what we tested so far (cpmd 3.15.1). For BO-MD + ODIIS, some
columns are always 0.0, but all are there (e.g. EKINC is there but 0.0 b/c
not defined for BO, only CP). For BO-MD, we list the wf optimizer (xxx for
CP b/c there is none)::
MOLECULAR DYNAMICS BO
+FTRAJECTORY
-CELL
-STRESS # why!?
ODISS
NFI EKINC TEMPP EKS ECLASSIC EHAM DIS TCPU
LANCZOS DIAGONALIZATION
NFI TEMPP EKS ECLASSIC DIS TCPU
MOLECULAR DYNAMICS CP
+FTRAJECTORY
-CELL
+STRESS
xxx
NFI EKINC TEMPP EKS ECLASSIC EHAM DIS TCPU
MOLECULAR DYNAMICS BO
PARRINELLO-RAHMAN
not implemented !
MOLECULAR DYNAMICS CP
PARRINELLO-RAHMAN
-FTRAJECTORY # why!?
+CELL
+STRESS
xxx
NFI EKINC EKINH TEMPP EKS ECLASSIC EHAM DIS TCPU
MOLECULAR DYNAMICS BO
PARRINELLO-RAHMAN NPT
-FTRAJECTORY # why!?
+CELL
+STRESS
ODIIS
NFI EKINC EKINH TEMPP EKS ECLASSIC EHAM DIS TCPU
MOLECULAR DYNAMICS CP
PARRINELLO-RAHMAN NPT
-FTRAJECTORY # why!?
+CELL
+STRESS
xxx
NFI EKINC EKINH TEMPP EKS ECLASSIC EHAM DIS TCPU
"""
[docs]
def __init__(self, *args, **kwds):
"""
Parameters
----------
filename : file to parse
"""
self.default_units.update(\
{'time': constants.thart / constants.fs, # thart -> fs
'velocity': Bohr / Ang * fs / thart, # Bohr / thart -> Ang / fs
})
TrajectoryFileParser.__init__(self, *args, **kwds)
self.attr_lst = [\
'cell',
'coords',
'econst',
'ekin',
'ekin_cell',
'ekin_elec',
'etot',
'forces',
'natoms',
'stress',
'symbols',
'temperature',
'temperature_cell',
'timestep',
'velocity',
]
self.init_attr_lst()
self._energies_order = {\
9:\
{'nfi': 0,
'ekinc': 1,
'ekinh': 2,
'tempp': 3,
'eks': 4,
'eclassic': 5,
'eham': 6,
'dis': 7,
'tcpu': 8},
8:\
{'nfi': 0,
'ekinc': 1,
'tempp': 2,
'eks': 3,
'eclassic': 4,
'eham': 5,
'dis': 6,
'tcpu': 7},
6:\
{'nfi': 0,
'tempp': 1,
'eks': 2,
'eclassic': 3,
'dis': 4,
'tcpu': 5},
}
def _get_energies_file(self):
verbose("getting _energies_file")
fn = os.path.join(self.basedir, 'ENERGIES')
if os.path.exists(fn):
arr = np.loadtxt(fn)
ncols = arr.shape[-1]
if ncols not in list(self._energies_order.keys()):
raise Exception("only %s columns supported in "
"ENERGIES file" %str(list(self._energies_order.keys())))
else:
order = self._energies_order[ncols]
dct = {}
for key, idx in order.items():
dct[key] = arr[:,idx]
del arr
return dct
else:
return None
def _get_coords_vel_forces(self):
verbose("getting _coords_vel_forces")
"""Parse (F)TRAJECTORY file. Ignore lines which say
"<<<<<< NEW DATA >>>>>>" from restarts.
"""
# cols (both files):
# 0: natoms x nfi (natoms x 1, natoms x 2, ...)
# 1-3: x,y,z cartesian coords [Bohr]
# 4-6: x,y,z cartesian velocites [Bohr / thart ]
# thart = Hartree time = 0.024189 fs
# FTRAJECTORY extra:
# 7-9: x,y,z cartesian forces [Ha / Bohr]
self.assert_set_attr('natoms')
have_file = False
have_forces = False
fn_tr = os.path.join(self.basedir, 'TRAJECTORY')
fn_ftr = os.path.join(self.basedir, 'FTRAJECTORY')
if os.path.exists(fn_ftr):
have_forces = True
have_file = True
ncols = 10
fn = fn_ftr
elif os.path.exists(fn_tr):
have_forces = False
have_file = True
ncols = 7
fn = fn_tr
if have_file:
cmd = "grep -c -v '<<<<' %s" %fn
nlines = int_from_txt(com.backtick(cmd))
nstep = float(nlines) / float(self.natoms)
assert nstep % 1.0 == 0.0, (str(self.__class__) + \
"nlines is not a multiple of nstep in %s" %fn)
nstep = int(nstep)
# Need to use the slower arrayio.readtxt() here instead of
# traj_from_txt() which uses fast fromstring() b/c we have
# comments='<<<<'. The other way would be to use
# common.backtick("grep -v '<<<<' ...")) the text such that we have
# only numbers in it and then pass that to traj_from_txt().
arr = arrayio.readtxt(fn, axis=self.timeaxis, shape=(nstep, self.natoms, ncols),
comments='<<<<')
dct = {}
dct['coords'] = arr[...,1:4]
dct['velocity'] = arr[...,4:7]
dct['forces'] = arr[...,7:] if have_forces else None
return dct
else:
return None
[docs]
def get_ekin(self):
if self.check_set_attr('_energies_file'):
return self._energies_file['eclassic']
else:
return None
[docs]
def get_cell(self):
verbose("getting cell")
"""Parse CELL file [Bohr]. If CELL is not there, return 2d cell
from GEOMETRY.scale (self.cell from CpmdSCFOutputFile)."""
# So far tested CELL files have 6 cols:
# 1-3: x,y,z cell vectors
# 4-6: cell forces? ditch them for now ...
fn = os.path.join(self.basedir, 'CELL')
if os.path.exists(fn):
cmd = "head -n2 %s | tail -n1 | wc | %s '{print $2}'" %(fn, AWK)
ncols = int_from_txt(com.backtick(cmd))
cmd = "grep -c 'CELL PARAMETERS' %s" %fn
nstep = int_from_txt(com.backtick(cmd))
cmd = "grep -A3 'CELL PARAMETERS' %s | grep -v 'CELL'" %fn
arr = traj_from_txt(com.backtick(cmd),
shape=(nstep,3,ncols),
axis=self.timeaxis)
return arr[...,:3]
else:
if self.check_set_attr('_cell_2d'):
return self._cell_2d
else:
return None
[docs]
def get_coords(self):
verbose("getting coords")
"""Cartesian coords [Bohr]."""
req = '_coords_vel_forces'
self.try_set_attr(req)
return self._coords_vel_forces['coords'] if self.is_set_attr(req) \
else None
[docs]
def get_econst(self):
"""[Ha]"""
verbose("getting econst")
req = ['_energies_file', 'etot']
self.try_set_attr_lst(req)
if self.is_set_attr_lst(req):
if 'eham' in self._energies_file:
return self._energies_file['eham']
else:
return self.etot
else:
return None
[docs]
def get_ekinc(self):
verbose("getting ekinc")
"""Fictitious electron kinetic energy [Ha]."""
req = '_energies_file'
self.try_set_attr(req)
if self.is_set_attr(req) and 'ekinc' in self._energies_file:
return self._energies_file['ekinc']
else:
return None
[docs]
def get_etot(self):
verbose("getting etot")
"""Totat energy = EKS = Kohn Sham energy? [Ha]."""
req = '_energies_file'
self.try_set_attr(req)
return self._energies_file['eks'] if self.is_set_attr(req) \
else None
[docs]
def get_ekinh(self):
verbose("getting ekinh")
"""Fictitious cell kinetic energy [Ha].
From prcpmd.F:
EKINH [J] = 9/2 * kb [J/K] * TEMPH [K]
EKINH [Ha] = 9/2 * kb [J/K] * TEMPH [K] / Ha
where TEMPH is the fictitious cell temperature.
"""
req = '_energies_file'
self.try_set_attr(req)
if self.is_set_attr(req) and 'ekinh' in self._energies_file:
return self._energies_file['ekinh']
else:
return None
# alias
[docs]
def get_ekin_cell(self):
verbose("getting ekin_cell")
return self.get_ekinh()
# alias
[docs]
def get_ekin_elec(self):
verbose("getting ekin_elec")
return self.get_ekinc()
[docs]
def get_temperature_cell(self):
"""[K]"""
verbose("getting temperature_cell")
req = 'ekin_cell'
self.try_set_attr(req)
if self.is_set_attr(req):
return self.ekin_cell * 2.0 / 9.0 / constants.kb * constants.Ha
else:
return None
[docs]
def get_forces(self):
"""Cartesian forces [Ha/Bohr]."""
verbose("getting forces")
req = '_coords_vel_forces'
self.try_set_attr(req)
return self._coords_vel_forces['forces'] if self.is_set_attr(req) \
else None
[docs]
def get_stress(self):
"""Stress tensor from STRESS file if available [kbar]"""
verbose("getting stress")
fn = os.path.join(self.basedir, 'STRESS')
if os.path.exists(fn):
cmd = "grep -c 'TOTAL STRESS' %s" %fn
nstep = int_from_txt(com.backtick(cmd))
cmd = "grep -A3 'TOTAL STRESS TENSOR' %s | grep -v TOTAL" %fn
return traj_from_txt(com.backtick(cmd),
shape=(nstep,3,3),
axis=self.timeaxis)
else:
return None
[docs]
def get_temperature(self):
"""[K]"""
verbose("getting temperature")
req = '_energies_file'
self.try_set_attr(req)
return self._energies_file['tempp'] if self.is_set_attr(req) \
else None
[docs]
def get_velocity(self):
verbose("getting velocity")
"""Cartesian velocity [Bohr / thart]. Not sure about the unit!"""
if self.check_set_attr('_coords_vel_forces'):
return self._coords_vel_forces['velocity']
else:
return None
[docs]
def get_timestep(self):
"""Timestep [thart]."""
cmd = r"grep 'TIME STEP FOR IONS' %s | \
sed -re 's/.*IONS:\s+(.*)$/\1/'" %self.filename
return float_from_txt(com.backtick(cmd))
[docs]
class Cp2kSCFOutputFile(StructureFileParser):
"""CP2K SCF output parser ("global/run_type energy_force,print_level low").
Notes
-----
* Since we mainly use "global/print_level low", we don't bother to
special-case for "global/print_level medium". Therefore, we don't extract
cell and coords. SCF runs are only done for convergence tests, so forces,
etot and stress are important.
* It seems that with default &print settings, SCF runs write the stress
tensor in GPa, while for MD, the default is bar. Thank you very much!
"""
default_units = \
{'energy': Ha / eV, # Ha -> eV
'forces': Ha / eV * Angstrom / Bohr, # Ha / Bohr -> eV / Angstrom
}
[docs]
def __init__(self, *args, **kwds):
StructureFileParser.__init__(self, *args, **kwds)
self.attr_lst = [\
'natoms',
'etot',
'forces',
'stress',
'symbols',
]
self.init_attr_lst()
def _get_run_type(self):
cmd = r"grep -m1 'GLOBAL.*Run type' {0} | sed \
-re 's/.*type\s+(.*)\s*/\1/'".format(self.filename)
return com.backtick(cmd).strip()
def _get_natoms_symbols_forces(self):
cmd = r"sed -nre '1,/ATOMIC FORCES/d; " + \
r"1,/Atom\s+Kind\s+Element/d; " + \
r"/SUM OF ATOMIC FORCES/q;p' %s" %self.filename
ret = com.backtick(cmd).strip()
if ret != '':
arr = np.array([x.split() for x in ret.splitlines()])
return {'natoms': arr.shape[0],
'symbols': arr[:,2].tolist(),
'forces': arr[:,3:].astype(float)}
else:
return None
[docs]
def get_natoms(self):
if self.check_set_attr('_natoms_symbols_forces'):
return self._natoms_symbols_forces['natoms']
else:
return None
[docs]
def get_symbols(self):
if self.check_set_attr('_natoms_symbols_forces'):
return self._natoms_symbols_forces['symbols']
else:
return None
[docs]
def get_forces(self):
"""[Ha/Bohr]"""
if self.check_set_attr('_natoms_symbols_forces'):
return self._natoms_symbols_forces['forces']
else:
return None
[docs]
def get_etot(self):
"""[Ha]"""
cmd = r"sed -nre 's/.*ENERGY.*Total.*energy.*:(.*)/\1/p' %s" %self.filename
return float_from_txt(com.backtick(cmd))
[docs]
def get_stress(self):
"""[GPa]"""
cmd = r"grep -A5 'STRESS TENSOR.*GPa' %s | egrep -v 'X[ ]+Y[ ]+Z' | \
egrep '^[ ]+(X|Y|Z)'" %self.filename
ret = com.backtick(cmd).strip()
arr = np.array([x.split() for x in ret.splitlines()])
return arr[:,1:].astype(float)
[docs]
class Cp2kMDOutputFile(TrajectoryFileParser, Cp2kSCFOutputFile):
"""CP2K MD output parser. Tested with cp2k v2.4, "global/run_type
md,print_level low".
"""
[docs]
def __init__(self, *args, **kwds):
self.default_units['stress'] = 1e-4 # bar -> GPa
self.default_units['velocity'] = Bohr/thart / Ang*fs # Bohr/thart -> Ang/fs
TrajectoryFileParser.__init__(self, *args, **kwds)
self.attr_lst = [\
'cell',
'coords',
'econst',
'ekin',
'etot',
'forces',
'natoms',
'stress',
'symbols',
'temperature',
'timestep',
'velocity',
]
self.init_attr_lst()
self._cell_file = common.pj(self.basedir, 'PROJECT-1.cell')
self._ener_file = common.pj(self.basedir, 'PROJECT-1.ener')
self._stress_file = common.pj(self.basedir, 'PROJECT-1.stress')
self._pos_file = common.pj(self.basedir, 'PROJECT-pos-1.xyz')
self._frc_file = common.pj(self.basedir, 'PROJECT-frc-1.xyz')
self._vel_file = common.pj(self.basedir, 'PROJECT-vel-1.xyz')
@staticmethod
def _cp2k_repack_arr(arr):
"""Convert arr, which is an unrolled (nstep,3,3) array, back."""
out = np.empty((arr.shape[0],3,3), dtype=float)
out[:,0,0] = arr[:,2]
out[:,0,1] = arr[:,3]
out[:,0,2] = arr[:,4]
out[:,1,0] = arr[:,5]
out[:,1,1] = arr[:,6]
out[:,1,2] = arr[:,7]
out[:,2,0] = arr[:,8]
out[:,2,1] = arr[:,9]
out[:,2,2] = arr[:,10]
return out
def _cp2k_xyz2arr(self, fn):
"""Parse cp2k style XYZ files and return the 3d array."""
cmd = "grep -c 'i = .*E =' %s" %fn
nstep = nstep_from_txt(com.backtick(cmd))
natoms = int_from_txt(com.backtick("head -n1 %s" %fn))
cmd = "%s '!/i =.*E =|^[ ]+[0-9]+/ \
{print $2\" \"$3\" \"$4}' %s" %(AWK, fn)
assert self.timeaxis == 0
return np.fromstring(common.backtick(cmd), sep=' ').reshape(nstep,natoms,3)
def _get_cell_file_arr(self):
if os.path.exists(self._cell_file):
return np.loadtxt(self._cell_file)
else:
return None
def _get_ener_file_arr(self):
if os.path.exists(self._ener_file):
return np.loadtxt(self._ener_file)
else:
return None
def _get_stress_file_arr(self):
if os.path.exists(self._stress_file):
return np.loadtxt(self._stress_file)
else:
return None
[docs]
def get_natoms(self):
cmd = r"grep -m1 'Number of atoms:' %s | \
sed -re 's/.*:(.*)/\1/'" %self.filename
return int_from_txt(com.backtick(cmd))
[docs]
def get_timestep(self):
"""[fs]"""
cmd = r"egrep -m1 'MD\| Time Step \[fs\]' %s | \
sed -re 's/.*\](.*)/\1/'" %self.filename
return float_from_txt(com.backtick(cmd))
[docs]
def get_symbols(self):
for fn in [self._pos_file, self._frc_file, self._vel_file]:
if os.path.exists(fn) and self.check_set_attr('natoms'):
cmd = r"grep -m1 -A%i 'i =' %s | \
grep -v 'i ='| %s '{print $1}'" \
%(self.natoms, fn, AWK)
return com.backtick(cmd).strip().split()
return None
def _get_forces_from_outfile(self):
if self.check_set_attr('natoms'):
cmd = r"grep -c 'ATOMIC FORCES in' %s" %self.filename
nstep = nstep_from_txt(com.backtick(cmd))
cmd = r"sed -re '/^\s*$/d' {fn} | grep -A{nlines} 'ATOMIC FORCES in' \
| egrep -v -e 'ATOM|--|Kind' \
| tr -s ' ' | cut -d ' ' -f5-".format(fn=self.filename,
nlines=self.natoms+1)
return traj_from_txt(com.backtick(cmd),
shape=(nstep,self.natoms,3),
axis=self.timeaxis)
else:
return None
[docs]
def get_coords(self):
"""Cartesian [Ang]"""
if os.path.exists(self._pos_file):
return self._cp2k_xyz2arr(self._pos_file)
else:
return None
[docs]
def get_forces(self):
"""[Ha/Bohr]"""
if os.path.exists(self._frc_file):
return self._cp2k_xyz2arr(self._frc_file)
else:
return self._get_forces_from_outfile()
[docs]
def get_velocity(self):
"""[Bohr/thart]"""
if os.path.exists(self._vel_file):
return self._cp2k_xyz2arr(self._vel_file)
else:
return None
[docs]
def get_stress(self):
"""[bar]"""
if self.check_set_attr('_stress_file_arr'):
return self._cp2k_repack_arr(self._stress_file_arr)
else:
return None
[docs]
def get_ekin(self):
"""[Ha]"""
if self.check_set_attr('_ener_file_arr'):
return self._ener_file_arr[:,2]
else:
return None
[docs]
def get_temperature(self):
"""[K]"""
if self.check_set_attr('_ener_file_arr'):
return self._ener_file_arr[:,3]
else:
return None
[docs]
def get_etot(self):
"""[Ha]"""
if self.check_set_attr('_ener_file_arr'):
return self._ener_file_arr[:,4]
else:
return None
[docs]
def get_econst(self):
"""[Ha]"""
if self.check_set_attr('_ener_file_arr'):
return self._ener_file_arr[:,5]
else:
return None
[docs]
def get_cell(self):
"""[Ang]"""
if self.check_set_attr('_cell_file_arr'):
return self._cp2k_repack_arr(self._cell_file_arr)
else:
return None
[docs]
def get_volume(self):
"""[Ang^3]"""
if self.check_set_attr('_cell_file_arr'):
return self._cell_file_arr[:,-1]
else:
return None
[docs]
class Cp2kRelaxOutputFile(Cp2kMDOutputFile):
"""Parse cp2k global/run_type cell_opt. geo_opt might also work, but not
tested yet."""
[docs]
def get_natoms(self):
if os.path.exists(self._pos_file):
cmd = r"head -n1 {0}".format(self._pos_file)
return int_from_txt(com.backtick(cmd))
else:
return None
[docs]
def get_etot(self):
if os.path.exists(self._pos_file):
cmd = r"%s '/i =.*E/ {print $6}' %s" %(AWK, self._pos_file)
return arr1d_from_txt(com.backtick(cmd))
else:
return None
[docs]
def get_cell(self):
# For cell_opt, cp2k does a final scf calc after the cell optimization.
# That creates an additional time step in _pos_file, but NOT in
# _cell_file. Then, coords[-1,...] and coords[-2,...] are the same b/c
# the final geometry is simply scf'ed again. Also etot is one step
# longer. Handle that by appending cell[-1,...] to the end of the cell
# array in order to have equal length. Need that to make get_traj()
# happy.
if self.check_set_attr('_cell_file_arr'):
cell = self._cp2k_repack_arr(self._cell_file_arr)
self.assert_set_attr('_run_type')
if self._run_type == 'CELL_OPT':
if self.check_set_attr('coords'):
offset = self.coords.shape[0] - cell.shape[0]
if offset == 1:
return np.concatenate((cell, cell[-1,...][None,...]),
axis=0)
else:
raise Exception("cell and coords have a timestep "
"offset != 1, dunno what to do "
"(offset={0}, coords: {1}, cell: {2})".format(offset,self.coords.shape, cell.shape))
else:
return None
# GEO_OPT not tested yet. Simply return cell for now unchanged, if
# defined.
else:
return cell
else:
return None
[docs]
class DcdOutputFile:
"""Base class which implements dcd file reading. Used only for
inheritance."""
_dcd_convang = False
def _get_dcd_data(self):
if os.path.exists(self.dcdfilename):
cryst_const, coords = dcd.read_dcd_data(self.dcdfilename, convang=self._dcd_convang)
return {'cryst_const': cryst_const,
'coords': coords,
'nstep': cryst_const.shape[0],
'natoms': coords.shape[1],
'timestep': dcd.read_dcd_header(self.dcdfilename)['timestep']}
else:
return None
[docs]
def get_coords(self):
if self.check_set_attr('_dcd_data'):
return self._dcd_data['coords']
else:
return None
[docs]
def get_cryst_const(self):
if self.check_set_attr('_dcd_data'):
return self._dcd_data['cryst_const']
else:
return None
[docs]
def get_natoms(self):
if self.check_set_attr('_dcd_data'):
return self._dcd_data['natoms']
else:
return None
[docs]
def get_nstep(self):
if self.check_set_attr('_dcd_data'):
return self._dcd_data['nstep']
else:
return None
# Avoid reading PROJECT-1.cell (cp2k) or any other text output (lammps)
# with cell info even if it exists. We need to make sure that we read the
# unit cell from the dcd file. This is essential in cp2k when using
# dcd_aligned_cell.
[docs]
def get_cell(self):
return None
[docs]
def get_volume(self):
return None
[docs]
class Cp2kDcdMDOutputFile(DcdOutputFile, Cp2kMDOutputFile):
"""Same as :class:`Cp2kMDOutputFile` (all ``PROJECT*`` files are text),
only that the coordinates file is a dcd format binary file
``PROJECT-pos-1.dcd``."""
[docs]
def __init__(self, *args, **kwds):
super(Cp2kDcdMDOutputFile, self).__init__(*args, **kwds)
self.dcdfilename = common.pj(self.basedir, 'PROJECT-pos-1.dcd')
self._dcd_convang = False
self.attr_lst = [\
'cryst_const',
'coords',
'econst',
'ekin',
'etot',
'forces',
'natoms',
'stress',
'symbols',
'temperature',
'timestep',
'velocity',
]
self.init_attr_lst()
[docs]
class LammpsTextMDOutputFile(TrajectoryFileParser):
"""Parse LAMMPS text output.
We parse the default ``log.lammps`` file (`filename`) with ``thermo``
output and, if present, a custom dump file ``lmp.out.dump`` created by
something like ``dump 2 all custom 1 lmp.out.dump ...`` Tested with MD and
structure optimization (``minimize``).
Currently hardcoded file names:
| `dumpfilename` = ``basedir/lmp.out.dump``
| `symbolsfilename` = ``basedir/lmp.struct.symbols``
where `basedir` is the dir where `filename` (i.e. ``log.lammps``) lives.
default_units are for lammps metal units.
Examples
--------
Example lammps input::
clear
units metal
boundary p p p
atom_style atomic
read_data lmp.struct
### interactions
pair_style tersoff
pair_coeff * * AlN.tersoff Al N
### IO
dump dump_txt all custom 1 lmp.out.dump id type xsu ysu zsu xu yu zu fx fy fz vx vy vz
dump dump_dcd all dcd 1 lmp.out.dcd
##dump dump_xyz all xyz 1 lmp.out.xyz
##dump_modify dump_xyz element Al N
dump_modify dump_txt sort id
dump_modify dump_dcd sort id unwrap yes
thermo_style custom step temp vol cella cellb cellc cellalpha cellbeta cellgamma &
ke pe etotal &
press pxx pyy pzz pxy pxz pyz cpu
thermo_modify flush yes
thermo 1
### init
velocity all create 300.0 123 rot yes dist gaussian
### run
fix fix_npt all npt temp 1000 1000 0.01 tri 0 0 0.3 tchain 4 pchain 4 &
mtk yes scaleyz no scalexz no scalexy no flip no
timestep 2.5e-3
run 1000
Notes
-----
* columns in `filename` and `dumpfilename`: We automagically extract
the "header" (e.g. "Step Temp Volume Cella ..." in `filename` or
"ITEM: ATOMS id type xsu ysu zsu fx fy fz vx vy vz" in
`dumpfilename`) and map data to these symbols. See `_thermo_dct` and
`_dump_dct`. Currently, "xsu ysu zsu" is parsed to get
coords_frac. "xu yu zu" is parsed to get coords. Wrapped coordinates
(e.g. "xs ys zs" and "x y z") are ignored.
* multiple runs from one input script (i.e. 2 or more ``run``
commands): it seems that the last step of the preceeding run is
printed again by the new run by ``thermo_style``, which results in
more `nstep` in `filename` as there really are in `dumpfilename`. We
parse all important stuff (coords, cell, velocity) from
`dumpfilename`, but `temperature` and `stress` etc. is from
`filename`. Watch out when plotting.
* cell: We parse cell from "ITEM: BOX BOUNDS" in `dumpfilename` and the
cell is always [[x,0,0],[xy,y,0],[xz,yz,z]], i.e. it is aligned in the
same way in each timestep.
* atom symbols: First we try to read `symbolsfilename`, which may have been
written by :func:`pwtools.io.write_lammps`. If that is not found, we try
to use the ``type`` column in `dumpfilename` together with a type number
-> atom symbol mapping either from the `order` input keyword or a
``dump_modify ... element`` line in `filename` if found.
"""
[docs]
def __init__(self, filename='log.lammps', order=None, **kwds):
"""
Parameters
----------
filename : str
Text output file, where ``thermo_style [custom]``
output is written to. The lammps default is ``log.lammps``.
order : dict, optional
See :meth:`pwtools.crys.Structure.get_order`. Mapping of atom
symbols to ``type`` in lammps, i.e. {'Al': 1, 'N': 2}. If None then
we try to use ``dump_modify ... element`` if present in `filename`,
where lammps echos the input script. `symbols` is build from the
``type`` column in `dumpfilename`.
"""
self.default_units['stress'] = 1e-4 # bar -> GPa
self.default_units['velocity'] = fs/ps # Ang/ps -> Ang/fs
self.default_units['time'] = ps/fs # ps -> fs
TrajectoryFileParser.__init__(self, filename, **kwds)
self.attr_lst = [\
'cell',
'coords_frac',
'coords',
'cryst_const',
'ekin',
'etot',
'forces',
'natoms',
'stress',
'symbols',
'temperature',
'timestep',
'velocity',
'volume',
]
self.init_attr_lst()
self.order = order
# Text output file from ``dump <ID> all custom 1 ...``.
self.dumpfilename = pj(self.basedir, 'lmp.out.dump')
# written by io.write_lammps()
self.symbolsfilename = pj(self.basedir, 'lmp.struct.symbols')
@staticmethod
def _get_from_dct(dct, key):
if key in dct:
return dct[key]
else:
return None
@staticmethod
def _assert_shape_mod(name, a, b):
mod = a % b
msg = ("{name}: shape mismatch: a: {a}, b: {b}, "
"{a} % {b} = {mod}, "
"should be 0".format(a=a,b=b,mod=mod,name=name))
assert mod == 0, msg
def _get_thermo_dct(self):
"""Parse all text between "Step ..." and "Loop ..." in self.filename::
Step Temp Volume Cella Cellb Cellc
0 0 37.412297 3 3 4.8
1 0 37.42182 3.0002318 3.0002318 4.80048
2 0 37.431339 3.0004633 3.0004633 4.80096
....
1000 0 37.431339 3.0004633 3.0004633 4.80096
Loop time of 0.115472 on 1 procs (1 MPI x 1 OpenMP) for 1043 steps with 4 atoms
This is usually generated by::
thermo_style custom step temp vol cella cellb cellc
thermo 1
in the input file. I think if no ``thermo_style`` command is used, it
still prints a line starting with "Step ...".
"""
if os.path.exists(self.filename):
header = com.backtick("grep -m1 Step %s" %self.filename).split()
# Strip all text except for the data columns. Also works for
# multiple ``run`` or ``minimize`` commands in one input file,
# which cause wildly mixed text.
cmd = r"sed -nre '/^Step/,/^Loop/p' %s | \
egrep -v 'Step|Loop'" %self.filename
arr = arr2d_from_txt(com.backtick(cmd))
return dict((x, arr[:,ii]) for ii,x in enumerate(header))
else:
return None
def _get_dump_dct(self):
if self.check_set_attr('natoms') and \
os.path.exists(self.dumpfilename):
cmd = r"grep -c 'ITEM: TIMESTEP' %s" %self.dumpfilename
nstep = nstep_from_txt(com.backtick(cmd))
header = com.backtick("grep -m1 'ITEM: ATOMS' %s \
| sed -re 's/.*TOMS //'" %self.dumpfilename).split()
cmd = r"grep -A%i 'ITEM: ATOMS' %s | \
egrep -ve '--|ITEM'" %(self.natoms, self.dumpfilename)
arr = np.fromstring(com.backtick(cmd),
sep=' ').reshape(nstep*self.natoms,len(header))
self._assert_shape_mod('dump', arr.shape[0], self.natoms)
return dict((x, arr[:,ii]) for ii,x in enumerate(header))
else:
return None
def _lmp_dump2arr3d(self, dct, keys):
if self.check_set_attr('natoms'):
for k in keys:
if k not in dct:
return None
nstep = dct[keys[0]].shape[0] // self.natoms
arr = np.array([dct[keys[0]],
dct[keys[1]],
dct[keys[2]]]).T.reshape((nstep,
self.natoms,3))
return arr
else:
return None
[docs]
def get_natoms(self):
if os.path.exists(self.dumpfilename):
cmd = r"grep -A1 -m1 'ITEM: NUMBER OF ATOMS' %s | tail -n1" %self.dumpfilename
return nstep_from_txt(com.backtick(cmd))
else:
return None
[docs]
def get_stress(self):
if self.check_set_attr('_thermo_dct'):
keys = 'Pxx Pyy Pzz Pxy Pxz Pyz'.split()
for k in keys:
if k not in self._thermo_dct:
return None
voigt = np.array([self._thermo_dct['P'+k] for k in ['xx', 'yy',
'zz', 'yz', 'xz', 'xy']]).T
return crys.voigt2tensor3d(voigt)
else:
return None
[docs]
def get_etot(self):
"""Potetntial energy PotEng [eV]. etot+ekin here is
TotEng=PotEng+KinEng in lammps. In DFT, the potential energy is usually
called "total energy".
"""
if self.check_set_attr('_thermo_dct'):
return self._get_from_dct(self._thermo_dct, 'PotEng')
else:
return None
[docs]
def get_ekin(self):
if self.check_set_attr('_thermo_dct'):
return self._get_from_dct(self._thermo_dct, 'KinEng')
else:
return None
[docs]
def get_temperature(self):
if self.check_set_attr('_thermo_dct'):
return self._get_from_dct(self._thermo_dct, 'Temp')
else:
return None
[docs]
def get_volume(self):
if self.check_set_attr('_thermo_dct'):
return self._get_from_dct(self._thermo_dct, 'Volume')
else:
return None
[docs]
def get_cryst_const(self):
if self.check_set_attr('_thermo_dct'):
keys = 'Cella Cellb Cellc CellAlpha CellBeta CellGamma'.split()
for k in keys:
if k not in self._thermo_dct:
return None
nstep = len(self._thermo_dct['Cella'])
ret = np.empty((nstep,6))
for ii,k in enumerate(keys):
ret[:,ii] = self._thermo_dct[k]
return ret
else:
return None
[docs]
def get_coords_frac(self):
if self.check_set_attr('_dump_dct'):
keys = 'xsu ysu zsu'.split()
return self._lmp_dump2arr3d(self._dump_dct, keys)
else:
return None
[docs]
def get_coords(self):
if self.check_set_attr('_dump_dct'):
keys = 'xu yu zu'.split()
return self._lmp_dump2arr3d(self._dump_dct, keys)
else:
return None
[docs]
def get_forces(self):
if self.check_set_attr('_dump_dct'):
keys = 'fx fy fz'.split()
return self._lmp_dump2arr3d(self._dump_dct, keys)
else:
return None
[docs]
def get_velocity(self):
if self.check_set_attr('_dump_dct'):
keys = 'vx vy vz'.split()
return self._lmp_dump2arr3d(self._dump_dct, keys)
else:
return None
[docs]
def get_timestep(self):
if os.path.exists(self.filename):
cmd = r"grep -m1 timestep %s | \
sed -re 's/.*step (.*)/\1/'" %self.filename
return float_from_txt(com.backtick(cmd))
else:
return None
[docs]
def get_symbols(self):
if os.path.exists(self.symbolsfilename):
return com.file_read(self.symbolsfilename).split()
elif self.check_set_attr_lst(['_dump_dct','natoms']):
if 'type' in self._dump_dct:
if self.order is None:
cmd = r"grep -m1 'dump_modify.*element' %s | sed -re \
's/.*ment (.*)/\1/'" %self.filename
revorder = dict((ii+1,sy) for ii,sy in \
enumerate(com.backtick(cmd).split()))
else:
# {'a':1,'b':2} -> {1:'a',2:'b'}
revorder = dict((v,k) for k,v in self.order.items())
return [revorder[int(ii)] for ii in self._dump_dct['type'][:self.natoms]]
else:
return None
else:
return None
[docs]
def get_cell(self):
if os.path.exists(self.dumpfilename):
cmd = r"grep -c 'ITEM: BOX BOUNDS' %s" %self.dumpfilename
nstep = nstep_from_txt(com.backtick(cmd))
cmd = r"grep -A3 'ITEM: BOX BOUNDS' %s | \
egrep -ve '--|ITEM'" %self.dumpfilename
arr = np.fromstring(com.backtick(cmd), sep=' ').reshape((nstep,3,3))
cell = np.zeros_like(arr)
for ii in range(nstep):
xlo_bound = arr[ii,0,0]
xhi_bound = arr[ii,0,1]
ylo_bound = arr[ii,1,0]
yhi_bound = arr[ii,1,1]
zlo = arr[ii,2,0]
zhi = arr[ii,2,1]
xy = arr[ii,0,2]
xz = arr[ii,1,2]
yz = arr[ii,2,2]
xlo = xlo_bound - min(0.0,xy,xz,xy+xz)
xhi = xhi_bound - max(0.0,xy,xz,xy+xz)
ylo = ylo_bound - min(0.0,yz)
yhi = yhi_bound - max(0.0,yz)
# [[x, 0, 0],
# [xy, y, 0],
# [xz, yz, z]]
cell[ii,0,0] = xhi-xlo
cell[ii,1,1] = yhi-ylo
cell[ii,2,2] = zhi-zlo
cell[ii,1,0] = xy
cell[ii,2,0] = xz
cell[ii,2,1] = yz
return cell
else:
return None
[docs]
class LammpsDcdMDOutputFile(DcdOutputFile, LammpsTextMDOutputFile):
"""Parse Lammps DCD binary output + ``log.lammps`` text output.
Hardcodes files:
| `dcdfilename` = ``basedir/lmp.out.dcd``
Notes
-----
* cell: The DCD file format stores only cryst_const (see ``unitcell`` in
dcd.f90). ``Trajectory.cell`` calculated from ``Trajectory.cryst_const``
is aligned in the same way as ``LammpsTextMDOutputFile.cell``. That's why
the cell and cryst_const obtained from
:func:`~pwtools.io.read_lammps_md_dcd()` and
:func:`~pwtools.io.read_lammps_md_txt()` must be identical up to
numerical noise (about 1e-6 for default lammps text printing precision).
"""
[docs]
def __init__(self, *args, **kwds):
super(LammpsDcdMDOutputFile, self).__init__(*args, **kwds)
self.attr_lst = [\
'cryst_const',
'coords',
'ekin',
'etot',
'natoms',
'stress',
'symbols',
'temperature',
'timestep',
'volume',
]
self.init_attr_lst()
self._dcd_convang = True
self.dcdfilename = pj(self.basedir, 'lmp.out.dcd')