Source code for pwtools.io

"""High level Structure and Trajectory I/O. HDF5 convenience IO functions."""

import warnings, os
try:
    import h5py
except ImportError:
    pass

import pickle
import numpy as np
from pwtools.common import frepr, cpickle_load
from pwtools.constants import Ha, eV
from pwtools import parse, atomic_data, lammps
from pwtools import crys
from pwtools import common
from pwtools import pwscf
##warnings.simplefilter('always')

try:
    import CifFile as pycifrw_CifFile
except ImportError:
    pass


[docs] def wien_sgroup_input(struct, lat_symbol='P'): """Generate input for WIEN2K's ``sgroup`` symmetry analysis tool. length: can be any Parameters ---------- struct : Structure instance lat_symbol : str, e.g. 'P' Notes ----- From sgroup's README .. code-block:: text / ------------------------------------------------------------ / in input file symbol "/" means a comment / and trailing characters are ignored by the program / empty lines are allowed P /type of lattice; choices are P,F,I,C,A / parameters of cell: / lengths of the basis vectors and / angles (degree unit is used) alpha=b^c beta=a^c gamma=a^b / |a| |b| |c| alpha beta gamma 1.0 1.1 1.2 90. 91. 92. /Number of atoms in the cell 4 /List of atoms 0.1 0.2 0.3 / <-- Atom positions in units of the vectors a b c Al / <-- name of this atom 0.1 0.2 0.4 /.... Al1 0.2 0.2 0.3 Fe 0.1 0.3 0.3 Fe / ------------------------------------------------------------------ """ empty = '\n\n' txt = "/ lattice type symbol\n%s" %lat_symbol txt += empty txt += "/ a b c alpha beta gamma\n" txt += " ".join(["%.16e"]*6) % tuple(struct.cryst_const) txt += empty txt += "/ number of atoms\n%i" %struct.natoms txt += empty txt += "/ atom list (crystal cooords)\n" fmt = ' '.join(['%.16e']*3) for sym, coord in zip(struct.symbols, struct.coords_frac): txt += fmt % tuple(coord) + '\n' + sym + '\n' return txt
[docs] def write_wien_sgroup(filename, struct, **kwds): """ Write `struct` to input file for WIEN2K's ``sgroup`` symmetry analysis tool. Parameters ---------- filename : str name of the output file struct : Structure **kwds : see wien_sgroup_input() """ txt = wien_sgroup_input(struct, **kwds) common.file_write(filename, txt)
[docs] def write_cif(filename, struct): """Q'n'D Cif writer. Uses PyCifRW. length: Angstrom Parameters ---------- filename : str name of output .cif file struct : Structure, length units Angstrom assumed """ ffmt = "%.16e" cf = pycifrw_CifFile.CifFile() block = pycifrw_CifFile.CifBlock() block['_cell_length_a'] = frepr(struct.cryst_const[0], ffmt=ffmt) block['_cell_length_b'] = frepr(struct.cryst_const[1], ffmt=ffmt) block['_cell_length_c'] = frepr(struct.cryst_const[2], ffmt=ffmt) block['_cell_angle_alpha'] = frepr(struct.cryst_const[3], ffmt=ffmt) block['_cell_angle_beta'] = frepr(struct.cryst_const[4], ffmt=ffmt) block['_cell_angle_gamma'] = frepr(struct.cryst_const[5], ffmt=ffmt) block['_symmetry_space_group_name_H-M'] = 'P 1' block['_symmetry_Int_Tables_number'] = 1 # assigning a list produces a "loop_" block['_symmetry_equiv_pos_as_xyz'] = ['x,y,z'] # atoms # # _atom_site_label: We just use symbols, which is then = # _atom_site_type_symbol, but we *could* use that to number atoms of each # specie, e.g. Si1, Si2, ..., Al1, Al2, ... data_names = ['_atom_site_label', '_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z', '_atom_site_type_symbol'] _xyz2str = lambda arr: [ffmt %x for x in arr] data = [struct.symbols, _xyz2str(struct.coords_frac[:,0]), _xyz2str(struct.coords_frac[:,1]), _xyz2str(struct.coords_frac[:,2]), struct.symbols] # "loop_" with multiple columns block.AddCifItem([[data_names], [data]]) cf['pwtools'] = block # maxoutlength = 2048 is default for cif 1.1 standard (which is default in # pycifrw 3.x). Reset default wraplength=80 b/c ASE's cif reader cannot # handle wrapped lines. common.file_write(filename, cf.WriteOut(wraplength=2048))
[docs] def write_xyz(filename, obj, name='pwtools_dummy_mol_name'): """Write VMD-style [VMD] XYZ file. length: Angstrom Parameters ---------- filename : target file name obj : Trajectory or Structure name : str, optional Molecule name. References ---------- [VMD] http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html """ traj = crys.struct2traj(obj) xyz_str = "" for istep in range(traj.nstep): xyz_str += "%i\n%s\n%s" %(traj.natoms, name + '.%i' %(istep + 1), pwscf.atpos_str_fast(traj.symbols, traj.coords[istep,...]), ) common.file_write(filename, xyz_str)
[docs] def write_axsf(filename, obj): """Write animated XSF file for Structure (only 1 step) or Trajectory. Note that forces are converted eV / Ang -> Ha / Ang. length: Angstrom forces: Ha / Angstrom Parameters ---------- filename : target file name obj : Structure or Trajectory References ---------- [XSF] http://www.xcrysden.org/doc/XSF.html """ # Notes # ----- # XSF: The XSF spec [XSF] is a little fuzzy about what PRIMCOORD actually # is (fractional or cartesian Angstrom). Only the latter case results # in a correctly displayed structure in xcrsyden. So we use that. # # Speed: The only time-consuming step is calling atpos_str*() in the loop # b/c that transforms *every* single float to a string, which # effectively is a double loop over `ccf`. No way to get faster w/ pure # Python. # traj = crys.struct2traj(obj) # ccf = cartesian coords + forces (6 columns) if traj.is_set_attr('forces'): ccf = np.concatenate((traj.coords, traj.forces*eV/Ha), axis=-1) else: ccf = traj.coords axsf_str = "ANIMSTEPS %i\nCRYSTAL" %traj.nstep for istep in range(traj.nstep): axsf_str += "\nPRIMVEC %i\n%s" %(istep+1, common.str_arr(traj.cell[istep,...])) axsf_str += "\nPRIMCOORD %i\n%i 1\n%s" %(istep+1, traj.natoms, pwscf.atpos_str_fast(traj.symbols, ccf[istep,...])) common.file_write(filename, axsf_str)
[docs] def write_lammps(filename, struct, symbolsbasename='lmp.struct.symbols'): """Write Structure object to lammps format. That file can be read in a lammps input file by ``read_data``. Write file ``lmp.struct.symbols`` with atom symbols. Parameters ---------- filename : str name of file to write symbolsbasename : str, optional file for atom symbols struct : Structure References ---------- ase.calculators.lammpsrun (ASE 3.8). """ dr = os.path.dirname(filename) fn = os.path.join(dr, symbolsbasename) common.file_write(fn, '\n'.join(struct.symbols)) common.file_write(filename, lammps.struct_str(struct))
[docs] def write_h5(fn, dct, **kwds): """Write dictionary with arrays (or whatever HDF5 handles) to h5 file. Dict keys are supposed to be HDF group + dataset names like `/a/b/c/dset`. The leading slash can be skipped. Default file mode is 'a' (see below for details). Parameters ---------- fn : str filename (e.g. 'foo.h5', 'bar.hdf') dct : dict **kwds : keywords to ``h5py.File`` (e.g. ``mode='w'``) Notes ----- The default file opening mode is the (old?) ``h5py.File`` default value, which is ``mode='a'``, i.e. read+append mode. In this mode, existing keys cannot be reused (overwritten), only new ones can be appended. The file is created if nonexistent. To overwrite, use ``mode='w'``, but this is the same as deleting the file and writing a new one! If you want to overwrite some or all existing keys and add new ones, use smth like:: >>> old = read_h5('file.h5') >>> old.update({'/old/key': new_value, '/new/key': some_more_data}) >>> write_h5('file.h5', old, mode='w') """ mode = kwds.pop('mode') if 'mode' in kwds else 'a' fh = h5py.File(fn, mode=mode, **kwds) for key,val in dct.items(): fh[key] = val fh.close()
[docs] def read_h5(fn): """Read h5 file into dict. Dict keys are the group + dataset names, e.g. '/a/b/c/dset'. All keys start with a leading slash even if written without (see :func:`write_h5`). Parameters ---------- fn : str filename Examples -------- >>> read_h5('foo.h5').keys() ['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z'] """ fh = h5py.File(fn, mode='r') dct = {} def get(name, obj): if isinstance(obj, h5py.Dataset): _name = name if name.startswith('/') else '/'+name val = obj[()] dct[_name] = obj.asstr()[()] if isinstance(val, bytes) else val fh.visititems(get) fh.close() return dct
[docs] def load_h5(*args, **kwds): """Alias for :func:`read_h5`. Deprecated.""" warnings.warn("load_h5() is deprcated, use read_h5() instead", DeprecationWarning) return read_h5(*args, **kwds)
[docs] def read_pickle(filename): """Load object written by ``pickle.dump()``, e.g. files written by :meth:`~pwtools.base.FlexibleGetters.dump()`.""" return pickle.load(open(filename, 'rb'))
[docs] class ReadFactory: """Factory class to construct callables to parse files."""
[docs] def __init__(self, parser=None, struct_or_traj=None, doc=''): """ Parameters ---------- parser : one of parse.*File parsing classes struct_or_traj : str {'struct','traj'} Whether the callables should return parser.get_{struct,traj}()'s return value. doc : str docstring header """ self.parser = parser self.struct_or_traj = struct_or_traj # Overwrite class docstring. That shows up in sphinx autodoc of the # class instances (all read_* "functions"). The more natural thing is # to tell sphinx to use the __call__.__doc__ since the instance is used # as a callable. Now, they are treated as normal class instance, which # is perfectly right from sphinx' point of view. We would need to add # `doc` to __call__.__doc__, but how? Fancy decorator magic! self.__doc__ = doc + """ Parameters ---------- filename : str Name of the file to parse. **kwds : keywords args passed to the parser class (e.g. units=...) Returns ------- ret : :class:`~pwtools.crys.Structure` (SCF runs) or \ :class:`~pwtools.crys.Trajectory` (MD-like runs) """
[docs] def __call__(self, filename, **kwds): """ Parameters ---------- filename : str Name of the file to parse. **kwds : keywords args passed to the parser class (e.g. units=...) """ if self.struct_or_traj == 'struct': return self.parser(filename, **kwds).get_struct() elif self.struct_or_traj == 'traj': return self.parser(filename, **kwds).get_traj() else: raise Exception("unknown struct_or_traj: %s" %struct_or_traj)
read_cif = ReadFactory(parser=parse.CifFile, struct_or_traj='struct', doc="Read Cif files." ) read_pdb = ReadFactory(parser=parse.PDBFile, struct_or_traj='struct', doc="Read PDB files." ) read_pw_scf = ReadFactory(parser=parse.PwSCFOutputFile, struct_or_traj='struct', doc="Read Pwscf SCF run ouput." ) read_pw_md = ReadFactory(parser=parse.PwMDOutputFile, struct_or_traj='traj', doc="Read Pwscf md/relax/vc-relax run ouput." ) read_pw_vcmd = ReadFactory(parser=parse.PwVCMDOutputFile, struct_or_traj='traj', doc="Read Pwscf vc-md run ouput." ) read_cpmd_scf = ReadFactory(parser=parse.CpmdSCFOutputFile, struct_or_traj='struct', doc="Read CPMD SCF (single point) run ouput." ) read_cpmd_md = ReadFactory(parser=parse.CpmdMDOutputFile, struct_or_traj='traj', doc="Read CPMD MD (fixed and variable cell, BO and CP) run ouput." ) read_cp2k_scf = ReadFactory(parser=parse.Cp2kSCFOutputFile, struct_or_traj='struct', doc="Read CP2K SCF (single point) run ouput." ) read_cp2k_md = ReadFactory(parser=parse.Cp2kMDOutputFile, struct_or_traj='traj', doc="Read CP2K MD run ouput (all text)." ) read_cp2k_md_dcd = ReadFactory(parser=parse.Cp2kDcdMDOutputFile, struct_or_traj='traj', doc="Read CP2K MD run ouput (coordinates in dcd binary format)." ) read_cp2k_relax = ReadFactory(parser=parse.Cp2kRelaxOutputFile, struct_or_traj='traj', doc="Read CP2K relaxation run ouput (all text)." ) read_lammps_md_txt = ReadFactory(parser=parse.LammpsTextMDOutputFile, struct_or_traj='traj', doc="Read LAMMPS MD run ouput (all text)." ) read_lammps_md_dcd = ReadFactory(parser=parse.LammpsDcdMDOutputFile, struct_or_traj='traj', doc="Read LAMMPS MD run ouput (coordinates in dcd format)." )