"""
Basic methods for dealing with Gamess & Molden file IO for ePolyScat.
10/06/21 Added some basic error checks during testing. SHOULD SET AS A DECORATOR.
03/02/21 v2 Revisiting and finishing off...
- Fixed formatting options.
- Added wrappers for cclib moldenwriter.MOLDEN as new class.
- Use EShandler class for general IO.
- Tested with N2O demo file + ePS test job OK for Molden2006 format.
Files from writeMoldenFile2006() are working with ePS (tested for N2O test file).
Files from reformatMoldenFile() are NOT working due to line-endings issues.
See `notes/epsman_EShandler_class_demo_050221.ipynb` for demo & testing.
26/08/20 v1 Quick hack from existing functions - needs some more sophistication for file handling. Should have utils for this...
Dev work currently in [Bemo] http://localhost:8888/notebooks/ePS/N2O/N2O_electronic_structure_proc_tests_250820.ipynb
"""
import fileinput
import sys
from pathlib import Path
import cclib
from cclib.io import moldenwriter # Molden class + functions
# import epsman as em # For base class
class EShandler():
    """
    Basic class for handling Gamess & Molden file IO.

    Uses `CCLIB <https://cclib.github.io/>`_ to read Gamess log files & convert to Molden format.
    For ePS compatibility, this is slightly modified to match the "Molden2006" specifications
    defined therein (see source in `MoldenCnv2006.f90`).

    Parameters
    ----------
    fileName : str or Path obj, optional, default = None
        Gamess or Molden file.

    fileBase : str or Path obj, optional, default = None
        Path to file location, defaults to current working dir.

    outFile : str or Path obj, optional, default = None
        Name for output Molden file, defaults to fileName.molden if not set.

    If no args are passed, fileName = None will be set, and filePath = working dir.

    Examples
    --------
    >>> fileBase = Path(modPath, 'epsman', 'elecStructure','fileTest')  # Set for test file, where modPath = path to epsman root
    >>> fileName = r'N2O_aug-cc-pVDZ_geomOpt.log'
    >>> esData = EShandler(fileName, fileBase)  # Create class instance
    >>> esData.readGamessLog()  # Read Gamess file
    >>> esData.writeMoldenFile2006()  # Write Molden2006 file
    >>> esData.writeMoldenFile()  # Write Molden file as per CCLIB defaults.

    >>> esData = EShandler(fileName = 'test.molden')  # Pass a Molden file to set & use the reformatter
    >>> esData.reformatMoldenFile()

    Notes
    -----
    Thanks to `the CCLIB authors <https://cclib.github.io/>`_ for making this possible!

    To do
    -----
    - Implement directory scan (or wrapper class/decorator for this).
    - Better file handling, should implement Pathlib tests for file(s).
    - Fix reformatMoldenFile() method, this currently outputs OS specific line endings.

    19/02/21: For full eps job class inheritance, use ESjob class instead.

    """

    def __init__(self, fileName = None, fileBase = None, outFile = None):
        self.setFiles(fileName=fileName, fileBase=fileBase, outFile=outFile)

        # If a Gamess file is passed, read it.
        if (self.fileName is not None) and (self.fileName.suffix != '.molden'):
            self.readGamessLog()
        else:
            # Currently not reading data for Molden case, just set to None
            self.data = None

    def setFiles(self, fileName = None, fileBase = None, outFile = None):
        """
        Set fileName, fileBase and outFile.

        Sets self.fileBase, self.fileName, self.fullPath and self.moldenFile,
        and prints the resulting input/output paths.

        Parameters
        ----------
        fileName : str or Path obj, optional, default = None
            Gamess or Molden file.

        fileBase : str or Path obj, optional, default = None
            Path to file location, defaults to current working dir.

        outFile : str or Path obj, optional, default = None
            Name for output Molden file, defaults to fileName.molden if not set.
            Only used if fileName is set.

        If no args are passed, fileName = None will be set, and filePath = working dir.

        """
        # Set fileBase and fileName - for now no error checking here
        self.fileBase = Path.cwd() if fileBase is None else Path(fileBase)

        if fileName is None:
            # fileBase also None is the "empty class" case, which is silent.
            if fileBase is not None:
                print('*** No electronic structure file set. TODO - implement dir scan here')
            self.fileName = None
        else:
            self.fileName = Path(fileName)

        if self.fileName is None:
            # Just propagate fileBase in this case.
            self.fullPath = self.fileBase
            self.moldenFile = self.fullPath
        else:
            self.fullPath = self.fileBase/self.fileName

            # If a Molden file is passed, just set moldenFile for use later.
            # TODO: integrate this with setMoldenFile() below.
            if outFile is None:
                self.moldenFile = self.fullPath.with_suffix('.molden')
            else:
                # str() here since Path.with_name() is only guaranteed to accept
                # a string, while outFile is documented as str or Path.
                self.moldenFile = self.fullPath.with_name(str(outFile))

        print(f'\nSet input file as {self.fullPath}, use self.setFiles to change.')
        print(f'Set output file as {self.moldenFile}, use self.setMoldenFile to override.')

    def setMoldenFile(self, fileName, fileBase = None):
        """
        Set self.moldenFile with new fileName and existing path, or new path.

        Parameters
        ----------
        fileName : str or Path obj
            Molden filename.

        fileBase : str or Path obj, optional, default = None
            Path to file location, defaults to currently set path.

        """
        if fileBase is not None:
            self.moldenFile = Path(fileBase, fileName)
        elif self.fullPath.is_dir():
            # Current path is a directory, so place the file inside it.
            self.moldenFile = self.fullPath/Path(fileName)
        else:
            # Current path is (or is assumed to be) a file: swap the final component.
            # str() here since Path.with_name() is only guaranteed to accept a string.
            self.moldenFile = self.fullPath.with_name(str(fileName))

        print(f'Set output file as {self.moldenFile}, run self.setMoldenFile to override.')

    def readGamessLog(self):
        """
        Read Gamess log file using CCLIB.io.ccread(self.fullPath).

        The result is stored in self.data, and a short summary
        (number of atoms and MOs) is printed.

        """
        self.data = cclib.io.ccread(self.fullPath.as_posix())
        print(f"\n*** Read file {self.fullPath}")

        try:
            print("Read %i atoms and %i MOs" % (self.data.natom, self.data.nmo))

        # Generic error case, usually due to None returned from cclib.
        except AttributeError:
            print(f"*** Error: File {self.fullPath} not found or empty.")

    def writeMoldenFile(self):
        """
        Write data to Molden format file using CCLIB.io.ccwrite(self.data).

        Output is written to self.moldenFile. If no data is set, an error
        message is printed and nothing is written.

        """
        # Explicit check for missing data, rather than relying on whichever
        # exception the writer happens to raise when passed None.
        if getattr(self, 'data', None) is None:
            print("*** Error: Missing data, run self.readGamessLog().")
            return

        try:
            # Convert to Molden format
            cclib.io.ccwrite(self.data, terse=True, outputtype='molden', outputdest=self.moldenFile.as_posix())
            print(f"Written Molden format file {self.moldenFile}")

        # Generic error case, e.g. incomplete data object.
        except AttributeError:
            print("*** Error: Missing data, run self.readGamessLog().")

    def writeMoldenFile2006(self):
        """
        Write data to Molden format file using reformatted CCLIB code, for ePS compatible 'Molden2006' formatting.

        See :py:class:`moldenCCLIBReformatted` for details.

        Output is written to self.moldenFile with Unix line endings.
        If no data is set, an error message is printed and nothing is written.

        """
        # Explicit check for missing data, rather than relying on whichever
        # exception the writer happens to raise when passed None.
        if getattr(self, 'data', None) is None:
            print("*** Error: Missing data, run self.readGamessLog().")
            return

        try:
            # Set object
            self.moldenData = moldenCCLIBReformatted(self.data)

            # Generate the full text BEFORE opening the output file, so a failure
            # here cannot leave behind an empty or truncated file.
            moldenRepr = self.moldenData.generate_repr().split('\n')

            # Write to file using modified functions
            # Note newline='\n' to force Unix style output (default will otherwise use os.linesep,
            # see https://docs.python.org/3/library/functions.html#open).
            with open(self.moldenFile, 'w', newline='\n') as outStream:
                for line in moldenRepr:
                    # Skip ' Sym=XX' orbital defn. lines, not in standard Molden 2006 output.
                    if not line.startswith(' Sym='):
                        outStream.write(f'{line}\n')

            print(f"Written Molden2006 format file {self.moldenFile}")

        # Generic error case, e.g. incomplete data object.
        except AttributeError:
            print("*** Error: Missing data, run self.readGamessLog().")
# Try redefining existing methods - class version with inheritance
# FILEPARSE from epsproc
# Should just import... but included directly here for now!
# File parsing function - scan file for keywords & read segments
# Following above idiomatic solution, with full IO
# https://stackoverflow.com/questions/3961265/get-line-number-of-certain-phrase-in-file-python
def fileParse(fileName, startPhrase = None, endPhrase = None, comment = None, verbose = False):
    """
    Parse a file, return segment(s) from startPhrase:endPhrase, excluding comments.

    Parameters
    ----------
    fileName : str
        File to read (file in working dir, or full path)
    startPhrase : str, optional
        Phrase denoting start of section to read. Default = None.
        Matched against each line after stripping leading whitespace.
        If None, reading starts at the first line of the file.
    endPhrase : str or list of str, optional
        Phrase(s) denoting end of section to read. Default = None.
        Matched against the start of each line (leading whitespace is NOT stripped).
        If None, reading continues to the end of the file.
    comment : str, optional
        Phrase denoting comment lines, which are skipped. Default = None
        Matched against the start of each line (leading whitespace is NOT stripped).
    verbose : bool, optional
        If True, print start-phrase matches and the number of segments. Default = False

    Returns
    -------
    list
        [lineStart, lineStop], ints for line #s found from start and end phrases (1-indexed).
    list
        segments, list of lines read from file. Each stored line is a list
        [segment #, line #, line text]. The line matching startPhrase is included,
        the line matching endPhrase is not.

    All lists can contain multiple entries, if more than one segment matches the search criteria.
    Note that a new (empty) segment is opened each time an endPhrase is found, so the
    final entry in segments may be an empty list.

    """
    lineStart = []    # Line #s where startPhrase was found
    lineStop = []     # Line #s where endPhrase was found
    segments = [[]]   # One list of lines per segment
    n = 0             # Index of the segment currently being filled

    # Normalise endPhrase to a tuple once, so str.startswith() can test all
    # phrases in a single call (and a single str is not iterated by char).
    if isinstance(endPhrase, str):
        endPhrases = (endPhrase,)
    elif endPhrase:
        endPhrases = tuple(endPhrase)
    else:
        endPhrases = ()

    # With no startPhrase there is nothing to search for: read from the top.
    readFlag = startPhrase is None

    # Open file & scan each line.
    with open(fileName, 'r') as f:
        for i, line in enumerate(f, start=1):   # File line numbers are 1 indexed
            if startPhrase is None:
                if i == 1:
                    lineStart.append(i)

            # Note use of lstrip to skip any leading whitespace.
            elif line.lstrip().startswith(startPhrase):
                if verbose:
                    print('Found "', startPhrase, '" at line: ', i)

                lineStart.append(i)
                readFlag = True

            if not readFlag:
                continue

            # Check for end of segment (start of next Command sequence)
            if endPhrases and line.startswith(endPhrases):
                # Log stop line, close the current segment and open the next one
                lineStop.append(i)
                readFlag = False
                segments.append([])
                n += 1
                continue

            # Check for comments, skip line but keep reading
            if comment and line.startswith(comment):
                continue

            segments[n].append([n, i, line])   # Store line if part of defined segment

    if verbose:
        print('Found {0} segments.'.format(n+1))

    return ([lineStart, lineStop], segments)