Source code for qmmd.qmcalc.genScripts

import os
from os.path import isdir
from typing import Optional, Union

from qmmd.qmcalc.constants import keywordDict
from qmmd.qmcalc.admin import groupFilesIntoDir


[docs] def writeGaussInpFile( name: str, inpDirPath: str, keywordLine: Optional[str] = None, mem: Union[int, str] = 4000, ncpus: Union[int, str] = 8, combination: str = " m062x/6-311+g(d,p)", scrf: str = " scrf=(cpcm,solvent=water)", freq: str = " freq", grid: str = " opt=calcfc int(grid=ultrafine)", charge: int = 0, spin: int = 1, verbose: bool = False, ) -> None: """ Generate a Gaussian input file based on specified inputs. Parameters ---------- name : str Name of the jobscript, also used for input and output file of the software to run. inpDirPath : str Directory path to store the jobscript. keywordLine : str, optional The line of keywords specification for Gaussian job. mem : Union[int,str] Amount of memory to request for the Gaussian job. ncpus : Union[int,str] Number of CPUs to request for the job. combination : str Keyword for DFT method and basis set specification in Gaussian. scrf : str Keyword for solvent specification in Gaussian. freq : str Keyword for force constant and resulting vibrational frequencies computation in Gaussian. grid : str Keyword for grid specification in Gaussian. charge : int Charge of the molecule (pay special attention if you have a transition state). spin : int Spin of the molecule. verbose : bool Whether to display details of the process. """ with open(f"{inpDirPath}/{name}.inp", "w") as inpFile: inpFile.write(f"%mem={mem}mb\n%nprocshared={ncpus}\n%chk={name}.chk\n") if keywordLine: inpFile.write(keywordLine) else: inpFile.write(f"#{combination}{scrf}{grid}{freq}") inpFile.write(f"\n\n{name}\n\n{charge} {spin}\n") with open(f"{inpDirPath}/{name}.xyz", "r") as xyzFile: lineList = xyzFile.readlines() # Couldn't recount why this was implemented, to be looked into hasEnergyName = ( "Energy" in lineList[1] or "TI" in lineList[1] or "Name" in lineList[1] ) hasPath = ":" in lineList[1] hasBlank = "\n" in lineList[1] for i, line in enumerate(lineList): if hasEnergyName and (i > 1): inpFile.write(line) elif hasPath and (i > 1): inpFile.write(f"\n{line}") if i == 2 else inpFile.write(line) elif hasBlank and (i > 1): inpFile.write(line) elif not (hasEnergyName) and not (hasPath) and not (hasBlank) and (i > 0): inpFile.write(line) inpFile.write("\n") if verbose: print(f" Generated Gaussian input file for {name}!")
[docs] def writeHPCJobScript( name: str, inpDirPath: str = ".", scheduler: str = "pbs", cluster: str = "gadi", ncpus: Union[int, str] = 8, walltime: str = "24:00:00", vmem: Union[int, str] = 8000, jobfs: Union[int, str] = 9000, project: str = "p39", software: str = "g16", version: str = "c01", verbose: bool = False, ) -> None: """ Generate a HPC jobscript based on specified inputs. Parameters ---------- name : str Name of the jobscript, also used for input and output file of the software to run. inpDirPath : str Directory path to store the jobscript. scheduler : str Scheduler to submit the job to. cluster : {'gadi', 'uq-rcc'} Cluster to run the job on. ncpus : Union[int,str] Number of CPUs to request for the job. walltime : str Wall time to request for the job. vmem : Union[int,str] Amount of memory to request for the HPC job. jobfs : Union[int,str] Amount of Jobfs memory to request for the job. project : str Project ID on NCI Gadi, only used if 'cluster' = 'gadi'. software : str Gaussian software name to use for the job. version : str Version of the Gaussian software. verbose : bool Whether to display details of the process. Notes ----- - If your HPC system does not use PBS jobscript modifications will be needed for the function (open an issue on GitHub!). - Feel free to change the default values according to your most commonly used settings. """ with open(f"{inpDirPath}/{name}.sh", "w") as f: if scheduler == "pbs": if cluster == "gadi": f.write("#!/bin/bash\n#PBS -l wd\n#PBS -q normal\n") f.write( f"#PBS -l walltime={walltime},mem={vmem}mb,ncpus={ncpus},software={software},jobfs={jobfs}mb,storage=scratch/{project}" ) f.write(f"\n\nmodule load gaussian/{software}{version}") f.write(f"\n{software} < {name}.inp > {name}.out 2>&1") elif cluster == "uq-rcc": f.write( f"#!/bin/bash\n#PBS -S /bin/bash\n#PBS -l walltime={walltime}\n#PBS -A UQ-SCI-SCMB\n" ) f.write(f"#PBS -l select=1:ncpus={ncpus}:mem={vmem}MB") f.write("\n\ncd $PBS_O_WORKDIR") f.write(f"\n\nmodule load gaussian/{software}-{version.upper()}-bash") f.write(f"\n{software} < {name}.inp > {name}.out") else: raise Exception(f"Cluster {cluster} not recognised/accommodated for yet!") else: raise Exception(f"Scheduler {scheduler} not recognised/accommodated for yet!") if verbose: print(f" Generated HPC job script for {name}!")
[docs] def genAllScripts( inpDirPath: str, keywordLine: Optional[str] = None, method: str = "m062x", basisSet: str = "6-311+g(d,p)", solvent: str = "water", solventModel: str = "cpcm", mem: Union[int, str] = 4000, ncpus: Union[int, str] = 8, calcType: str = "GOVF", charge: int = 0, spin: int = 1, scheduler: str = "pbs", cluster: str = "gadi", walltime: str = "24:00:00", vmem: Union[int, str] = 8000, jobfs: Union[int, str] = 9000, project: str = "p39", software: str = "g16", version: str = "c01", verbose: bool = False, ) -> None: """ Generate Gaussian input job files and submission files for molecules under all directories under a specified directory ('inpDirPath'). Parameters ---------- inpDirPath : str Directory path to the input directories. keywordLine : str, optional The line of keywords specification for Gaussian job, the other input arguments will be used to compose the line if it is not provided. method : str Keyword for DFT method specification in Gaussian. basisSet : str Keyword for basis set specification in Gaussian. solvent : str Keyword for solvent specification in Gaussian. solventModel : str Keyword for SCRF method specification in Gaussian. mem : Union[int,str] Amount of memory to request for the Gaussian job. ncpus : Union[int,str] Number of CPUs to request for the job. calcType : str Type of calculation (e.g. 'GOVF' for normal geometry optimisation; 'TSGOVF' for transition state geometry optimisation, 'SPEiS' for single point energy calculation, refer to 'keywordDict' for other options). charge : int Charge of the molecule (pay special attention if you have a transition state). spin : int Spin of the molecule. scheduler : str Scheduler to submit the job to. cluster : {'gadi', 'uq-rcc'} Cluster to run the job on. walltime : str Wall time to request for the job. vmem : Union[int,str] Amount of memory to request for the HPC job. jobfs : Union[int,str] Amount of Jobfs memory to request for the job. software : str Gaussian software name to use for the job. version : str Version of the software. verbose : bool Whether to display details of the process. Notes ----- - Users should organise their directories such that a directory is created for each molecule to be calculated, and all of these directories should be placed under the specified directory that this function takes in ('inpDirPath') """ if verbose: print( f"\nGenerating all job scripts for molecules under directories under {inpDirPath}..." ) assert calcType in keywordDict.keys(), "Calculation type not known!" molecules = [g for g in os.listdir(inpDirPath) if isdir(f"{inpDirPath}/{g}")] if len(molecules) == 0: groupFilesIntoDir(inpDirPath, verbose=verbose) molecules = [g for g in os.listdir(inpDirPath) if isdir(f"{inpDirPath}/{g}")] for name in molecules: if verbose: print(f" Processing {name}...") moleculeDir = f"{inpDirPath}/{name}" combination = f" {method}/{basisSet}" scrf = f" scrf=({solventModel},solvent={solvent})" freq = keywordDict[calcType]["freq"] grid = keywordDict[calcType]["grid"] writeGaussInpFile( name, moleculeDir, keywordLine, mem, ncpus, combination, freq, scrf, grid, charge, spin, verbose, ) writeHPCJobScript( name, moleculeDir, scheduler, cluster, ncpus, walltime, vmem, jobfs, project, software, version, verbose, ) if verbose: print("DONE -- Generated all scripts!\n")
if __name__ == "__main__": # Test case inpDirPath = "/mnt/c/Users/ASUS/Documents/qmmd/src/qmmd/data/exampleXYZs" keywordLine = ( "# m062x/6-311+g(d,p) opt=calcfc freq scrf=(cpcm,solvent=water) int(grid=ultrafine)" ) method, basisSet = "m062x", "6-311+g(d,p)" solvent, solventModel = "water", "cpcm" mem, ncpus = 4000, 8 calcType = "GOVF" charge, spin = 0, 1 scheduler, cluster = "pbs", "gadi" walltime, vmem, jobfs, project = "24:00:00", 8000, 9000, "p39" software, version = "g16", "c01" genAllScripts( inpDirPath, keywordLine, method, basisSet, solvent, solventModel, mem, ncpus, calcType, charge, spin, scheduler, cluster, walltime, vmem, jobfs, project, software, version, verbose=True, )