diff --git a/Makefile b/Makefile index ff5b1e7091..4fd3dfbc35 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ help: @echo " install-kinbot Install KinBot" @echo " install-sella Install Sella" @echo " install-xtb Install xTB" + @echo " install-crest Install CREST" @echo " install-torchani Install TorchANI" @echo " install-ob Install OpenBabel" @echo "" @@ -100,6 +101,9 @@ install-sella: install-xtb: bash $(DEVTOOLS_DIR)/install_xtb.sh +install-crest: + bash $(DEVTOOLS_DIR)/install_crest.sh + install-torchani: bash $(DEVTOOLS_DIR)/install_torchani.sh diff --git a/arc/constants.pxd b/arc/constants.pxd index 4a50c72602..9fc3b9127d 100644 --- a/arc/constants.pxd +++ b/arc/constants.pxd @@ -1 +1 @@ -cdef double pi, Na, kB, R, h, hbar, c, e, m_e, m_p, m_n, amu, a0, E_h, F, E_h_kJmol, bohr_to_angstrom +cdef double pi, Na, kB, R, h, hbar, c, e, m_e, m_p, m_n, amu, a0, E_h, F, E_h_kJmol, bohr_to_angstrom, angstrom_to_bohr diff --git a/arc/constants.py b/arc/constants.py index fef8e8f167..82c30212ae 100644 --- a/arc/constants.py +++ b/arc/constants.py @@ -79,6 +79,7 @@ epsilon_0 = 8.8541878128 bohr_to_angstrom = 0.529177 +angstrom_to_bohr = 1.8897259886 # Cython does not automatically place module-level variables into the module # symbol table when in compiled mode, so we must do this manually so that we @@ -102,4 +103,5 @@ 'F': F, 'epsilon_0': epsilon_0, 'bohr_to_angstrom': bohr_to_angstrom, + 'angstrom_to_bohr': angstrom_to_bohr, }) diff --git a/arc/job/adapter.py b/arc/job/adapter.py index fbef435827..5aeaba802b 100644 --- a/arc/job/adapter.py +++ b/arc/job/adapter.py @@ -97,6 +97,7 @@ class JobEnum(str, Enum): # TS search methods autotst = 'autotst' # AutoTST, 10.1021/acs.jpca.7b07361, 10.26434/chemrxiv.13277870.v2 heuristics = 'heuristics' # ARC's heuristics + crest = 'crest' # CREST conformer/TS search kinbot = 'kinbot' # KinBot, 10.1016/j.cpc.2019.106947 gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500 user = 'user' # user guesses diff --git a/arc/job/adapter_test.py b/arc/job/adapter_test.py index 429bf31110..aa90ad84b4 100644 --- a/arc/job/adapter_test.py +++ b/arc/job/adapter_test.py @@ -208,6 +208,12 @@ def setUpClass(cls): server='server3', testing=True, ) + os.makedirs(cls.job_5.local_path, exist_ok=True) + fixture_path = os.path.join(ARC_PATH, 'arc', 'testing', 'trsh', 'wall_exceeded.txt') + with open(fixture_path, 'r') as f: + log_content = f.read() + with open(os.path.join(cls.job_5.local_path, 'out.txt'), 'w') as f: + f.write(log_content) cls.job_6 = GaussianAdapter(execution_type='queue', job_name='spc1', job_type='opt', diff --git a/arc/job/adapters/common.py b/arc/job/adapters/common.py index 8fb331522c..0256a300bf 100644 --- a/arc/job/adapters/common.py +++ b/arc/job/adapters/common.py @@ -41,7 +41,7 @@ 'Cyclic_Ether_Formation': ['kinbot'], 'Cyclopentadiene_scission': ['gcn', 'xtb_gsm'], 'Diels_alder_addition': ['kinbot'], - 'H_Abstraction': ['heuristics', 'autotst'], + 'H_Abstraction': ['heuristics', 'autotst', 'crest'], 'carbonyl_based_hydrolysis': ['heuristics'], 'ether_hydrolysis': ['heuristics'], 'nitrile_hydrolysis': ['heuristics'], @@ -77,7 +77,8 @@ adapters_that_do_not_require_a_level_arg = ['xtb', 'torchani'] # Default is "queue", "pipe" will be called whenever needed. So just list 'incore'. 
-default_incore_adapters = ['autotst', 'gcn', 'heuristics', 'kinbot', 'psi4', 'xtb', 'xtb_gsm', 'torchani', 'openbabel'] +default_incore_adapters = ['autotst', 'crest', 'gcn', 'heuristics', 'kinbot', 'psi4', 'xtb', 'xtb_gsm', 'torchani', + 'openbabel'] def _initialize_adapter(obj: 'JobAdapter', diff --git a/arc/job/adapters/ts/__init__.py b/arc/job/adapters/ts/__init__.py index 29444e0ed4..dab3af7f4d 100644 --- a/arc/job/adapters/ts/__init__.py +++ b/arc/job/adapters/ts/__init__.py @@ -1,4 +1,5 @@ import arc.job.adapters.ts.autotst_ts +import arc.job.adapters.ts.crest import arc.job.adapters.ts.gcn_ts import arc.job.adapters.ts.heuristics import arc.job.adapters.ts.kinbot_ts diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py new file mode 100644 index 0000000000..2f280f1bd6 --- /dev/null +++ b/arc/job/adapters/ts/crest.py @@ -0,0 +1,638 @@ +""" +Utilities for running CREST within ARC. + +Separated from heuristics so CREST can be conditionally imported and reused. +""" + +import datetime +import os +import re +import time +from typing import TYPE_CHECKING, List, Optional, Union + +import numpy as np +import pandas as pd + +from arc.common import almost_equal_coords, get_logger +from arc.imports import settings, submit_scripts +from arc.job.adapter import JobAdapter +from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family +from arc.job.factory import register_job_adapter +from arc.job.local import check_job_status, submit_job +from arc.plotter import save_geo +from arc.species.converter import reorder_xyz_string, str_to_xyz, xyz_to_dmat, xyz_to_str +from arc.species.species import ARCSpecies, TSGuess +from arc.job.adapters.ts.heuristics import h_abstraction + +if TYPE_CHECKING: + from arc.level import Level + from arc.reaction import ARCReaction + +logger = get_logger() + +MAX_CHECK_INTERVAL_SECONDS = 100 + +try: + CREST_PATH = settings["CREST_PATH"] + CREST_ENV_PATH = settings["CREST_ENV_PATH"] + SERVERS = settings["servers"] +except KeyError: + CREST_PATH = None + CREST_ENV_PATH = None + SERVERS = {} + + +def crest_available() -> bool: + """ + Return whether CREST is configured for use. + """ + return bool(SERVERS.get("local")) and bool(CREST_PATH or CREST_ENV_PATH) + + +class CrestAdapter(JobAdapter): + """ + A class for executing CREST TS conformer searches based on heuristics-generated guesses. 
+ """ + + def __init__(self, + project: str, + project_directory: str, + job_type: Union[List[str], str], + args: Optional[dict] = None, + bath_gas: Optional[str] = None, + checkfile: Optional[str] = None, + conformer: Optional[int] = None, + constraints: Optional[List] = None, + cpu_cores: Optional[str] = None, + dihedral_increment: Optional[float] = None, + dihedrals: Optional[List[float]] = None, + directed_scan_type: Optional[str] = None, + ess_settings: Optional[dict] = None, + ess_trsh_methods: Optional[List[str]] = None, + execution_type: Optional[str] = None, + fine: bool = False, + initial_time: Optional[Union['datetime.datetime', str]] = None, + irc_direction: Optional[str] = None, + job_id: Optional[int] = None, + job_memory_gb: float = 14.0, + job_name: Optional[str] = None, + job_num: Optional[int] = None, + job_server_name: Optional[str] = None, + job_status: Optional[List[Union[dict, str]]] = None, + level: Optional['Level'] = None, + max_job_time: Optional[float] = None, + run_multi_species: bool = False, + reactions: Optional[List['ARCReaction']] = None, + rotor_index: Optional[int] = None, + server: Optional[str] = None, + server_nodes: Optional[list] = None, + queue: Optional[str] = None, + attempted_queues: Optional[List[str]] = None, + species: Optional[List[ARCSpecies]] = None, + testing: bool = False, + times_rerun: int = 0, + torsions: Optional[List[List[int]]] = None, + tsg: Optional[int] = None, + xyz: Optional[dict] = None, + ): + + self.incore_capacity = 50 + self.job_adapter = 'crest' + self.command = None + self.execution_type = execution_type or 'incore' + + if reactions is None: + raise ValueError('Cannot execute TS CREST without ARCReaction object(s).') + + dihedral_increment = dihedral_increment or 30 + + _initialize_adapter(obj=self, + is_ts=True, + project=project, + project_directory=project_directory, + job_type=job_type, + args=args, + bath_gas=bath_gas, + checkfile=checkfile, + conformer=conformer, + constraints=constraints, + cpu_cores=cpu_cores, + dihedral_increment=dihedral_increment, + dihedrals=dihedrals, + directed_scan_type=directed_scan_type, + ess_settings=ess_settings, + ess_trsh_methods=ess_trsh_methods, + fine=fine, + initial_time=initial_time, + irc_direction=irc_direction, + job_id=job_id, + job_memory_gb=job_memory_gb, + job_name=job_name, + job_num=job_num, + job_server_name=job_server_name, + job_status=job_status, + level=level, + max_job_time=max_job_time, + run_multi_species=run_multi_species, + reactions=reactions, + rotor_index=rotor_index, + server=server, + server_nodes=server_nodes, + queue=queue, + attempted_queues=attempted_queues, + species=species, + testing=testing, + times_rerun=times_rerun, + torsions=torsions, + tsg=tsg, + xyz=xyz, + ) + + def write_input_file(self) -> None: + pass + + def set_files(self) -> None: + pass + + def set_additional_file_paths(self) -> None: + pass + + def set_input_file_memory(self) -> None: + pass + + def execute_incore(self): + self._log_job_execution() + self.initial_time = self.initial_time if self.initial_time else datetime.datetime.now() + + supported_families = [key for key, val in ts_adapters_by_rmg_family.items() if 'crest' in val] + + self.reactions = [self.reactions] if not isinstance(self.reactions, list) else self.reactions + for rxn in self.reactions: + if rxn.family not in supported_families: + logger.warning(f'The CREST TS search adapter does not support the {rxn.family} reaction family.') + continue + if any(spc.get_xyz() is None for spc in rxn.r_species + rxn.p_species): 
+ logger.warning(f'The CREST TS search adapter cannot process a reaction if 3D coordinates of ' + f'some/all of its reactants/products are missing.\nNot processing {rxn}.') + continue + if not crest_available(): + logger.warning('CREST is not available. Skipping CREST TS search.') + continue + + if rxn.ts_species is None: + rxn.ts_species = ARCSpecies(label='TS', + is_ts=True, + charge=rxn.charge, + multiplicity=rxn.multiplicity, + ) + + tsg = TSGuess(method='CREST') + tsg.tic() + + crest_job_dirs = [] + xyz_guesses = h_abstraction(reaction=rxn, + dihedral_increment=self.dihedral_increment) + if not xyz_guesses: + logger.warning(f'CREST TS search failed to generate any seed guesses for {rxn.label}.') + tsg.tok() + continue + + for iteration, xyz_entry in enumerate(xyz_guesses): + if isinstance(xyz_entry, dict): + xyz_guess = xyz_entry.get("xyz") + else: + xyz_guess = xyz_entry + if xyz_guess is None: + continue + + df_dmat = convert_xyz_to_df(xyz_guess) + try: + h_abs_atoms_dict = get_h_abs_atoms(df_dmat) + if not h_abs_atoms_dict: + logger.warning( + f"Could not determine the H abstraction atoms for {rxn.label} crest seed {iteration}. " + "Skipping this CREST seed." + ) + continue + crest_job_dir = crest_ts_conformer_search( + xyz_guess, + h_abs_atoms_dict["A"], + h_abs_atoms_dict["H"], + h_abs_atoms_dict["B"], + path=self.local_path, + xyz_crest_int=iteration, + ) + crest_job_dirs.append(crest_job_dir) + except KeyError as e: + logger.error( + f"Could not determine the H abstraction atoms (missing key {e}) for " + f"{rxn.label} crest seed {iteration}." + ) + except ValueError as e: + logger.error( + f"Could not determine the H abstraction atoms for {rxn.label} crest seed {iteration}: {e}" + ) + + if not crest_job_dirs: + logger.warning(f'CREST TS search failed to prepare any jobs for {rxn.label}.') + tsg.tok() + continue + + crest_jobs = submit_crest_jobs(crest_job_dirs) + monitor_crest_jobs(crest_jobs) + xyz_guesses_crest = process_completed_jobs(crest_jobs) + tsg.tok() + + for method_index, xyz in enumerate(xyz_guesses_crest): + if xyz is None: + continue + unique = True + for other_tsg in rxn.ts_species.ts_guesses: + if almost_equal_coords(xyz, other_tsg.initial_xyz): + if hasattr(other_tsg, "method_sources"): + other_tsg.method_sources = other_tsg._normalize_method_sources( + (other_tsg.method_sources or []) + ["crest"] + ) + unique = False + break + if unique: + ts_guess = TSGuess(method='CREST', + index=len(rxn.ts_species.ts_guesses), + method_index=method_index, + t0=tsg.t0, + execution_time=tsg.execution_time, + success=True, + family=rxn.family, + xyz=xyz, + ) + rxn.ts_species.ts_guesses.append(ts_guess) + save_geo(xyz=xyz, + path=self.local_path, + filename=f'CREST_{method_index}', + format_='xyz', + comment=f'CREST {method_index}, family: {rxn.family}', + ) + + if len(self.reactions) < 5: + successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'crest' in tsg.method.lower()] + if successes: + logger.info(f'CREST successfully found {len(successes)} TS guesses for {rxn.label}.') + else: + logger.info(f'CREST did not find any successful TS guesses for {rxn.label}.') + + self.final_time = datetime.datetime.now() + + def execute_queue(self): + self.execute_incore() + + +def crest_ts_conformer_search( + xyz_guess: dict, + a_atom: int, + h_atom: int, + b_atom: int, + path: str = "", + xyz_crest_int: int = 0, +) -> str: + """ + Prepare a CREST TS conformer search job: + - Write coords.ref and constraints.inp + - Write a PBS/HTCondor submit script using 
submit_scripts["local"]["crest"] + - Return the CREST job directory path + """ + path = os.path.join(path, f"crest_{xyz_crest_int}") + os.makedirs(path, exist_ok=True) + + # --- coords.ref --- + symbols = xyz_guess["symbols"] + converted_coords = reorder_xyz_string( + xyz_str=xyz_to_str(xyz_guess), + reverse_atoms=True, + convert_to="bohr", + ) + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, "coords.ref") + with open(coords_ref_path, "w") as f: + f.write(coords_ref_content) + + # --- constraints.inp --- + num_atoms = len(symbols) + # CREST uses 1-based indices + a_atom += 1 + h_atom += 1 + b_atom += 1 + + # All atoms not directly involved in A–H–B go into the metadynamics atom list + list_of_atoms_numbers_not_participating_in_reaction = [ + i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] + ] + + constraints_path = os.path.join(path, "constraints.inp") + with open(constraints_path, "w") as f: + f.write("$constrain\n") + f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") + f.write(" force constant: 0.5\n") + f.write(" reference=coords.ref\n") + f.write(f" distance: {a_atom}, {h_atom}, auto\n") + f.write(f" distance: {h_atom}, {b_atom}, auto\n") + f.write("$metadyn\n") + if list_of_atoms_numbers_not_participating_in_reaction: + f.write( + f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' + ) + f.write("$end\n") + + # --- build CREST command string --- + # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 + local_server = SERVERS.get("local", {}) + cpus = int(local_server.get("cpus", 8)) + if CREST_ENV_PATH: + crest_exe = "crest" + else: + crest_exe = CREST_PATH if CREST_PATH is not None else "crest" + + commands = [ + crest_exe, + "coords.ref", + "--cinp constraints.inp", + "--noreftopo", + f"-T {cpus}", + ] + command = " ".join(commands) + + # --- activation line (optional) --- + activation_line = CREST_ENV_PATH or "" + + if SERVERS.get("local") is not None: + cluster_soft = SERVERS["local"]["cluster_soft"].lower() + + if cluster_soft in ["condor", "htcondor"]: + # HTCondor branch (kept for completeness – you can delete if you don't use it) + sub_job = submit_scripts["local"]["crest"] + format_params = { + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + "memory": int(SERVERS["local"].get("memory", 32.0) * 1024), + } + sub_job = sub_job.format(**format_params) + + with open( + os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w" + ) as f: + f.write(sub_job) + + crest_job = submit_scripts["local"]["crest_job"] + crest_job = crest_job.format( + path=path, + activation_line=activation_line, + commands=command, + ) + + with open(os.path.join(path, "job.sh"), "w") as f: + f.write(crest_job) + os.chmod(os.path.join(path, "job.sh"), 0o700) + + # Pre-create out/err for any status checkers that expect them + for fname in ("out.txt", "err.txt"): + fpath = os.path.join(path, fname) + if not os.path.exists(fpath): + with open(fpath, "w") as f: + f.write("") + os.chmod(fpath, 0o600) + + elif cluster_soft == "pbs": + # PBS branch that matches your 'crest' template above + sub_job = submit_scripts["local"]["crest"] + format_params = { + "queue": SERVERS["local"].get("queue", "alon_q"), + "name": f"crest_{xyz_crest_int}", + "cpus": cpus, + # 'memory' is in GB for the template: mem={memory}gb + "memory": int( + SERVERS["local"].get("memory", 32) + if SERVERS["local"].get("memory", 32) < 60 + else 40 + ), + "activation_line": activation_line, + "commands": command, 
+ } + sub_job = sub_job.format(**format_params) + + submit_filename = settings["submit_filenames"]["PBS"] # usually 'submit.sh' + submit_path = os.path.join(path, submit_filename) + with open(submit_path, "w") as f: + f.write(sub_job) + os.chmod(submit_path, 0o700) + + else: + raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}") + + return path + + +def submit_crest_jobs(crest_paths: List[str]) -> dict: + """ + Submit CREST jobs to the server. + + Args: + crest_paths (List[str]): List of paths to the CREST directories. + + Returns: + dict: A dictionary containing job IDs as keys and their statuses as values. + """ + crest_jobs = {} + for crest_path in crest_paths: + job_status, job_id = submit_job(path=crest_path) + logger.info(f"CREST job {job_id} submitted for {crest_path}") + crest_jobs[job_id] = {"path": crest_path, "status": job_status} + return crest_jobs + + +def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None: + """ + Monitor CREST jobs until they are complete. + + Args: + crest_jobs (dict): Dictionary containing job information (job ID, path, and status). + check_interval (int): Time interval (in seconds) to wait between status checks. + """ + while True: + all_done = True + for job_id, job_info in crest_jobs.items(): + if job_info["status"] not in ["done", "failed"]: + try: + job_info["status"] = check_job_status(job_id) # Update job status + except Exception as e: + logger.error(f"Error checking job status for job {job_id}: {e}") + job_info["status"] = "failed" + if job_info["status"] not in ["done", "failed"]: + all_done = False + if all_done: + break + time.sleep(min(check_interval, MAX_CHECK_INTERVAL_SECONDS)) + + +def process_completed_jobs(crest_jobs: dict) -> list: + """ + Process the completed CREST jobs and update XYZ guesses. + + Args: + crest_jobs (dict): Dictionary containing job information. 
+ """ + xyz_guesses = [] + for job_id, job_info in crest_jobs.items(): + crest_path = job_info["path"] + if job_info["status"] == "done": + crest_best_path = os.path.join(crest_path, "crest_best.xyz") + if os.path.exists(crest_best_path): + with open(crest_best_path, "r") as f: + content = f.read() + xyz_guess = str_to_xyz(content) + xyz_guesses.append(xyz_guess) + else: + logger.error(f"crest_best.xyz not found in {crest_path}") + elif job_info["status"] == "failed": + logger.error(f"CREST job failed for {crest_path}") + + return xyz_guesses + + +def extract_digits(s: str) -> int: + """ + Extract the first integer from a string + + Args: + s (str): The string to extract the integer from + + Returns: + int: The first integer in the string + + """ + digit_str = re.sub(r"[^\d]", "", s) + if not digit_str: + raise ValueError(f"No digits found in string: {s!r}") + return int(digit_str) + + +def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: + """ + Convert a dictionary of xyz coords to a pandas DataFrame with bond distances + + Args: + xyz (dict): The xyz coordinates of the molecule + + Return: + pd.DataFrame: The xyz coordinates as a pandas DataFrame + + """ + symbols = xyz["symbols"] + symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)] + ts_dmat = xyz_to_dmat(xyz) + + return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum) + + +def get_h_abs_atoms(dataframe: pd.DataFrame) -> Optional[dict]: + """ + Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it + + Args: + dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols + + Returns: + Optional[dict]: The hydrogen atom and the two heavy atoms (keys: 'H', 'A', 'B'), + or None if they cannot be determined. 
+ """ + + closest_atoms = {} + for index, row in dataframe.iterrows(): + + row[index] = np.inf + closest = row.nsmallest(2).index.tolist() + closest_atoms[index] = closest + + hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] + condition_occurrences = [] + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + is_heavy_present = any( + atom for atom in atom_neighbours if not atom.startswith("H") + ) + if_hydrogen_present = any( + atom + for atom in atom_neighbours + if atom.startswith("H") and atom != hydrogen_key + ) + + if is_heavy_present and if_hydrogen_present: + # Store the details of this occurrence + condition_occurrences.append( + {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} + ) + + # Check if the condition was met + if condition_occurrences: + if len(condition_occurrences) > 1: + # Store distances to decide which occurrence to use + occurrence_distances = [] + for occurrence in condition_occurrences: + # Calculate the sum of distances to the two heavy atoms + hydrogen_key = f"{occurrence['H']}" + heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] + try: + distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() + occurrence_distances.append((occurrence, distances)) + except KeyError as e: + logger.error(f"Error accessing distances for occurrence {occurrence}: {e}") + + if not occurrence_distances: + logger.warning("No valid occurrences with distance data found for CREST H-abstraction atoms.") + return None + + # Select the occurrence with the smallest distance + best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] + return { + "H": extract_digits(best_occurrence["H"]), + "A": extract_digits(best_occurrence["A"]), + "B": extract_digits(best_occurrence["B"]), + } + else: + single_occurrence = condition_occurrences[0] + return { + "H": extract_digits(single_occurrence["H"]), + "A": extract_digits(single_occurrence["A"]), + "B": extract_digits(single_occurrence["B"]), + } + else: + + # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their distances to determine which Hyodrogen atom has the lowest distance aggregate + min_distance = np.inf + selected_hydrogen = None + selected_heavy_atoms = None + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] + + if len(heavy_atoms) < 2: + continue + + distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() + if distances < min_distance: + min_distance = distances + selected_hydrogen = hydrogen_key + selected_heavy_atoms = heavy_atoms + + if selected_hydrogen: + return { + "H": extract_digits(selected_hydrogen), + "A": extract_digits(selected_heavy_atoms[0]), + "B": extract_digits(selected_heavy_atoms[1]), + } + else: + logger.warning("No valid hydrogen atom found for CREST H-abstraction atoms.") + return None + + +register_job_adapter('crest', CrestAdapter) diff --git a/arc/job/adapters/ts/crest_test.py b/arc/job/adapters/ts/crest_test.py new file mode 100644 index 0000000000..1d5320ad5e --- /dev/null +++ b/arc/job/adapters/ts/crest_test.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +Unit tests for arc.job.adapters.ts.crest +""" + +import os +import tempfile +import unittest + +from arc.species.converter import str_to_xyz + + +class TestCrestAdapter(unittest.TestCase): + """ + Tests for CREST input generation. 
+ """ + + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.tmpdir.cleanup() + + def test_creates_valid_input_files(self): + """ + Ensure CREST inputs are written with expected content/format. + """ + from arc.job.adapters.ts import crest as crest_mod + + xyz = str_to_xyz( + """O 0.0 0.0 0.0 + H 0.0 0.0 0.96 + H 0.9 0.0 0.0""" + ) + + backups = { + "settings": crest_mod.settings, + "submit_scripts": crest_mod.submit_scripts, + "CREST_PATH": crest_mod.CREST_PATH, + "CREST_ENV_PATH": crest_mod.CREST_ENV_PATH, + "SERVERS": crest_mod.SERVERS, + } + + try: + crest_mod.settings = {"submit_filenames": {"PBS": "submit.sh"}} + crest_mod.submit_scripts = { + "local": { + "crest": ( + "#PBS -q {queue}\n" + "#PBS -N {name}\n" + "#PBS -l select=1:ncpus={cpus}:mem={memory}gb\n" + ), + "crest_job": "{activation_line}\ncd {path}\n{commands}\n", + } + } + crest_mod.CREST_PATH = "/usr/bin/crest" + crest_mod.CREST_ENV_PATH = "" + crest_mod.SERVERS = { + "local": {"cluster_soft": "pbs", "cpus": 4, "memory": 8, "queue": "testq"} + } + + crest_dir = crest_mod.crest_ts_conformer_search( + xyz_guess=xyz, a_atom=0, h_atom=1, b_atom=2, path=self.tmpdir.name, xyz_crest_int=0 + ) + + coords_path = os.path.join(crest_dir, "coords.ref") + constraints_path = os.path.join(crest_dir, "constraints.inp") + submit_path = os.path.join(crest_dir, "submit.sh") + + self.assertTrue(os.path.exists(coords_path)) + self.assertTrue(os.path.exists(constraints_path)) + self.assertTrue(os.path.exists(submit_path)) + + with open(coords_path) as f: + coords = f.read().strip().splitlines() + self.assertEqual(coords[0].strip(), "$coord") + self.assertEqual(coords[-1].strip(), "$end") + self.assertEqual(len(coords) - 2, len(xyz["symbols"])) + + with open(constraints_path) as f: + constraints = f.read() + self.assertIn("atoms: 1, 2, 3", constraints) + self.assertIn("force constant: 0.5", constraints) + self.assertIn("reference=coords.ref", constraints) + self.assertIn("distance: 1, 2, auto", constraints) + self.assertIn("distance: 2, 3, auto", constraints) + self.assertIn("$metadyn", constraints) + self.assertTrue(constraints.strip().endswith("$end")) + finally: + crest_mod.settings = backups["settings"] + crest_mod.submit_scripts = backups["submit_scripts"] + crest_mod.CREST_PATH = backups["CREST_PATH"] + crest_mod.CREST_ENV_PATH = backups["CREST_ENV_PATH"] + crest_mod.SERVERS = backups["SERVERS"] + + +if __name__ == "__main__": + unittest.main() diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index 9031aa9ec3..3ce88b3d1b 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -21,15 +21,29 @@ import os from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from arc.common import (ARC_PATH, almost_equal_coords, get_angle_in_180_range, get_logger, is_angle_linear, - is_xyz_linear, key_by_val, read_yaml_file) +from arc.common import ( + ARC_PATH, + almost_equal_coords, + get_angle_in_180_range, + get_logger, + is_angle_linear, + is_xyz_linear, + key_by_val, + read_yaml_file, +) from arc.family import get_reaction_family_products from arc.job.adapter import JobAdapter from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter from arc.plotter import save_geo -from arc.species.converter import (compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz, - add_atom_to_xyz_using_internal_coords, sorted_distances_of_atom) 
+from arc.species.converter import ( + add_atom_to_xyz_using_internal_coords, + compare_zmats, + relocate_zmat_dummy_atoms_to_the_end, + sorted_distances_of_atom, + zmat_from_xyz, + zmat_to_xyz, +) from arc.mapping.engine import map_two_species from arc.molecule.molecule import Molecule from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms @@ -40,9 +54,10 @@ from arc.level import Level from arc.reaction import ARCReaction - -FAMILY_SETS = {'hydrolysis_set_1': ['carbonyl_based_hydrolysis', 'ether_hydrolysis'], - 'hydrolysis_set_2': ['nitrile_hydrolysis']} +FAMILY_SETS = { + 'hydrolysis_set_1': ['carbonyl_based_hydrolysis', 'ether_hydrolysis'], + 'hydrolysis_set_2': ['nitrile_hydrolysis'], +} DIHEDRAL_INCREMENT = 30 @@ -270,24 +285,35 @@ def execute_incore(self): try: tsg = TSGuess(method='Heuristics') tsg.tic() - xyzs, families, indices = hydrolysis(reaction=rxn) + xyzs_raw, families, indices = hydrolysis(reaction=rxn) tsg.tok() - if not xyzs: - logger.warning(f'Heuristics TS search failed to generate any valid TS guesses for {rxn.label}.') + if not xyzs_raw: + logger.warning( + f'Heuristics TS search failed to generate any valid TS guesses for {rxn.label}.' + ) continue + xyzs = [{'xyz': xyz, 'method': 'Heuristics'} for xyz in xyzs_raw] except ValueError: continue - for method_index, xyz in enumerate(xyzs): + for method_index, xyz_entry in enumerate(xyzs): + xyz = xyz_entry.get("xyz") + method_label = xyz_entry.get("method", "Heuristics") + if xyz is None: + continue unique = True for other_tsg in rxn.ts_species.ts_guesses: if almost_equal_coords(xyz, other_tsg.initial_xyz): - if 'heuristics' not in other_tsg.method.lower(): - other_tsg.method += ' and Heuristics' + existing_sources = getattr(other_tsg, "method_sources", None) + if existing_sources is not None: + combined_sources = list(existing_sources) + [method_label] + else: + combined_sources = [other_tsg.method, method_label] + other_tsg.method_sources = TSGuess._normalize_method_sources(combined_sources) unique = False break if unique: - ts_guess = TSGuess(method='Heuristics', + ts_guess = TSGuess(method=method_label, index=len(rxn.ts_species.ts_guesses), method_index=method_index, t0=tsg.t0, @@ -299,15 +325,15 @@ def execute_incore(self): rxn.ts_species.ts_guesses.append(ts_guess) save_geo(xyz=xyz, path=self.local_path, - filename=f'Heuristics_{method_index}', + filename=f'{method_label}_{method_index}', format_='xyz', - comment=f'Heuristics {method_index}, family: {rxn.family}', + comment=f'{method_label} {method_index}, family: {rxn.family}', ) if len(self.reactions) < 5: - successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'heuristics' in tsg.method]) + successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success] if successes: - logger.info(f'Heuristics successfully found {successes} TS guesses for {rxn.label}.') + logger.info(f'Heuristics successfully found {len(successes)} TS guesses for {rxn.label}.') else: logger.info(f'Heuristics did not find any successful TS guesses for {rxn.label}.') @@ -873,7 +899,7 @@ def h_abstraction(reaction: 'ARCReaction', dihedral_increment (int, optional): The dihedral increment to use for B-H-A-C and D-B-H-C dihedral scans. Returns: List[dict] - Entries are Cartesian coordinates of TS guesses for all reactions. + Entries hold Cartesian coordinates of TS guesses and the generating method label. 
""" xyz_guesses = list() dihedral_increment = dihedral_increment or DIHEDRAL_INCREMENT @@ -952,7 +978,8 @@ def h_abstraction(reaction: 'ARCReaction', else: # This TS is unique, and has no atom collisions. zmats.append(zmat_guess) - xyz_guesses.append(xyz_guess) + xyz_guesses.append({"xyz": xyz_guess, "method": "Heuristics"}) + return xyz_guesses @@ -987,9 +1014,11 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] - main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, - product_dict, - is_set_1) + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices( + reaction, + product_dict, + is_set_1, + ) base_xyz_indices = { "a": xyz_indices["a"], "b": xyz_indices["b"], @@ -999,9 +1028,19 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in } adjustments_to_try = [False, True] if dihedrals_to_change_num == 1 else [True] for adjust_dihedral in adjustments_to_try: - chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices(initial_xyz, base_xyz_indices, xyz_indices, - hydrolysis_parameters,reaction_family, water, zmats_total, is_set_1, is_set_2, - dihedrals_to_change_num, should_adjust_dihedral=adjust_dihedral) + chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices( + initial_xyz, + base_xyz_indices, + xyz_indices, + hydrolysis_parameters, + reaction_family, + water, + zmats_total, + is_set_1, + is_set_2, + dihedrals_to_change_num, + should_adjust_dihedral=adjust_dihedral, + ) max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) if xyz_guesses: xyz_guesses_total.extend(xyz_guesses) @@ -1015,8 +1054,8 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in condition_met = len(xyz_guesses_total) > 0 nitrile_in_inputs = any( - (pd.get("family") == "nitrile_hydrolysis") or - (isinstance(pd.get("family"), list) and "nitrile_hydrolysis" in pd.get("family")) + (pd.get("family") == "nitrile_hydrolysis") + or (isinstance(pd.get("family"), list) and "nitrile_hydrolysis" in pd.get("family")) for pd in product_dicts ) nitrile_already_found = any(fam == "nitrile_hydrolysis" for fam in reaction_families) @@ -1032,9 +1071,11 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] - main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, - product_dict, - is_set_1) + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices( + reaction, + product_dict, + is_set_1, + ) base_xyz_indices = { "a": xyz_indices["a"], "b": xyz_indices["b"], @@ -1048,10 +1089,18 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in break dihedrals_to_change_num += 1 chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices( - initial_xyz, base_xyz_indices, xyz_indices, - hydrolysis_parameters, reaction_family, water, zmats_total, is_set_1, is_set_2, - dihedrals_to_change_num, should_adjust_dihedral=True, - allow_nitrile_dihedrals=True + initial_xyz, + base_xyz_indices, + xyz_indices, + hydrolysis_parameters, + reaction_family, + water, + zmats_total, + is_set_1, + 
is_set_2, + dihedrals_to_change_num, + should_adjust_dihedral=True, + allow_nitrile_dihedrals=True, ) max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) @@ -1083,11 +1132,13 @@ def get_products_and_check_families(reaction: 'ARCReaction') -> Tuple[List[dict] consider_arc_families=True, ) carbonyl_based_present = any( - "carbonyl_based_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + "carbonyl_based_hydrolysis" + in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) for d in product_dicts ) ether_present = any( - "ether_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + "ether_hydrolysis" + in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) for d in product_dicts ) @@ -1118,9 +1169,11 @@ def has_carbonyl_based_hydrolysis(reaction_families: List[dict]) -> bool: return any(family == "carbonyl_based_hydrolysis" for family in reaction_families) -def extract_reactant_and_indices(reaction: 'ARCReaction', - product_dict: dict, - is_set_1: bool) -> Tuple[ARCSpecies, ARCSpecies, dict, dict]: +def extract_reactant_and_indices( + reaction: 'ARCReaction', + product_dict: dict, + is_set_1: bool, +) -> Tuple[ARCSpecies, ARCSpecies, dict, dict]: """ Extract the reactant molecules and relevant atomic indices (a,b,e,d,o,h1) for the hydrolysis reaction. @@ -1163,11 +1216,13 @@ def extract_reactant_and_indices(reaction: 'ARCReaction', main_reactant, a_xyz_index, b_xyz_index, - two_neighbors + two_neighbors, ) except ValueError as e: - raise ValueError(f"Failed to determine neighbors by electronegativity for atom {a_xyz_index} " - f"in species {main_reactant.label}: {e}") + raise ValueError( + f"Failed to determine neighbors by electronegativity for atom {a_xyz_index} " + f"in species {main_reactant.label}: {e}" + ) o_index = len(main_reactant.mol.atoms) h1_index = o_index + 1 @@ -1178,25 +1233,26 @@ def extract_reactant_and_indices(reaction: 'ARCReaction', "e": e_xyz_index, "d": d_xyz_indices, "o": o_index, - "h1": h1_index + "h1": h1_index, } return main_reactant, water, initial_xyz, xyz_indices -def process_chosen_d_indices(initial_xyz: dict, - base_xyz_indices: dict, - xyz_indices: dict, - hydrolysis_parameters: dict, - reaction_family: str, - water: 'ARCSpecies', - zmats_total: List[dict], - is_set_1: bool, - is_set_2: bool, - dihedrals_to_change_num: int, - should_adjust_dihedral: bool, - allow_nitrile_dihedrals: bool = False - ) -> Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]], int]: +def process_chosen_d_indices( + initial_xyz: dict, + base_xyz_indices: dict, + xyz_indices: dict, + hydrolysis_parameters: dict, + reaction_family: str, + water: 'ARCSpecies', + zmats_total: List[dict], + is_set_1: bool, + is_set_2: bool, + dihedrals_to_change_num: int, + should_adjust_dihedral: bool, + allow_nitrile_dihedrals: bool = False, +) -> Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]], int]: """ Iterates over the 'd' indices to process TS guess generation. @@ -1214,7 +1270,6 @@ def process_chosen_d_indices(initial_xyz: dict, should_adjust_dihedral (bool): Whether to adjust dihedral angles. allow_nitrile_dihedrals (bool, optional): Force-enable dihedral adjustments for nitriles. Defaults to False. - Returns: Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]: - Chosen indices for TS generation. 
@@ -1224,11 +1279,18 @@ def process_chosen_d_indices(initial_xyz: dict, """ max_dihedrals_found = 0 for d_index in xyz_indices.get("d", []) or [None]: - chosen_xyz_indices = {**base_xyz_indices, "d": d_index} if d_index is not None else {**base_xyz_indices, - "d": None} + chosen_xyz_indices = {**base_xyz_indices, "d": d_index} if d_index is not None else { + **base_xyz_indices, + "d": None, + } current_zmat, zmat_indices = setup_zmat_indices(initial_xyz, chosen_xyz_indices) - matches = get_matching_dihedrals(current_zmat, zmat_indices['a'], zmat_indices['b'], - zmat_indices['e'], zmat_indices['d']) + matches = get_matching_dihedrals( + current_zmat, + zmat_indices['a'], + zmat_indices['b'], + zmat_indices['e'], + zmat_indices['d'], + ) max_dihedrals_found = max(max_dihedrals_found, len(matches)) if should_adjust_dihedral and dihedrals_to_change_num > len(matches): continue @@ -1246,22 +1308,28 @@ def process_chosen_d_indices(initial_xyz: dict, zmat_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors) if zmat_variants: adjusted_zmats.extend(zmat_variants) - if not adjusted_zmats: - pass - else: + if adjusted_zmats: zmats_to_process = adjusted_zmats ts_guesses_list = [] for zmat_to_process in zmats_to_process: ts_guesses, updated_zmats = process_family_specific_adjustments( - is_set_1, is_set_2, reaction_family, hydrolysis_parameters, - zmat_to_process, water, chosen_xyz_indices, zmats_total) + is_set_1, + is_set_2, + reaction_family, + hydrolysis_parameters, + zmat_to_process, + water, + chosen_xyz_indices, + zmats_total, + ) zmats_total = updated_zmats ts_guesses_list.extend(ts_guesses) if attempted_dihedral_adjustments and not ts_guesses_list and ( - reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals): - flipped_zmats= [] + reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals + ): + flipped_zmats = [] adjustment_factors = [15, 25, 35, 45, 55] for indices in indices_list: flipped_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors, flip=True) @@ -1269,8 +1337,14 @@ def process_chosen_d_indices(initial_xyz: dict, for zmat_to_process in flipped_zmats: ts_guesses, updated_zmats = process_family_specific_adjustments( - is_set_1, is_set_2, reaction_family, hydrolysis_parameters, - zmat_to_process, water, chosen_xyz_indices, zmats_total + is_set_1, + is_set_2, + reaction_family, + hydrolysis_parameters, + zmat_to_process, + water, + chosen_xyz_indices, + zmats_total, ) zmats_total = updated_zmats ts_guesses_list.extend(ts_guesses) @@ -1311,10 +1385,12 @@ def get_main_reactant_and_water_from_hydrolysis_reaction(reaction: 'ARCReaction' return arc_reactant, water -def get_neighbors_by_electronegativity(spc: 'ARCSpecies', - atom_index: int, - exclude_index: int, - two_neighbors: bool = True) -> Tuple[int, List[int]]: +def get_neighbors_by_electronegativity( + spc: 'ARCSpecies', + atom_index: int, + exclude_index: int, + two_neighbors: bool = True, +) -> Tuple[int, List[int]]: """ Retrieve the top two neighbors of a given atom in a species, sorted by their effective electronegativity, excluding a specified neighbor. @@ -1340,8 +1416,11 @@ def get_neighbors_by_electronegativity(spc: 'ARCSpecies', Raises: ValueError: If the atom has no valid neighbors. 
""" - neighbors = [neighbor for neighbor in spc.mol.atoms[atom_index].edges.keys() - if spc.mol.atoms.index(neighbor) != exclude_index] + neighbors = [ + neighbor + for neighbor in spc.mol.atoms[atom_index].edges.keys() + if spc.mol.atoms.index(neighbor) != exclude_index + ] if not neighbors: raise ValueError(f"Atom at index {atom_index} has no valid neighbors.") @@ -1355,12 +1434,17 @@ def get_neighbor_total_electronegativity(neighbor: 'Atom') -> float: float: The total electronegativity of the neighbor """ return sum( - ELECTRONEGATIVITIES[n.symbol] * neighbor.edges[n].order - for n in neighbor.edges.keys() + ELECTRONEGATIVITIES[n.symbol] * neighbor.edges[n].order for n in neighbor.edges.keys() ) - effective_electronegativities = [(ELECTRONEGATIVITIES[n.symbol] * spc.mol.atoms[atom_index].edges[n].order, - get_neighbor_total_electronegativity(n), n ) for n in neighbors] + effective_electronegativities = [ + ( + ELECTRONEGATIVITIES[n.symbol] * spc.mol.atoms[atom_index].edges[n].order, + get_neighbor_total_electronegativity(n), + n, + ) + for n in neighbors + ] effective_electronegativities.sort(reverse=True, key=lambda x: (x[0], x[1])) sorted_neighbors = [spc.mol.atoms.index(n[2]) for n in effective_electronegativities] most_electronegative = sorted_neighbors[0] @@ -1368,8 +1452,7 @@ def get_neighbor_total_electronegativity(neighbor: 'Atom') -> float: return most_electronegative, remaining_neighbors -def setup_zmat_indices(initial_xyz: dict, - xyz_indices: dict) -> Tuple[dict, dict]: +def setup_zmat_indices(initial_xyz: dict, xyz_indices: dict) -> Tuple[dict, dict]: """ Convert XYZ coordinates to Z-matrix format and set up corresponding indices. @@ -1387,26 +1470,28 @@ def setup_zmat_indices(initial_xyz: dict, 'a': key_by_val(initial_zmat.get('map', {}), xyz_indices['a']), 'b': key_by_val(initial_zmat.get('map', {}), xyz_indices['b']), 'e': key_by_val(initial_zmat.get('map', {}), xyz_indices['e']), - 'd': key_by_val(initial_zmat.get('map', {}), xyz_indices['d']) if xyz_indices['d'] is not None else None + 'd': key_by_val(initial_zmat.get('map', {}), xyz_indices['d']) if xyz_indices['d'] is not None else None, } return initial_zmat, zmat_indices -def generate_dihedral_variants(zmat: dict, - indices: List[int], - adjustment_factors: List[float], - flip: bool = False, - tolerance_degrees: float = 10.0) -> List[dict]: +def generate_dihedral_variants( + zmat: dict, + indices: List[int], + adjustment_factors: List[float], + flip: bool = False, + tolerance_degrees: float = 10.0, +) -> List[dict]: """ - Create variants of a Z-matrix by adjusting dihedral angles using multiple adjustment factors. + Create variants of a Z-matrix by adjusting dihedral angles using multiple adjustment factors. This function creates variants of the Z-matrix using different adjustment factors: - 1. Retrieve the current dihedral value and normalize it to the (-180°, 180°] range. - 2. For each adjustment factor, slightly push the angle away from 0° or ±180° to avoid - unstable, boundary configurations. - 3. If `flip=True`, the same procedure is applied starting from a flipped - (180°-shifted) baseline angle. - 4. Each adjusted or flipped variant is deep-copied to ensure independence. + 1. Retrieve the current dihedral value and normalize it to the (-180°, 180°] range. + 2. For each adjustment factor, slightly push the angle away from 0° or ±180° to avoid + unstable, boundary configurations. + 3. If `flip=True`, the same procedure is applied starting from a flipped + (180°-shifted) baseline angle. + 4. 
Each adjusted or flipped variant is deep-copied to ensure independence. Args: zmat (dict): The initial Z-matrix. @@ -1414,7 +1499,8 @@ def generate_dihedral_variants(zmat: dict, adjustment_factors (List[float], optional): List of factors to try. flip (bool, optional): Whether to start from a flipped (180°) baseline dihedral angle. Defaults to False. - tolerance_degrees (float, optional): Tolerance (in degrees) for detecting angles near 0° or ±180°. Defaults to 10.0. + tolerance_degrees (float, optional): Tolerance (in degrees) for detecting angles near 0° or ±180°. + Defaults to 10.0. Returns: List[dict]: List of Z-matrix variants with adjusted dihedral angles. @@ -1440,8 +1526,9 @@ def push_up_dihedral(val: float, adj_factor: float) -> float: seed_value = normalized_value if flip: seed_value = get_angle_in_180_range(normalized_value + 180.0) - boundary_like = ((abs(seed_value) < tolerance_degrees) - or (180 - tolerance_degrees <= abs(seed_value) <= 180+tolerance_degrees)) + boundary_like = (abs(seed_value) < tolerance_degrees) or ( + 180 - tolerance_degrees <= abs(seed_value) <= 180 + tolerance_degrees + ) if boundary_like: for factor in adjustment_factors: variant = copy.deepcopy(zmat) @@ -1450,11 +1537,13 @@ def push_up_dihedral(val: float, adj_factor: float) -> float: return variants -def get_matching_dihedrals(zmat: dict, - a: int, - b: int, - e: int, - d: Optional[int]) -> List[List[int]]: +def get_matching_dihedrals( + zmat: dict, + a: int, + b: int, + e: int, + d: Optional[int], +) -> List[List[int]]: """ Retrieve all dihedral angles in the Z-matrix that match the given atom indices. This function scans the Z-matrix for dihedral parameters (keys starting with 'D_' or 'DX_') @@ -1484,11 +1573,13 @@ def get_matching_dihedrals(zmat: dict, return matches -def stretch_ab_bond(initial_zmat: 'dict', - xyz_indices: 'dict', - zmat_indices: 'dict', - hydrolysis_parameters: 'dict', - reaction_family: str) -> None: +def stretch_ab_bond( + initial_zmat: dict, + xyz_indices: dict, + zmat_indices: dict, + hydrolysis_parameters: dict, + reaction_family: str, +) -> None: """ Stretch the bond between atoms a and b in the Z-matrix based on the reaction family parameters. @@ -1519,16 +1610,18 @@ def stretch_ab_bond(initial_zmat: 'dict', stretch_zmat_bond(zmat=initial_zmat, indices=indices, stretch=stretch_degree) -def process_family_specific_adjustments(is_set_1: bool, - is_set_2: bool, - reaction_family: str, - hydrolysis_parameters: dict, - initial_zmat: dict, - water: 'ARCSpecies', - xyz_indices: dict, - zmats_total: List[dict]) -> Tuple[List[dict], List[dict]]: +def process_family_specific_adjustments( + is_set_1: bool, + is_set_2: bool, + reaction_family: str, + hydrolysis_parameters: dict, + initial_zmat: dict, + water: 'ARCSpecies', + xyz_indices: dict, + zmats_total: List[dict], +) -> Tuple[List[dict], List[dict]]: """ - Process specific adjustments for different hydrolysis reaction families if needed, then generate TS guesses . + Process specific adjustments for different hydrolysis reaction families if needed, then generate TS guesses. Args: is_set_1 (bool): Whether the reaction belongs to parameter set 1. @@ -1546,38 +1639,52 @@ def process_family_specific_adjustments(is_set_1: bool, Raises: ValueError: If the reaction family is not supported. 
""" - a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices.values() + a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz = xyz_indices.values() r_atoms = [a_xyz, o_xyz, o_xyz] a_atoms = [[b_xyz, a_xyz], [a_xyz, o_xyz], [h1_xyz, o_xyz]] - d_atoms = ([[e_xyz, d_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] - if d_xyz is not None else - [[e_xyz, b_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]]) + d_atoms = ( + [[e_xyz, d_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] + if d_xyz is not None + else [[e_xyz, b_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] + ) r_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['r_value'] a_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['a_value'] d_values = hydrolysis_parameters['family_parameters'][str(reaction_family)]['d_values'] if is_set_1 or is_set_2: initial_xyz = zmat_to_xyz(initial_zmat) - return generate_hydrolysis_ts_guess(initial_xyz, xyz_indices.values(), water, r_atoms, a_atoms, d_atoms, - r_value, a_value, d_values, zmats_total, is_set_1, - threshold=0.6 if reaction_family == 'nitrile_hydrolysis' else 0.8) + return generate_hydrolysis_ts_guess( + initial_xyz, + xyz_indices.values(), + water, + r_atoms, + a_atoms, + d_atoms, + r_value, + a_value, + d_values, + zmats_total, + is_set_1, + threshold=0.6 if reaction_family == 'nitrile_hydrolysis' else 0.8, + ) else: raise ValueError(f"Family {reaction_family} not supported for hydrolysis TS guess generation.") -def generate_hydrolysis_ts_guess(initial_xyz: dict, - xyz_indices: List[int], - water: 'ARCSpecies', - r_atoms: List[int], - a_atoms: List[List[int]], - d_atoms: List[List[int]], - r_value: List[float], - a_value: List[float], - d_values: List[List[float]], - zmats_total: List[dict], - is_set_1: bool, - threshold: float - ) -> Tuple[List[dict], List[dict]]: +def generate_hydrolysis_ts_guess( + initial_xyz: dict, + xyz_indices: List[int], + water: 'ARCSpecies', + r_atoms: List[int], + a_atoms: List[List[int]], + d_atoms: List[List[int]], + r_value: List[float], + a_value: List[float], + d_values: List[List[float]], + zmats_total: List[dict], + is_set_1: bool, + threshold: float, +) -> Tuple[List[dict], List[dict]]: """ Generate Z-matrices and Cartesian coordinates for transition state (TS) guesses. 
@@ -1600,7 +1707,7 @@
     """
 
     xyz_guesses = []
-    for index, d_value in enumerate(d_values):
+    for d_value in d_values:
         xyz_guess = copy.deepcopy(initial_xyz)
         for i in range(3):
             xyz_guess = add_atom_to_xyz_using_internal_coords(
@@ -1611,23 +1718,22 @@
                 d_indices=d_atoms[i],
                 r_value=r_value[i],
                 a_value=a_value[i],
-                d_value=d_value[i]
+                d_value=d_value[i],
             )
 
-        a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices
-        are_valid_bonds=check_ts_bonds(xyz_guess, [o_xyz, h1_xyz, h1_xyz+1, a_xyz, b_xyz])
-        colliding=colliding_atoms(xyz_guess, threshold=threshold)
+        a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz = xyz_indices
+        are_valid_bonds = check_ts_bonds(xyz_guess, [o_xyz, h1_xyz, h1_xyz + 1, a_xyz, b_xyz])
+        colliding = colliding_atoms(xyz_guess, threshold=threshold)
         duplicate = any(compare_zmats(existing, xyz_to_zmat(xyz_guess)) for existing in zmats_total)
         if is_set_1:
-            dihedral_edao=[e_xyz, d_xyz, a_xyz, o_xyz]
-            dao_is_linear=check_dao_angle(dihedral_edao, xyz_guess)
+            dihedral_edao = [e_xyz, d_xyz, a_xyz, o_xyz]
+            dao_is_linear = check_dao_angle(dihedral_edao, xyz_guess)
         else:
-            dao_is_linear=False
+            dao_is_linear = False
 
         if xyz_guess is not None and not colliding and not duplicate and are_valid_bonds and not dao_is_linear:
             xyz_guesses.append(xyz_guess)
             zmats_total.append(xyz_to_zmat(xyz_guess))
-
     return xyz_guesses
@@ -1644,7 +1750,7 @@ def check_dao_angle(d_indices: List[int], xyz_guess: dict) -> bool:
     """
     angle_indices = [d_indices[1], d_indices[2], d_indices[3]]
     angle_value = calculate_angle(xyz_guess, angle_indices)
-    norm_value=(angle_value + 180) % 180
+    norm_value = (angle_value + 180) % 180
 
     return (norm_value < 10) or (norm_value > 170)
@@ -1659,7 +1765,7 @@ def check_ts_bonds(transition_state_xyz: dict, tested_atom_indices: list) -> boo
     Returns:
         bool: Whether the transition state guess has the expected water-related bonds.
     """
-    oxygen_index, h1_index, h2_index, a_index, b_index= tested_atom_indices
+    oxygen_index, h1_index, h2_index, a_index, b_index = tested_atom_indices
     oxygen_bonds = sorted_distances_of_atom(transition_state_xyz, oxygen_index)
     h1_bonds = sorted_distances_of_atom(transition_state_xyz, h1_index)
     h2_bonds = sorted_distances_of_atom(transition_state_xyz, h2_index)
@@ -1678,10 +1784,12 @@ def check_oxygen_bonds(bonds):
             return rel_error <= 0.1
         return False
 
-    oxygen_has_valid_bonds = (oxygen_bonds[0][0] == h2_index and check_oxygen_bonds(oxygen_bonds))
-    h1_has_valid_bonds = (h1_bonds[0][0] in {oxygen_index, b_index}and h1_bonds[1][0] in {oxygen_index, b_index})
+    oxygen_has_valid_bonds = oxygen_bonds[0][0] == h2_index and check_oxygen_bonds(oxygen_bonds)
+    h1_has_valid_bonds = (h1_bonds[0][0] in {oxygen_index, b_index}) and (
+        h1_bonds[1][0] in {oxygen_index, b_index}
+    )
     h2_has_valid_bonds = h2_bonds[0][0] == oxygen_index
 
     return oxygen_has_valid_bonds and h1_has_valid_bonds and h2_has_valid_bonds
 
 register_job_adapter('heuristics', HeuristicsAdapter)
diff --git a/arc/settings/settings.py b/arc/settings/settings.py
index ea2c90a9cc..34a8ff2a03 100644
--- a/arc/settings/settings.py
+++ b/arc/settings/settings.py
@@ -9,6 +9,8 @@
 import os
+import re
+import shutil
 import string
 import sys
 
 # Users should update the following server dictionary.
# Instructions for RSA key generation can be found here: @@ -88,7 +89,7 @@ supported_ess = ['cfour', 'gaussian', 'mockter', 'molpro', 'orca', 'qchem', 'terachem', 'onedmin', 'xtb', 'torchani', 'openbabel'] # TS methods to try when appropriate for a reaction (other than user guesses which are always allowed): -ts_adapters = ['heuristics', 'AutoTST', 'GCN', 'xtb_gsm'] +ts_adapters = ['heuristics', 'AutoTST', 'GCN', 'xtb_gsm', 'crest'] # List here job types to execute by default default_job_types = {'conf_opt': True, # defaults to True if not specified @@ -427,3 +428,179 @@ def add_rmg_db_candidates(prefix: str) -> None: if path and os.path.isdir(path): RMG_DB_PATH = path break + + + +def parse_version(folder_name): + """ + Parses the version from the folder name and returns a tuple for comparison. + Supports versions like: 3.0.2, v212, 2.1, 2 + """ + version_regex = re.compile(r"(?:v?(\d+)(?:\.(\d+))?(?:\.(\d+))?)", re.IGNORECASE) + match = version_regex.search(folder_name) + if not match: + return (0, 0, 0) + + major = int(match.group(1)) if match.group(1) else 0 + minor = int(match.group(2)) if match.group(2) else 0 + patch = int(match.group(3)) if match.group(3) else 0 + + # Example: v212 → (2, 1, 2) + if major >= 100 and match.group(2) is None and match.group(3) is None: + s = str(major).rjust(3, "0") + major = int(s[0]) + minor = int(s[1]) + patch = int(s[2]) + + return (major, minor, patch) + + +def find_highest_version_in_directory(directory, name_contains): + """ + Find the crest executable with the highest version in a directory. + + This function searches the given directory for subdirectories whose names contain + the specified string, interprets their names as versioned folders (e.g., "crest-3.0.2", + "v212", "2.1"), and within each candidate folder looks for an executable named + ``crest``. It returns the full path to the crest executable located in the folder + with the highest parsed version. + + Args: + directory (str): The path to the directory to search. + name_contains (str): A substring that must be present in the folder name. + + Returns: + Optional[str]: The full path to the crest executable with the highest version, + or None if no suitable executable is found. 
+ """ + if not os.path.exists(directory): + return None + + highest_version_path = None + highest_version = () + + for folder in os.listdir(directory): + file_path = os.path.join(directory, folder) + if name_contains.lower() in folder.lower() and os.path.isdir(file_path): + crest_path = os.path.join(file_path, "crest") + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + version = parse_version(folder) + if highest_version == () or version > highest_version: + highest_version = version + highest_version_path = crest_path + return highest_version_path + + +def find_crest_executable(): + """ + Returns (crest_path, env_cmd): + + - crest_path: full path to 'crest' + - env_cmd: shell snippet to activate its environment (may be "") + """ + # Priority 1: standalone builds in a configurable directory (default: /Local/ce_dana) + standalone_dir = os.getenv("ARC_CREST_STANDALONE_DIR", "/Local/ce_dana") + crest_path = find_highest_version_in_directory(standalone_dir, "crest") + if crest_path and os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # Standalone binary: no env activation needed + return crest_path, "" + + # Priority 2: Conda/Mamba/Micromamba envs + home = os.path.expanduser("~") + potential_env_paths = [ + os.path.join(home, "anaconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniforge3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, ".conda", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "mambaforge", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "micromamba", "envs", "crest_env", "bin", "crest"), + ] + + # Also check the current env's bin + current_env_bin = os.path.dirname(sys.executable) + potential_env_paths.insert(0, os.path.join(current_env_bin, "crest")) + + for crest_path in potential_env_paths: + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # env_root = .../anaconda3 or .../miniforge3 or .../mambaforge etc. 
+ env_marker = os.path.join("envs", "crest_env") + os.path.sep + env_root = crest_path.split(env_marker)[0] + if "micromamba" in crest_path: + env_cmd = ( + f"source {env_root}/etc/profile.d/micromamba.sh && " + f"micromamba activate crest_env" + ) + elif any( + name in env_root + for name in ("anaconda3", "miniconda3", "miniforge3", "mambaforge", ".conda") + ): + env_cmd = ( + f"source {env_root}/etc/profile.d/conda.sh && " + f"conda activate crest_env" + ) + else: + # If for some reason it's just a random prefix with crest in bin + env_cmd = "" + return crest_path, env_cmd + + # Priority 3: PATH + crest_in_path = shutil.which("crest") + if crest_in_path: + return crest_in_path, "" + + return None, None + + +CREST_PATH, CREST_ENV_PATH = find_crest_executable() + +__all__ = [ + "servers", + "global_ess_settings", + "supported_ess", + "ts_adapters", + "default_job_types", + "levels_ess", + "check_status_command", + "submit_command", + "delete_command", + "list_available_nodes_command", + "submit_filenames", + "t_max_format", + "input_filenames", + "output_filenames", + "default_levels_of_theory", + "orca_default_options_dict", + "tani_default_options_dict", + "ob_default_settings", + "xtb_gsm_settings", + "valid_chars", + "rotor_scan_resolution", + "maximum_barrier", + "minimum_barrier", + "inconsistency_az", + "inconsistency_ab", + "max_rotor_trsh", + "preserve_params_in_scan", + "workers_coeff", + "default_job_settings", + "ARC_FAMILIES_PATH", + "home", + "TANI_PYTHON", + "OB_PYTHON", + "TS_GCN_PYTHON", + "AUTOTST_PYTHON", + "ARC_PYTHON", + "RMG_ENV_NAME", + "RMG_PYTHON", + "XTB", + "exported_rmg_path", + "exported_rmg_db_path", + "gw", + "find_executable", + "add_rmg_db_candidates", + "parse_version", + "find_highest_version_in_directory", + "find_crest_executable", + "CREST_PATH", + "CREST_ENV_PATH", +] diff --git a/arc/species/converter.py b/arc/species/converter.py index ce0d484541..9d19a4b13d 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -5,6 +5,7 @@ import math import numpy as np import os +import warnings from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from ase import Atoms @@ -48,6 +49,103 @@ DIST_PRECISION = 0.01 # Angstrom ANGL_PRECISION = 0.1 # rad (for both bond angle and dihedral) +def reorder_xyz_string(xyz_str: str, + reverse_atoms: bool = False, + units: str = 'angstrom', + convert_to: str = 'angstrom', + project_directory: Optional[str] = None + ) -> str: + """ + Reorder an XYZ string between ``ATOM X Y Z`` and ``X Y Z ATOM`` with optional unit conversion. + + Args: + xyz_str (str): The string xyz format to be converted. + reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates. + units (str, optional): Units of the input coordinates ('angstrom' or 'bohr'). + convert_to (str, optional): The units to convert to (either 'angstrom' or 'bohr'). + project_directory (str, optional): The path to the project directory. + + Raises: + ConverterError: If xyz_str is not a string or does not have four space-separated entries per non-empty line. + + Returns: str + The converted string xyz format. 
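+
+    Example (mirroring the unit tests below)::
+
+        reorder_xyz_string('C 0.0 1.0 2.0\nH -1.0 0.5 0.0',
+                           reverse_atoms=True, convert_to='bohr')
+        # -> '0.0 1.8897259886 3.7794519772 C\n-1.8897259886 0.9448629943 0.0 H'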
+ """ + if isinstance(xyz_str, tuple): + xyz_str = '\n'.join(xyz_str) + if isinstance(xyz_str, list): + xyz_str = '\n'.join(xyz_str) + if not isinstance(xyz_str, str): + raise ConverterError(f'Expected a string input, got {type(xyz_str)}') + if project_directory is not None: + file_path = os.path.join(project_directory, xyz_str) + if os.path.isfile(file_path): + with open(file_path, 'r') as f: + xyz_str = f.read() + + + if units.lower() == 'angstrom' and convert_to.lower() == 'angstrom': + conversion_factor = 1 + elif units.lower() == 'bohr' and convert_to.lower() == 'bohr': + conversion_factor = 1 + elif units.lower() == 'angstrom' and convert_to.lower() == 'bohr': + conversion_factor = constants.angstrom_to_bohr + elif units.lower() == 'bohr' and convert_to.lower() == 'angstrom': + conversion_factor = constants.bohr_to_angstrom + else: + raise ConverterError("Invalid target unit. Choose 'angstrom' or 'bohr'.") + + processed_lines = list() + # Split the string into lines + lxyz = xyz_str.strip().splitlines() + # Determine whether the atom label appears first or last in each line + first_line_tokens = lxyz[0].strip().split() + atom_first = not is_str_float(first_line_tokens[0]) + + for item in lxyz: + parts = item.strip().split() + + if len(parts) != 4: + raise ConverterError(f'xyz_str has an incorrect format, expected 4 elements in each line, ' + f'got "{item}" in:\n{xyz_str}') + if atom_first: + atom, x_str, y_str, z_str = parts + else: + x_str, y_str, z_str, atom = parts + + try: + x = float(x_str) * conversion_factor + y = float(y_str) * conversion_factor + z = float(z_str) * conversion_factor + + except ValueError as e: + raise ConverterError(f'Could not convert {x_str}, {y_str}, or {z_str} to floats.') from e + + if reverse_atoms and atom_first: + formatted_line = f'{x} {y} {z} {atom}' + elif reverse_atoms and not atom_first: + formatted_line = f'{atom} {x} {y} {z}' + elif not reverse_atoms and atom_first: + formatted_line = f'{atom} {x} {y} {z}' + elif not reverse_atoms and not atom_first: + formatted_line = f'{x} {y} {z} {atom}' + + processed_lines.append(formatted_line) + + return '\n'.join(processed_lines) + + +def str_to_str(*args, **kwargs) -> str: + """ + Backwards compatible wrapper for reorder_xyz_string. 
+ """ + warnings.warn( + "str_to_str was renamed to reorder_xyz_string and will be removed in a future ARC release", + DeprecationWarning, + stacklevel=2, + ) + return reorder_xyz_string(*args, **kwargs) + def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None, diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 0d1345d94c..2a1265917a 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -700,6 +700,25 @@ def test_str_to_xyz(self): xyz = converter.str_to_xyz(xyz_format) self.assertEqual(xyz, expected_xyz) + def test_reorder_xyz_string_atom_first(self): + """Test reordering atom-first XYZ strings with unit conversion""" + xyz_format = "C 0.0 1.0 2.0\nH -1.0 0.5 0.0" + converted = converter.reorder_xyz_string(xyz_str=xyz_format, reverse_atoms=True, convert_to="bohr") + expected = "0.0 1.8897259886 3.7794519772 C\n-1.8897259886 0.9448629943 0.0 H" + self.assertEqual(converted, expected) + + def test_reorder_xyz_string_coordinate_first(self): + """Test reordering coordinate-first XYZ strings back to atom-last order with conversion""" + xyz_format = "0.0 0.0 0.0 N\n1.0 0.0 0.0 H" + converted = converter.reorder_xyz_string( + xyz_str=xyz_format, + reverse_atoms=False, + units="bohr", + convert_to="angstrom", + ) + expected = "0.0 0.0 0.0 N\n0.529177 0.0 0.0 H" + self.assertEqual(converted, expected) + def test_xyz_to_str(self): """Test converting an ARC xyz format to a string xyz format""" xyz_str1 = converter.xyz_to_str(xyz_dict=self.xyz1['dict']) diff --git a/arc/species/species.py b/arc/species/species.py index 0fe014d080..49b7b18116 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1556,10 +1556,11 @@ def cluster_tsgs(self): for tsg in self.ts_guesses: for cluster_tsg in cluster_tsgs: if cluster_tsg.almost_equal_tsgs(tsg): + logger.info(f"Similar TSGuesses found: {tsg.index} is similar to {cluster_tsg.index}") cluster_tsg.cluster.append(tsg.index) - if tsg.method not in cluster_tsg.method: - cluster_tsg.method += f' + {tsg.method}' - cluster_tsg.execution_time = f'{cluster_tsg.execution_time} + {tsg.execution_time}' + cluster_tsg.method_sources = TSGuess._normalize_method_sources( + (cluster_tsg.method_sources or []) + (tsg.method_sources or []) + ) break else: tsg.cluster = [tsg.index] @@ -2193,6 +2194,7 @@ class TSGuess(object): initial_xyz (dict): The 3D coordinates guess. opt_xyz (dict): The 3D coordinates after optimization at the ts_guesses level. method (str): The method/source used for the xyz guess. + method_sources (List[str]): All methods/sources that produced an equivalent xyz guess. method_index (int): A subindex, used for cases where a single method generates several guesses. Counts separately for each direction, 'F' and 'R'. method_direction (str): The reaction direction used for generating the guess ('F' or 'R'). @@ -2237,6 +2239,7 @@ def __init__(self, # Not reading from a dictionary self.index = index self.method = method.lower() if method is not None else 'user guess' + self.method_sources = self._normalize_method_sources([self.method]) self.method_index = method_index self.method_direction = method_direction self.constraints = constraints @@ -2293,6 +2296,22 @@ def opt_xyz(self, value): """Allow setting the initial coordinate guess""" self._opt_xyz = check_xyz_dict(value) + @staticmethod + def _normalize_method_sources(method_sources: Optional[List[str]]) -> List[str]: + """ + Normalize method_sources to a unique, ordered, lowercase list. 
+ """ + if not method_sources: + return [] + normalized = [] + for method in method_sources: + if method is None: + continue + method = method.lower() + if method not in normalized: + normalized.append(method) + return normalized + def as_dict(self, for_report: bool = False) -> dict: """ A helper function for dumping this object as a dictionary. @@ -2306,6 +2325,8 @@ def as_dict(self, for_report: bool = False) -> dict: """ ts_dict = dict() ts_dict['method'] = self.method + if self.method_sources: + ts_dict['method_sources'] = list(self.method_sources) ts_dict['method_index'] = self.method_index if self.method_direction is not None: ts_dict['method_direction'] = self.method_direction @@ -2354,6 +2375,10 @@ def from_dict(self, ts_dict: dict): and isinstance(ts_dict['execution_time'], str) \ else ts_dict['execution_time'] if 'execution_time' in ts_dict else None self.method = ts_dict['method'].lower() if 'method' in ts_dict else 'user guess' + if 'method_sources' in ts_dict and isinstance(ts_dict['method_sources'], list): + self.method_sources = self._normalize_method_sources(ts_dict['method_sources']) + else: + self.method_sources = self._normalize_method_sources([self.method]) self.method_index = ts_dict['method_index'] if 'method_index' in ts_dict else None self.method_direction = ts_dict['method_direction'] if 'method_direction' in ts_dict else None self.imaginary_freqs = ts_dict['imaginary_freqs'] if 'imaginary_freqs' in ts_dict else None diff --git a/arc/species/species_test.py b/arc/species/species_test.py index 6cb6cfded9..330b18ec64 100644 --- a/arc/species/species_test.py +++ b/arc/species/species_test.py @@ -2225,8 +2225,9 @@ def test_cluster_tsgs(self): self.assertEqual(len(spc_1.ts_guesses), 4) spc_1.cluster_tsgs() self.assertEqual(len(spc_1.ts_guesses), 2) - self.assertEqual(spc_1.ts_guesses[0].method, 'user guess 0 + kinbot') - self.assertEqual(spc_1.ts_guesses[0].execution_time, '00:00:02 + 00:00:02') + self.assertEqual(spc_1.ts_guesses[0].method, 'user guess 0') + self.assertEqual(spc_1.ts_guesses[0].method_sources, ['user guess 0', 'kinbot']) + self.assertEqual(spc_1.ts_guesses[0].execution_time, '00:00:02') self.assertEqual(spc_1.ts_guesses[0].index, 0) self.assertEqual(spc_1.ts_guesses[1].method, 'gcn') self.assertEqual(spc_1.ts_guesses[1].execution_time, '00:00:02') @@ -2888,6 +2889,7 @@ def test_as_dict(self): """Test TSGuess.as_dict()""" tsg_dict = self.tsg1.as_dict() expected_dict = {'method': 'autotst', + 'method_sources': ['autotst'], 'conformer_index': None, 'family': 'H_Abstraction', 'index': None, @@ -2906,9 +2908,10 @@ def test_from_dict(self): ts_dict = self.tsg1.as_dict() tsg = TSGuess(ts_dict=ts_dict) self.assertEqual(tsg.method, 'autotst') + self.assertEqual(tsg.method_sources, ['autotst']) ts_dict_for_report = self.tsg1.as_dict(for_report=True) - self.assertEqual(list(ts_dict_for_report.keys()), ['method', 'method_index', 'success', 'index', - 'conformer_index', 'initial_xyz', 'opt_xyz']) + self.assertEqual(list(ts_dict_for_report.keys()), ['method', 'method_sources', 'method_index', 'success', + 'index', 'conformer_index', 'initial_xyz', 'opt_xyz']) def test_process_xyz(self): """Test the process_xyz() method""" diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf deleted file mode 100644 index 36f9d855ac..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf +++ /dev/null @@ -1,12 +0,0 @@ 
-%chk=check.chk -%mem=14336mb -%NProcShared=8 - -#P opt=(calcfc) cbs-qb3 IOp(2/9=2000) - -spc1 - -0 3 -O 0.00000000 0.00000000 1.00000000 - - diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub deleted file mode 100644 index 00b840cd67..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -l -#SBATCH -p normal -#SBATCH -J server1 -#SBATCH -N 1 -#SBATCH -n 8 -#SBATCH --time=120:00:00 -#SBATCH --mem-per-cpu=15770 -#SBATCH -o out.txt -#SBATCH -e err.txt - -export g16root=/home/gridsan/groups/GRPAPI/Software -export PATH=$g16root/g16/:$g16root/gv:$PATH -which g16 - -echo "============================================================" -echo "Job ID : $SLURM_JOB_ID" -echo "Job Name : $SLURM_JOB_NAME" -echo "Starting on : $(date)" -echo "Running on node : $SLURMD_NODENAME" -echo "Current directory : $(pwd)" -echo "============================================================" - -touch initial_time - -GAUSS_SCRDIR=/state/partition1/user//$SLURM_JOB_NAME-$SLURM_JOB_ID -export $GAUSS_SCRDIR -. $g16root/g16/bsd/g16.profile - -mkdir -p $GAUSS_SCRDIR - -g16 < input.gjf > input.log - -rm -rf $GAUSS_SCRDIR - -touch final_time - - \ No newline at end of file diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt b/arc/testing/trsh/wall_exceeded.txt similarity index 100% rename from arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt rename to arc/testing/trsh/wall_exceeded.txt diff --git a/devtools/crest_environment.yml b/devtools/crest_environment.yml new file mode 100644 index 0000000000..2291e72d37 --- /dev/null +++ b/devtools/crest_environment.yml @@ -0,0 +1,6 @@ +name: crest_env +channels: + - conda-forge +dependencies: + - python>=3.7 + - crest=2.12 diff --git a/devtools/install_all.sh b/devtools/install_all.sh index c958fdd548..c9de207ef7 100644 --- a/devtools/install_all.sh +++ b/devtools/install_all.sh @@ -26,6 +26,8 @@ run_devtool () { bash "$DEVTOOLS_DIR/$1" "${@:2}"; } SKIP_CLEAN=false SKIP_EXT=false SKIP_ARC=false +SKIP_RMG=false +ARC_INSTALLED=false RMG_ARGS=() ARC_ARGS=() EXT_ARGS=() @@ -36,6 +38,7 @@ while [[ $# -gt 0 ]]; do --no-clean) SKIP_CLEAN=true ;; --no-ext) SKIP_EXT=true ;; --no-arc) SKIP_ARC=true ;; + --no-rmg) SKIP_RMG=true ;; --rmg-*) RMG_ARGS+=("--${1#--rmg-}") ;; --arc-*) ARC_ARGS+=("--${1#--arc-}") ;; --ext-*) EXT_ARGS+=("--${1#--ext-}") ;; @@ -44,6 +47,7 @@ while [[ $# -gt 0 ]]; do Usage: $0 [global-flags] [--rmg-xxx] [--arc-yyy] [--ext-zzz] --no-clean Skip micromamba/conda cache cleanup --no-ext Skip external tools (AutoTST, KinBot, …) + --no-rmg Skip RMG-Py entirely --rmg-path Forward '--path' to RMG installer --rmg-pip Forward '--pip' to RMG installer ... @@ -67,16 +71,15 @@ echo " EXT sub-flags : ${EXT_ARGS[*]:-(none)}" echo ">>> Beginning full ARC external repo installation…" pushd . >/dev/null -# 1) RMG -echo "=== Installing RMG ===" -run_devtool install_rmg.sh "${RMG_ARGS[@]}" - - - # 2) PyRDL - echo "=== Installing PyRDL ===" - bash devtools/install_pyrdl.sh +# 1) RMG (optional) +if [[ $SKIP_RMG == false ]]; then + echo "=== Installing RMG ===" + run_devtool install_rmg.sh "${RMG_ARGS[@]}" +else + echo "ℹ️ --no-rmg flag set. Skipping RMG installation." 
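+  # (Illustrative note: the RMG_ARGS collected above are only consumed by the
+  #  install_rmg.sh call in the branch above, e.g. `--rmg-pip` is forwarded
+  #  as `--pip`; with --no-rmg they are parsed but never used.)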
+fi -# 3) ARC itself (skip env creation in CI or if user requests it) +# 2) ARC itself (skip env creation in CI or if user requests it) if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then if [[ $SKIP_CLEAN == false ]]; then echo "=== Cleaning up old ARC build artifacts ===" @@ -88,10 +91,23 @@ if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then echo "=== Installing ARC ===" run_devtool install_arc.sh "${ARC_ARGS[@]}" + ARC_INSTALLED=true else + ARC_INSTALLED=false echo ":information_source: CI detected or --no-arc flag set. Skip cleaning ARC installation." fi +# 3) PyRDL (needs arc_env, but not ARC install) +if [[ "${CI:-false}" == "true" ]]; then + echo "=== Installing PyRDL (CI) ===" + bash devtools/install_pyrdl.sh +elif [[ $ARC_INSTALLED == true ]]; then + echo "=== Installing PyRDL ===" + bash devtools/install_pyrdl.sh +else + echo "ℹ️ Skipping PyRDL install because ARC installation was skipped." +fi + if [[ $SKIP_EXT == false ]]; then # map of friendly names → installer scripts declare -A EXT_INSTALLERS=( @@ -100,6 +116,7 @@ if [[ $SKIP_EXT == false ]]; then [KinBot]=install_kinbot.sh [OpenBabel]=install_ob.sh [xtb]=install_xtb.sh + [CREST]=install_crest.sh [Sella]=install_sella.sh [TorchANI]=install_torchani.sh ) diff --git a/devtools/install_autotst.sh b/devtools/install_autotst.sh index 5e3bc35288..e71e42d035 100644 --- a/devtools/install_autotst.sh +++ b/devtools/install_autotst.sh @@ -31,6 +31,8 @@ done # where "$(pwd)" is the path to the AutoTST repository. write_hook () { local env="$1" repo_path="$2" # repo_path="$(pwd)" in AutoTST + local repo_path_escaped + repo_path_escaped=$(printf '%q' "$repo_path") $COMMAND_PKG env list | awk '{print $1}' | grep -qx "$env" || return 0 # env prefix @@ -50,16 +52,37 @@ write_hook () { # --- activation -------------------------------------------------------- cat >"$act" <>"$act" <<'EOF' +# Remove RMG-Py from PATH/PYTHONPATH to avoid clashes while AutoTST is active. +if [[ -n "${RMG_PY_PATH:-}" ]]; then + export PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + export PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" +fi +EOF + fi + + cat >>"$act" <<'EOF' case ":\$PYTHONPATH:" in *":\$AUTOTST_ROOT:"*) ;; \ *) export PYTHONPATH="\$AUTOTST_ROOT:\${PYTHONPATH:-}" ;; esac EOF # --- de-activation ----------------------------------------------------- cat >"$deact" <<'EOF' -_strip () { local n=":$1:"; local s=":$2:"; echo "${s//$n/:}" | sed 's/^://;s/:$//'; } -export PYTHONPATH=$(_strip "$AUTOTST_ROOT" ":${PYTHONPATH:-}:") -unset AUTOTST_ROOT +export PATH="${AUTOTST_OLD_PATH:-$PATH}" +if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" +else + unset PYTHONPATH +fi +unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH EOF echo "🔗 AutoTST hook refreshed in $env" } @@ -115,12 +138,53 @@ fi if [[ $MODE == "path" ]]; then - AUTO_PATH_LINE="export PYTHONPATH=\"\$PYTHONPATH:$(pwd)\"" - if ! grep -Fqx "$AUTO_PATH_LINE" ~/.bashrc; then - echo "$AUTO_PATH_LINE" >> ~/.bashrc - echo "✔️ Added AutoTST path to ~/.bashrc" + HOOK_SENTINEL="# AutoTST path-mode hook" + if ! 
grep -Fqx "$HOOK_SENTINEL" ~/.bashrc; then + cat <<'EOF' >> ~/.bashrc +# AutoTST path-mode hook +_strip_path () { + local needle=":$1:" + local haystack=":$2:" + echo "${haystack//$needle/:}" | sed 's/^://;s/:$//' +} + +autotst_on () { + export AUTOTST_ROOT="__AUTOTST_PATH__" + export AUTOTST_OLD_PATH="$PATH" + export AUTOTST_OLD_PYTHONPATH="${PYTHONPATH:-}" + if [[ -n "${RMG_PY_PATH:-}" ]]; then + PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" + fi + + case ":$PYTHONPATH:" in *":$AUTOTST_ROOT:"*) ;; \ + *) PYTHONPATH="$AUTOTST_ROOT:${PYTHONPATH:-}" ;; esac + export PATH PYTHONPATH +} + +autotst_off () { + export PATH="${AUTOTST_OLD_PATH:-$PATH}" + if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" + else + unset PYTHONPATH + fi + unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH +} + +# Enable AutoTST by default in new shells and keep RMG-Py out of the way. +autotst_on +EOF + # replace placeholder with actual path (portable across GNU/BSD sed) + AUTOTST_ESCAPED_PATH="$(printf '%q' "$(pwd)" | sed 's#/#\\\\/#g')" + if sed --version >/dev/null 2>&1; then + sed -i "s#__AUTOTST_PATH__#${AUTOTST_ESCAPED_PATH}#" ~/.bashrc + else + sed -i '' "s#__AUTOTST_PATH__#${AUTOTST_ESCAPED_PATH}#" ~/.bashrc + fi + echo "✔️ Added AutoTST path-mode hook to ~/.bashrc" else - echo "ℹ️ AutoTST path already exists in ~/.bashrc" + echo "ℹ️ AutoTST path-mode hook already exists in ~/.bashrc" fi elif [[ $MODE == "conda" ]]; then write_hook tst_env "$(pwd)" diff --git a/devtools/install_crest.sh b/devtools/install_crest.sh new file mode 100644 index 0000000000..1086ec9db2 --- /dev/null +++ b/devtools/install_crest.sh @@ -0,0 +1,64 @@ +#!/bin/bash -l +set -eo pipefail + +if command -v micromamba &> /dev/null; then + echo "✔️ Micromamba is installed." + COMMAND_PKG=micromamba +elif command -v mamba &> /dev/null; then + echo "✔️ Mamba is installed." + COMMAND_PKG=mamba +elif command -v conda &> /dev/null; then + echo "✔️ Conda is installed." + COMMAND_PKG=conda +else + echo "❌ Micromamba, Mamba, or Conda is required. Please install one." + exit 1 +fi + +if [ "$COMMAND_PKG" = "micromamba" ]; then + eval "$(micromamba shell hook --shell=bash)" +else + BASE=$(conda info --base) + . "$BASE/etc/profile.d/conda.sh" +fi + +ENV_FILE="devtools/crest_environment.yml" + +if [ ! -f "$ENV_FILE" ]; then + echo "❌ File not found: $ENV_FILE" + exit 1 +fi + +if $COMMAND_PKG env list | grep -q '^crest_env\s'; then + echo ">>> Updating existing crest_env..." + $COMMAND_PKG env update -n crest_env -f "$ENV_FILE" --prune +else + echo ">>> Creating new crest_env..." + $COMMAND_PKG env create -n crest_env -f "$ENV_FILE" -y +fi + +echo ">>> Checking CREST installation..." + +if [ "$COMMAND_PKG" = "micromamba" ]; then + CREST_RUNNER="micromamba run -n crest_env" + CREST_LISTER="micromamba list -n crest_env" +else + CREST_RUNNER="conda run -n crest_env" + CREST_LISTER="conda list -n crest_env" +fi + +if $CREST_RUNNER crest --version &> /dev/null; then + version_output=$($CREST_RUNNER crest --version 2>&1) + echo "$version_output" + installed_version=$(printf '%s' "$version_output" | tr '\n' ' ' | sed -n 's/.*Version[[:space:]]\+\([0-9.][0-9.]*\).*/\1/p') + if [ "$installed_version" != "2.12" ]; then + echo "❌ CREST version mismatch (expected 2.12)." + exit 1 + fi + echo "✔️ CREST 2.12 is successfully installed." +else + echo "❌ CREST is not found in PATH. Please check the environment." 
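+    # (Illustrative: reaching this branch means `crest --version` itself
+    #  failed to run via the env runner; a healthy install prints a banner
+    #  line such as "Version 2.12, ...", which the sed filter above reduces
+    #  to "2.12".)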
+    exit 1
+fi
+
+echo "✅ Done installing CREST (crest_env)."
diff --git a/devtools/install_gcn.sh b/devtools/install_gcn.sh
index 8f83a2cda1..5273353d77 100644
--- a/devtools/install_gcn.sh
+++ b/devtools/install_gcn.sh
@@ -93,12 +93,12 @@ write_hook() { # env_name repo_path
   rm -f "$act" "$deact"

   # --- activation hook -----------------------------------------------------
-  cat <<'ACTHOOK' >"$act"
+  cat <<ACTHOOK >"$act"
 # TS-GCN hook – $(date +%F)
 export TSGCN_ROOT="$repo"
-case ":$PYTHONPATH:" in
-  *":$TSGCN_ROOT:") ;; \
-  *) export PYTHONPATH="$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
+case ":\$PYTHONPATH:" in
+  *":\$TSGCN_ROOT:") ;; \
+  *) export PYTHONPATH="\$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
 esac
 ACTHOOK

@@ -182,46 +182,43 @@ CORE_PKGS=(
 # ── inline env creation & unified PyTorch install --------------------------
 if $COMMAND_PKG env list | awk '{print $1}' | grep -qx ts_gcn; then
-  $COMMAND_PKG env update -n ts_gcn \
+  $COMMAND_PKG install -n ts_gcn \
     -c schrodinger -c conda-forge \
     --channel-priority flexible \
     "${CORE_PKGS[@]}" \
-    --prune -y
+    --yes
 else
-  $COMMAND_PKG env create -n ts_gcn \
+  $COMMAND_PKG create -n ts_gcn \
     -c schrodinger -c conda-forge \
     --channel-priority flexible \
     "${CORE_PKGS[@]}" \
-    -y
+    --yes
 fi
-  # 2) activate it - we set +u to avoid printing variable names
-  #    that are not set yet
-  set +u; $COMMAND_PKG activate ts_gcn; set -u
-
-  # 3) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
-  WHEEL=https://download.pytorch.org/whl/torch_stable.html
-  if [[ $CUDA_VERSION == cpu ]]; then
-pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
-  else
-    pip install torch==1.7.1+${CUDA_VERSION} \
-      torchvision==0.8.2+${CUDA_VERSION} \
-      torchaudio==0.7.2+${CUDA_VERSION} \
-      -f $WHEEL
-  fi
-  # for PyG wheels use the official PyG index—with a real '+' in the URL
-  TORCH_VER=1.7.1
-  WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
-
-  # install ONLY the prebuilt binaries, never fall back to source
-  pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
-  pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
-  pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
-  pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
-
-  # finally the meta‐package (this one can install from PyPI)
-  pip install torch-geometric
-  echo "✅ ts_gcn environment ready"
+# 2) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
+PIP_RUN=("$COMMAND_PKG" run -n ts_gcn)
+WHEEL=https://download.pytorch.org/whl/torch_stable.html
+if [[ $CUDA_VERSION == cpu ]]; then
+  "${PIP_RUN[@]}" pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
+else
+  "${PIP_RUN[@]}" pip install torch==1.7.1+${CUDA_VERSION} \
+    torchvision==0.8.2+${CUDA_VERSION} \
+    torchaudio==0.7.2+${CUDA_VERSION} \
+    -f $WHEEL
+fi
+# for PyG wheels use the official PyG index—with a real '+' in the URL
+TORCH_VER=1.7.1
+WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
+
+# install ONLY the prebuilt binaries, never fall back to source
+"${PIP_RUN[@]}" pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
+"${PIP_RUN[@]}" pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
+"${PIP_RUN[@]}" pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
+"${PIP_RUN[@]}" pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
+
+# finally the meta‐package (this one can install from
PyPI) +"${PIP_RUN[@]}" pip install torch-geometric +echo "✅ ts_gcn environment ready" # ── write hooks into conda envs if required ------------------------------- if [[ $MODE == conda ]]; then diff --git a/devtools/install_pyrdl.sh b/devtools/install_pyrdl.sh index 2b2cc9340c..87f1ccf454 100644 --- a/devtools/install_pyrdl.sh +++ b/devtools/install_pyrdl.sh @@ -49,8 +49,8 @@ fi # Ensure CMake is installed in the environment if ! command -v cmake &> /dev/null; then - echo "Installing CMake..." - "$COMMAND_PKG" install -y cmake + echo "Installing CMake into arc_env..." + "$COMMAND_PKG" install -n arc_env -c conda-forge -y cmake fi # Clone and build RingDecomposerLib diff --git a/devtools/install_torchani.sh b/devtools/install_torchani.sh index d446b3688e..62719757ed 100644 --- a/devtools/install_torchani.sh +++ b/devtools/install_torchani.sh @@ -2,9 +2,10 @@ set -eo pipefail # Enable tracing of each command, but tee it to a logfile +LOGFILE="tani_env_setup.log" exec 3>&1 4>&2 trap 'exec 2>&4 1>&3' EXIT -exec 1> >(tee .log) 2>&1 +exec 1> >(tee "$LOGFILE") 2>&1 set -x echo ">>> Starting TANI environment setup at $(date)" @@ -53,7 +54,7 @@ fi echo ">>> Creating conda env from $ENV_YAML (name=$ENV_NAME)" if ! $COMMAND_PKG env create -n "$ENV_NAME" -f "$ENV_YAML" -v; then echo "❌ Environment creation failed. Dumping last 200 lines of log:" - tail -n 200 tani_env_setup.log + tail -n 200 "$LOGFILE" echo "---- Disk usage at failure ----" df -h . exit 1 diff --git a/functional/restart_test.py b/functional/restart_test.py index fa3407b863..7b8998cfaf 100644 --- a/functional/restart_test.py +++ b/functional/restart_test.py @@ -16,6 +16,14 @@ from arc.main import ARC +def _project_name(base: str) -> str: + """Return a per-xdist-worker project name to avoid parallel cleanup collisions.""" + worker_id = os.environ.get('PYTEST_XDIST_WORKER') + if worker_id: + return f'{base}_{worker_id}' + return base + + class TestRestart(unittest.TestCase): """ Contains unit tests for restarting ARC. 
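For context, a minimal standalone sketch of the per-worker naming scheme added above (the `gw0` value is illustrative; pytest-xdist exports worker ids of this form):

```python
import os

def _project_name(base: str) -> str:
    """Suffix the project name with the pytest-xdist worker id, if any."""
    worker_id = os.environ.get('PYTEST_XDIST_WORKER')
    return f'{base}_{worker_id}' if worker_id else base

# Under `pytest -n auto`, each worker exports PYTEST_XDIST_WORKER (e.g. 'gw0'),
# so parallel restart tests write to, and tear down, distinct project dirs:
os.environ['PYTEST_XDIST_WORKER'] = 'gw0'
assert _project_name('test_restart_bde') == 'test_restart_bde_gw0'

# In a plain serial run the variable is absent and the base name is kept:
del os.environ['PYTEST_XDIST_WORKER']
assert _project_name('test_restart_bde') == 'test_restart_bde'
```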
@@ -36,7 +44,7 @@ def test_restart_thermo(self): """ restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_thermo' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_thermo') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True) @@ -55,7 +63,7 @@ def test_restart_thermo(self): break self.assertTrue(thermo_dft_ccsdtf12_bac) - with open(os.path.join(project_directory, 'arc_project_for_testing_delete_after_usage_restart_thermo.info'), 'r') as f: + with open(os.path.join(project_directory, f'{project}.info'), 'r') as f: sts, n2h3, oet, lot, ap = False, False, False, False, False for line in f.readlines(): if 'Considered the following species and TSs:' in line: @@ -66,7 +74,7 @@ def test_restart_thermo(self): oet = True elif 'Levels of theory used:' in line: lot = True - elif 'ARC project arc_project_for_testing_delete_after_usage_restart_thermo' in line: + elif f'ARC project {project}' in line: ap = True self.assertTrue(sts) self.assertTrue(n2h3) @@ -133,7 +141,7 @@ def test_restart_rate_1(self): """Test restarting ARC and attaining a reaction rate coefficient""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_rate_1' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -154,7 +162,7 @@ def test_restart_rate_1(self): def test_restart_rate_2(self): """Test restarting ARC and attaining a reaction rate coefficient""" - project = 'arc_project_for_testing_delete_after_usage_restart_rate_2' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2') project_directory = os.path.join(ARC_PATH, 'Projects', project) base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1') restart_path = os.path.join(base_path, 'restart.yml') @@ -183,7 +191,7 @@ def test_restart_bde (self): """Test restarting ARC and attaining a BDE for anilino_radical.""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'test_restart_bde' + project = _project_name('test_restart_bde') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -192,7 +200,7 @@ def test_restart_bde (self): arc1 = ARC(**input_dict) arc1.execute() - report_path = os.path.join(ARC_PATH, 'Projects', 'test_restart_bde', 'output', 'BDE_report.txt') + report_path = os.path.join(ARC_PATH, 'Projects', project, 'output', 'BDE_report.txt') with open(report_path, 'r') as f: lines = f.readlines() self.assertIn(' BDE report for anilino_radical:\n', lines) @@ -218,10 +226,10 @@ def tearDownClass(cls): A function that is run ONCE after all unit tests in 
this class. Delete all project directories created during these unit tests """ - projects = ['arc_project_for_testing_delete_after_usage_restart_thermo', - 'arc_project_for_testing_delete_after_usage_restart_rate_1', - 'arc_project_for_testing_delete_after_usage_restart_rate_2', - 'test_restart_bde', + projects = [_project_name('arc_project_for_testing_delete_after_usage_restart_thermo'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2'), + _project_name('test_restart_bde'), ] for project in projects: project_directory = os.path.join(ARC_PATH, 'Projects', project)
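
To summarize the clustering behavior exercised by `test_cluster_tsgs` above, here is a minimal standalone sketch (an illustrative simplification, not ARC's implementation):

```python
from typing import List, Optional

def normalize_method_sources(method_sources: Optional[List[str]]) -> List[str]:
    """Lowercase, de-duplicate, and drop None entries, preserving first-seen
    order (mirrors TSGuess._normalize_method_sources)."""
    normalized: List[str] = []
    for method in method_sources or []:
        if method is None:
            continue
        method = method.lower()
        if method not in normalized:
            normalized.append(method)
    return normalized

# When two equivalent TS guesses cluster, the surviving guess keeps its own
# `method` and `execution_time`, while `method_sources` accumulates every
# contributing method (cf. test_cluster_tsgs above):
merged = normalize_method_sources(['user guess 0'] + ['KinBot'])
assert merged == ['user guess 0', 'kinbot']
```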