From 61ddc664f18d419f356a444d58301a8aa8b10515 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:45:11 +0200 Subject: [PATCH 1/9] Adds CREST TS search adapter Adds a CREST TS search adapter for finding transition state conformers, leveraging heuristics-generated guesses and CREST's conformer search capabilities. This allows ARC to explore TS geometries using CREST. Adds method source to TS guesses from CREST Ensures CREST is recorded as a method source when a TS guess from CREST is deemed unique and considered as a possible TS guess. This change ensures proper provenance tracking for TS guesses. Improves error logging in CREST adapter Enhances error messages in the CREST adapter to provide more context when failing to determine H abstraction atoms. The error message now includes the reaction label and crest seed iteration number, which aids in debugging. Improves CREST TS search robustness Addresses potential errors in CREST TS searches by: - Preventing crashes due to missing digits in atom labels by raising a ValueError if `extract_digits` fails to find digits. - Handling cases where no H-abstraction atoms can be determined from the distance matrix, preventing related errors and skipping the corresponding seed. - Removes unused local_path argument from h_abstraction function --- arc/job/adapter.py | 1 + arc/job/adapters/ts/crest.py | 638 ++++++++++++++++++++++++++++++ arc/job/adapters/ts/crest_test.py | 100 +++++ 3 files changed, 739 insertions(+) create mode 100644 arc/job/adapters/ts/crest.py create mode 100644 arc/job/adapters/ts/crest_test.py diff --git a/arc/job/adapter.py b/arc/job/adapter.py index fbef435827..5aeaba802b 100644 --- a/arc/job/adapter.py +++ b/arc/job/adapter.py @@ -97,6 +97,7 @@ class JobEnum(str, Enum): # TS search methods autotst = 'autotst' # AutoTST, 10.1021/acs.jpca.7b07361, 10.26434/chemrxiv.13277870.v2 heuristics = 'heuristics' # ARC's heuristics + crest = 'crest' # CREST conformer/TS search kinbot = 'kinbot' # KinBot, 10.1016/j.cpc.2019.106947 gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500 user = 'user' # user guesses diff --git a/arc/job/adapters/ts/crest.py b/arc/job/adapters/ts/crest.py new file mode 100644 index 0000000000..2f280f1bd6 --- /dev/null +++ b/arc/job/adapters/ts/crest.py @@ -0,0 +1,638 @@ +""" +Utilities for running CREST within ARC. + +Separated from heuristics so CREST can be conditionally imported and reused. 
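+
+The adapter writes CREST inputs (coords.ref, constraints.inp) and a submit
+script for each heuristics-generated seed guess, submits the jobs to the local
+server, polls their status, and collects crest_best.xyz from each completed
+run as a candidate TS guess.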
+""" + +import datetime +import os +import re +import time +from typing import TYPE_CHECKING, List, Optional, Union + +import numpy as np +import pandas as pd + +from arc.common import almost_equal_coords, get_logger +from arc.imports import settings, submit_scripts +from arc.job.adapter import JobAdapter +from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family +from arc.job.factory import register_job_adapter +from arc.job.local import check_job_status, submit_job +from arc.plotter import save_geo +from arc.species.converter import reorder_xyz_string, str_to_xyz, xyz_to_dmat, xyz_to_str +from arc.species.species import ARCSpecies, TSGuess +from arc.job.adapters.ts.heuristics import h_abstraction + +if TYPE_CHECKING: + from arc.level import Level + from arc.reaction import ARCReaction + +logger = get_logger() + +MAX_CHECK_INTERVAL_SECONDS = 100 + +try: + CREST_PATH = settings["CREST_PATH"] + CREST_ENV_PATH = settings["CREST_ENV_PATH"] + SERVERS = settings["servers"] +except KeyError: + CREST_PATH = None + CREST_ENV_PATH = None + SERVERS = {} + + +def crest_available() -> bool: + """ + Return whether CREST is configured for use. + """ + return bool(SERVERS.get("local")) and bool(CREST_PATH or CREST_ENV_PATH) + + +class CrestAdapter(JobAdapter): + """ + A class for executing CREST TS conformer searches based on heuristics-generated guesses. + """ + + def __init__(self, + project: str, + project_directory: str, + job_type: Union[List[str], str], + args: Optional[dict] = None, + bath_gas: Optional[str] = None, + checkfile: Optional[str] = None, + conformer: Optional[int] = None, + constraints: Optional[List] = None, + cpu_cores: Optional[str] = None, + dihedral_increment: Optional[float] = None, + dihedrals: Optional[List[float]] = None, + directed_scan_type: Optional[str] = None, + ess_settings: Optional[dict] = None, + ess_trsh_methods: Optional[List[str]] = None, + execution_type: Optional[str] = None, + fine: bool = False, + initial_time: Optional[Union['datetime.datetime', str]] = None, + irc_direction: Optional[str] = None, + job_id: Optional[int] = None, + job_memory_gb: float = 14.0, + job_name: Optional[str] = None, + job_num: Optional[int] = None, + job_server_name: Optional[str] = None, + job_status: Optional[List[Union[dict, str]]] = None, + level: Optional['Level'] = None, + max_job_time: Optional[float] = None, + run_multi_species: bool = False, + reactions: Optional[List['ARCReaction']] = None, + rotor_index: Optional[int] = None, + server: Optional[str] = None, + server_nodes: Optional[list] = None, + queue: Optional[str] = None, + attempted_queues: Optional[List[str]] = None, + species: Optional[List[ARCSpecies]] = None, + testing: bool = False, + times_rerun: int = 0, + torsions: Optional[List[List[int]]] = None, + tsg: Optional[int] = None, + xyz: Optional[dict] = None, + ): + + self.incore_capacity = 50 + self.job_adapter = 'crest' + self.command = None + self.execution_type = execution_type or 'incore' + + if reactions is None: + raise ValueError('Cannot execute TS CREST without ARCReaction object(s).') + + dihedral_increment = dihedral_increment or 30 + + _initialize_adapter(obj=self, + is_ts=True, + project=project, + project_directory=project_directory, + job_type=job_type, + args=args, + bath_gas=bath_gas, + checkfile=checkfile, + conformer=conformer, + constraints=constraints, + cpu_cores=cpu_cores, + dihedral_increment=dihedral_increment, + dihedrals=dihedrals, + directed_scan_type=directed_scan_type, + ess_settings=ess_settings, + 
ess_trsh_methods=ess_trsh_methods, + fine=fine, + initial_time=initial_time, + irc_direction=irc_direction, + job_id=job_id, + job_memory_gb=job_memory_gb, + job_name=job_name, + job_num=job_num, + job_server_name=job_server_name, + job_status=job_status, + level=level, + max_job_time=max_job_time, + run_multi_species=run_multi_species, + reactions=reactions, + rotor_index=rotor_index, + server=server, + server_nodes=server_nodes, + queue=queue, + attempted_queues=attempted_queues, + species=species, + testing=testing, + times_rerun=times_rerun, + torsions=torsions, + tsg=tsg, + xyz=xyz, + ) + + def write_input_file(self) -> None: + pass + + def set_files(self) -> None: + pass + + def set_additional_file_paths(self) -> None: + pass + + def set_input_file_memory(self) -> None: + pass + + def execute_incore(self): + self._log_job_execution() + self.initial_time = self.initial_time if self.initial_time else datetime.datetime.now() + + supported_families = [key for key, val in ts_adapters_by_rmg_family.items() if 'crest' in val] + + self.reactions = [self.reactions] if not isinstance(self.reactions, list) else self.reactions + for rxn in self.reactions: + if rxn.family not in supported_families: + logger.warning(f'The CREST TS search adapter does not support the {rxn.family} reaction family.') + continue + if any(spc.get_xyz() is None for spc in rxn.r_species + rxn.p_species): + logger.warning(f'The CREST TS search adapter cannot process a reaction if 3D coordinates of ' + f'some/all of its reactants/products are missing.\nNot processing {rxn}.') + continue + if not crest_available(): + logger.warning('CREST is not available. Skipping CREST TS search.') + continue + + if rxn.ts_species is None: + rxn.ts_species = ARCSpecies(label='TS', + is_ts=True, + charge=rxn.charge, + multiplicity=rxn.multiplicity, + ) + + tsg = TSGuess(method='CREST') + tsg.tic() + + crest_job_dirs = [] + xyz_guesses = h_abstraction(reaction=rxn, + dihedral_increment=self.dihedral_increment) + if not xyz_guesses: + logger.warning(f'CREST TS search failed to generate any seed guesses for {rxn.label}.') + tsg.tok() + continue + + for iteration, xyz_entry in enumerate(xyz_guesses): + if isinstance(xyz_entry, dict): + xyz_guess = xyz_entry.get("xyz") + else: + xyz_guess = xyz_entry + if xyz_guess is None: + continue + + df_dmat = convert_xyz_to_df(xyz_guess) + try: + h_abs_atoms_dict = get_h_abs_atoms(df_dmat) + if not h_abs_atoms_dict: + logger.warning( + f"Could not determine the H abstraction atoms for {rxn.label} crest seed {iteration}. " + "Skipping this CREST seed." + ) + continue + crest_job_dir = crest_ts_conformer_search( + xyz_guess, + h_abs_atoms_dict["A"], + h_abs_atoms_dict["H"], + h_abs_atoms_dict["B"], + path=self.local_path, + xyz_crest_int=iteration, + ) + crest_job_dirs.append(crest_job_dir) + except KeyError as e: + logger.error( + f"Could not determine the H abstraction atoms (missing key {e}) for " + f"{rxn.label} crest seed {iteration}." 
+ ) + except ValueError as e: + logger.error( + f"Could not determine the H abstraction atoms for {rxn.label} crest seed {iteration}: {e}" + ) + + if not crest_job_dirs: + logger.warning(f'CREST TS search failed to prepare any jobs for {rxn.label}.') + tsg.tok() + continue + + crest_jobs = submit_crest_jobs(crest_job_dirs) + monitor_crest_jobs(crest_jobs) + xyz_guesses_crest = process_completed_jobs(crest_jobs) + tsg.tok() + + for method_index, xyz in enumerate(xyz_guesses_crest): + if xyz is None: + continue + unique = True + for other_tsg in rxn.ts_species.ts_guesses: + if almost_equal_coords(xyz, other_tsg.initial_xyz): + if hasattr(other_tsg, "method_sources"): + other_tsg.method_sources = other_tsg._normalize_method_sources( + (other_tsg.method_sources or []) + ["crest"] + ) + unique = False + break + if unique: + ts_guess = TSGuess(method='CREST', + index=len(rxn.ts_species.ts_guesses), + method_index=method_index, + t0=tsg.t0, + execution_time=tsg.execution_time, + success=True, + family=rxn.family, + xyz=xyz, + ) + rxn.ts_species.ts_guesses.append(ts_guess) + save_geo(xyz=xyz, + path=self.local_path, + filename=f'CREST_{method_index}', + format_='xyz', + comment=f'CREST {method_index}, family: {rxn.family}', + ) + + if len(self.reactions) < 5: + successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'crest' in tsg.method.lower()] + if successes: + logger.info(f'CREST successfully found {len(successes)} TS guesses for {rxn.label}.') + else: + logger.info(f'CREST did not find any successful TS guesses for {rxn.label}.') + + self.final_time = datetime.datetime.now() + + def execute_queue(self): + self.execute_incore() + + +def crest_ts_conformer_search( + xyz_guess: dict, + a_atom: int, + h_atom: int, + b_atom: int, + path: str = "", + xyz_crest_int: int = 0, +) -> str: + """ + Prepare a CREST TS conformer search job: + - Write coords.ref and constraints.inp + - Write a PBS/HTCondor submit script using submit_scripts["local"]["crest"] + - Return the CREST job directory path + """ + path = os.path.join(path, f"crest_{xyz_crest_int}") + os.makedirs(path, exist_ok=True) + + # --- coords.ref --- + symbols = xyz_guess["symbols"] + converted_coords = reorder_xyz_string( + xyz_str=xyz_to_str(xyz_guess), + reverse_atoms=True, + convert_to="bohr", + ) + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, "coords.ref") + with open(coords_ref_path, "w") as f: + f.write(coords_ref_content) + + # --- constraints.inp --- + num_atoms = len(symbols) + # CREST uses 1-based indices + a_atom += 1 + h_atom += 1 + b_atom += 1 + + # All atoms not directly involved in A–H–B go into the metadynamics atom list + list_of_atoms_numbers_not_participating_in_reaction = [ + i for i in range(1, num_atoms + 1) if i not in [a_atom, h_atom, b_atom] + ] + + constraints_path = os.path.join(path, "constraints.inp") + with open(constraints_path, "w") as f: + f.write("$constrain\n") + f.write(f" atoms: {a_atom}, {h_atom}, {b_atom}\n") + f.write(" force constant: 0.5\n") + f.write(" reference=coords.ref\n") + f.write(f" distance: {a_atom}, {h_atom}, auto\n") + f.write(f" distance: {h_atom}, {b_atom}, auto\n") + f.write("$metadyn\n") + if list_of_atoms_numbers_not_participating_in_reaction: + f.write( + f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n' + ) + f.write("$end\n") + + # --- build CREST command string --- + # Example: crest coords.ref --cinp constraints.inp --noreftopo -T 8 + local_server = SERVERS.get("local", 
{})
+    cpus = int(local_server.get("cpus", 8))
+    if CREST_ENV_PATH:
+        crest_exe = "crest"
+    else:
+        crest_exe = CREST_PATH if CREST_PATH is not None else "crest"
+
+    commands = [
+        crest_exe,
+        "coords.ref",
+        "--cinp constraints.inp",
+        "--noreftopo",
+        f"-T {cpus}",
+    ]
+    command = " ".join(commands)
+
+    # --- activation line (optional) ---
+    activation_line = CREST_ENV_PATH or ""
+
+    if SERVERS.get("local") is not None:
+        cluster_soft = SERVERS["local"]["cluster_soft"].lower()
+
+        if cluster_soft in ["condor", "htcondor"]:
+            # HTCondor branch (kept for completeness; remove if HTCondor is not used)
+            sub_job = submit_scripts["local"]["crest"]
+            format_params = {
+                "name": f"crest_{xyz_crest_int}",
+                "cpus": cpus,
+                "memory": int(SERVERS["local"].get("memory", 32.0) * 1024),
+            }
+            sub_job = sub_job.format(**format_params)
+
+            with open(
+                os.path.join(path, settings["submit_filenames"]["HTCondor"]), "w"
+            ) as f:
+                f.write(sub_job)
+
+            crest_job = submit_scripts["local"]["crest_job"]
+            crest_job = crest_job.format(
+                path=path,
+                activation_line=activation_line,
+                commands=command,
+            )
+
+            with open(os.path.join(path, "job.sh"), "w") as f:
+                f.write(crest_job)
+            os.chmod(os.path.join(path, "job.sh"), 0o700)
+
+            # Pre-create out/err for any status checkers that expect them
+            for fname in ("out.txt", "err.txt"):
+                fpath = os.path.join(path, fname)
+                if not os.path.exists(fpath):
+                    with open(fpath, "w") as f:
+                        f.write("")
+                    os.chmod(fpath, 0o600)
+
+        elif cluster_soft == "pbs":
+            # PBS branch matching the 'crest' submit-script template above
+            sub_job = submit_scripts["local"]["crest"]
+            format_params = {
+                "queue": SERVERS["local"].get("queue", "alon_q"),
+                "name": f"crest_{xyz_crest_int}",
+                "cpus": cpus,
+                # 'memory' is in GB for the template: mem={memory}gb
+                "memory": int(
+                    SERVERS["local"].get("memory", 32)
+                    if SERVERS["local"].get("memory", 32) < 60
+                    else 40
+                ),
+                "activation_line": activation_line,
+                "commands": command,
+            }
+            sub_job = sub_job.format(**format_params)
+
+            submit_filename = settings["submit_filenames"]["PBS"]  # usually 'submit.sh'
+            submit_path = os.path.join(path, submit_filename)
+            with open(submit_path, "w") as f:
+                f.write(sub_job)
+            os.chmod(submit_path, 0o700)
+
+        else:
+            raise ValueError(f"Unsupported cluster_soft for CREST: {cluster_soft!r}")
+
+    return path
+
+
+def submit_crest_jobs(crest_paths: List[str]) -> dict:
+    """
+    Submit CREST jobs to the local server.
+
+    Args:
+        crest_paths (List[str]): List of paths to the CREST directories.
+
+    Returns:
+        dict: Keys are job IDs; values are dicts holding each job's 'path' and 'status'.
+    """
+    crest_jobs = {}
+    for crest_path in crest_paths:
+        job_status, job_id = submit_job(path=crest_path)
+        logger.info(f"CREST job {job_id} submitted for {crest_path}")
+        crest_jobs[job_id] = {"path": crest_path, "status": job_status}
+    return crest_jobs
+
+
+def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None:
+    """
+    Monitor CREST jobs until they are complete.
+
+    Args:
+        crest_jobs (dict): Dictionary containing job information (job ID, path, and status).
+        check_interval (int): Time interval (in seconds) to wait between status checks.
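+
+    Note:
+        The sleep between polls is capped at MAX_CHECK_INTERVAL_SECONDS (100 s),
+        i.e. the effective interval is min(check_interval, MAX_CHECK_INTERVAL_SECONDS).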
+ """ + while True: + all_done = True + for job_id, job_info in crest_jobs.items(): + if job_info["status"] not in ["done", "failed"]: + try: + job_info["status"] = check_job_status(job_id) # Update job status + except Exception as e: + logger.error(f"Error checking job status for job {job_id}: {e}") + job_info["status"] = "failed" + if job_info["status"] not in ["done", "failed"]: + all_done = False + if all_done: + break + time.sleep(min(check_interval, MAX_CHECK_INTERVAL_SECONDS)) + + +def process_completed_jobs(crest_jobs: dict) -> list: + """ + Process the completed CREST jobs and update XYZ guesses. + + Args: + crest_jobs (dict): Dictionary containing job information. + """ + xyz_guesses = [] + for job_id, job_info in crest_jobs.items(): + crest_path = job_info["path"] + if job_info["status"] == "done": + crest_best_path = os.path.join(crest_path, "crest_best.xyz") + if os.path.exists(crest_best_path): + with open(crest_best_path, "r") as f: + content = f.read() + xyz_guess = str_to_xyz(content) + xyz_guesses.append(xyz_guess) + else: + logger.error(f"crest_best.xyz not found in {crest_path}") + elif job_info["status"] == "failed": + logger.error(f"CREST job failed for {crest_path}") + + return xyz_guesses + + +def extract_digits(s: str) -> int: + """ + Extract the first integer from a string + + Args: + s (str): The string to extract the integer from + + Returns: + int: The first integer in the string + + """ + digit_str = re.sub(r"[^\d]", "", s) + if not digit_str: + raise ValueError(f"No digits found in string: {s!r}") + return int(digit_str) + + +def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: + """ + Convert a dictionary of xyz coords to a pandas DataFrame with bond distances + + Args: + xyz (dict): The xyz coordinates of the molecule + + Return: + pd.DataFrame: The xyz coordinates as a pandas DataFrame + + """ + symbols = xyz["symbols"] + symbol_enum = [f"{symbol}{i}" for i, symbol in enumerate(symbols)] + ts_dmat = xyz_to_dmat(xyz) + + return pd.DataFrame(ts_dmat, columns=symbol_enum, index=symbol_enum) + + +def get_h_abs_atoms(dataframe: pd.DataFrame) -> Optional[dict]: + """ + Get the donating/accepting hydrogen atom, and the two heavy atoms that are bonded to it + + Args: + dataframe (pd.DataFrame): The dataframe of the bond distances, columns and index are the atom symbols + + Returns: + Optional[dict]: The hydrogen atom and the two heavy atoms (keys: 'H', 'A', 'B'), + or None if they cannot be determined. 
+ """ + + closest_atoms = {} + for index, row in dataframe.iterrows(): + + row[index] = np.inf + closest = row.nsmallest(2).index.tolist() + closest_atoms[index] = closest + + hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] + condition_occurrences = [] + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + is_heavy_present = any( + atom for atom in atom_neighbours if not atom.startswith("H") + ) + if_hydrogen_present = any( + atom + for atom in atom_neighbours + if atom.startswith("H") and atom != hydrogen_key + ) + + if is_heavy_present and if_hydrogen_present: + # Store the details of this occurrence + condition_occurrences.append( + {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} + ) + + # Check if the condition was met + if condition_occurrences: + if len(condition_occurrences) > 1: + # Store distances to decide which occurrence to use + occurrence_distances = [] + for occurrence in condition_occurrences: + # Calculate the sum of distances to the two heavy atoms + hydrogen_key = f"{occurrence['H']}" + heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] + try: + distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() + occurrence_distances.append((occurrence, distances)) + except KeyError as e: + logger.error(f"Error accessing distances for occurrence {occurrence}: {e}") + + if not occurrence_distances: + logger.warning("No valid occurrences with distance data found for CREST H-abstraction atoms.") + return None + + # Select the occurrence with the smallest distance + best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] + return { + "H": extract_digits(best_occurrence["H"]), + "A": extract_digits(best_occurrence["A"]), + "B": extract_digits(best_occurrence["B"]), + } + else: + single_occurrence = condition_occurrences[0] + return { + "H": extract_digits(single_occurrence["H"]), + "A": extract_digits(single_occurrence["A"]), + "B": extract_digits(single_occurrence["B"]), + } + else: + + # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their distances to determine which Hyodrogen atom has the lowest distance aggregate + min_distance = np.inf + selected_hydrogen = None + selected_heavy_atoms = None + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] + + if len(heavy_atoms) < 2: + continue + + distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() + if distances < min_distance: + min_distance = distances + selected_hydrogen = hydrogen_key + selected_heavy_atoms = heavy_atoms + + if selected_hydrogen: + return { + "H": extract_digits(selected_hydrogen), + "A": extract_digits(selected_heavy_atoms[0]), + "B": extract_digits(selected_heavy_atoms[1]), + } + else: + logger.warning("No valid hydrogen atom found for CREST H-abstraction atoms.") + return None + + +register_job_adapter('crest', CrestAdapter) diff --git a/arc/job/adapters/ts/crest_test.py b/arc/job/adapters/ts/crest_test.py new file mode 100644 index 0000000000..1d5320ad5e --- /dev/null +++ b/arc/job/adapters/ts/crest_test.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +Unit tests for arc.job.adapters.ts.crest +""" + +import os +import tempfile +import unittest + +from arc.species.converter import str_to_xyz + + +class TestCrestAdapter(unittest.TestCase): + """ + Tests for CREST input generation. 
+ """ + + def setUp(self): + self.tmpdir = tempfile.TemporaryDirectory() + + def tearDown(self): + self.tmpdir.cleanup() + + def test_creates_valid_input_files(self): + """ + Ensure CREST inputs are written with expected content/format. + """ + from arc.job.adapters.ts import crest as crest_mod + + xyz = str_to_xyz( + """O 0.0 0.0 0.0 + H 0.0 0.0 0.96 + H 0.9 0.0 0.0""" + ) + + backups = { + "settings": crest_mod.settings, + "submit_scripts": crest_mod.submit_scripts, + "CREST_PATH": crest_mod.CREST_PATH, + "CREST_ENV_PATH": crest_mod.CREST_ENV_PATH, + "SERVERS": crest_mod.SERVERS, + } + + try: + crest_mod.settings = {"submit_filenames": {"PBS": "submit.sh"}} + crest_mod.submit_scripts = { + "local": { + "crest": ( + "#PBS -q {queue}\n" + "#PBS -N {name}\n" + "#PBS -l select=1:ncpus={cpus}:mem={memory}gb\n" + ), + "crest_job": "{activation_line}\ncd {path}\n{commands}\n", + } + } + crest_mod.CREST_PATH = "/usr/bin/crest" + crest_mod.CREST_ENV_PATH = "" + crest_mod.SERVERS = { + "local": {"cluster_soft": "pbs", "cpus": 4, "memory": 8, "queue": "testq"} + } + + crest_dir = crest_mod.crest_ts_conformer_search( + xyz_guess=xyz, a_atom=0, h_atom=1, b_atom=2, path=self.tmpdir.name, xyz_crest_int=0 + ) + + coords_path = os.path.join(crest_dir, "coords.ref") + constraints_path = os.path.join(crest_dir, "constraints.inp") + submit_path = os.path.join(crest_dir, "submit.sh") + + self.assertTrue(os.path.exists(coords_path)) + self.assertTrue(os.path.exists(constraints_path)) + self.assertTrue(os.path.exists(submit_path)) + + with open(coords_path) as f: + coords = f.read().strip().splitlines() + self.assertEqual(coords[0].strip(), "$coord") + self.assertEqual(coords[-1].strip(), "$end") + self.assertEqual(len(coords) - 2, len(xyz["symbols"])) + + with open(constraints_path) as f: + constraints = f.read() + self.assertIn("atoms: 1, 2, 3", constraints) + self.assertIn("force constant: 0.5", constraints) + self.assertIn("reference=coords.ref", constraints) + self.assertIn("distance: 1, 2, auto", constraints) + self.assertIn("distance: 2, 3, auto", constraints) + self.assertIn("$metadyn", constraints) + self.assertTrue(constraints.strip().endswith("$end")) + finally: + crest_mod.settings = backups["settings"] + crest_mod.submit_scripts = backups["submit_scripts"] + crest_mod.CREST_PATH = backups["CREST_PATH"] + crest_mod.CREST_ENV_PATH = backups["CREST_ENV_PATH"] + crest_mod.SERVERS = backups["SERVERS"] + + +if __name__ == "__main__": + unittest.main() From 69d33b18eccc0cb5e7f1c116be10b4275b919da1 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:46:25 +0200 Subject: [PATCH 2/9] Adds installation support for CREST Adds a script to install CREST using a dedicated conda environment. Integrates CREST into the `install_all.sh` script and the help message. Also adds a `--no-rmg` flag in `install_all.sh` to optionally skip RMG-Py installation, and ensures PyRDL is installed only when ARC is installed. Corrects a problem with the AutoTST activation/deactivation hooks and ensures that RMG-Py's path is correctly removed from the PATH and PYTHONPATH. Also ensures TS-GCN can be correctly installed into its environment. Sanitizes paths for AutoTST hook activation Ensures that paths containing special characters are properly escaped when writing the AutoTST activation hook. This prevents issues with the hook failing to activate correctly when the AutoTST repository or current working directory contain characters that are interpreted specially by the shell. 
--- Makefile | 4 ++ devtools/crest_environment.yml | 6 +++ devtools/install_all.sh | 35 +++++++++++---- devtools/install_autotst.sh | 82 ++++++++++++++++++++++++++++++---- devtools/install_crest.sh | 64 ++++++++++++++++++++++++++ devtools/install_gcn.sh | 67 +++++++++++++-------------- devtools/install_pyrdl.sh | 4 +- devtools/install_torchani.sh | 5 ++- 8 files changed, 210 insertions(+), 57 deletions(-) create mode 100644 devtools/crest_environment.yml create mode 100644 devtools/install_crest.sh diff --git a/Makefile b/Makefile index ff5b1e7091..4fd3dfbc35 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ help: @echo " install-kinbot Install KinBot" @echo " install-sella Install Sella" @echo " install-xtb Install xTB" + @echo " install-crest Install CREST" @echo " install-torchani Install TorchANI" @echo " install-ob Install OpenBabel" @echo "" @@ -100,6 +101,9 @@ install-sella: install-xtb: bash $(DEVTOOLS_DIR)/install_xtb.sh +install-crest: + bash $(DEVTOOLS_DIR)/install_crest.sh + install-torchani: bash $(DEVTOOLS_DIR)/install_torchani.sh diff --git a/devtools/crest_environment.yml b/devtools/crest_environment.yml new file mode 100644 index 0000000000..2291e72d37 --- /dev/null +++ b/devtools/crest_environment.yml @@ -0,0 +1,6 @@ +name: crest_env +channels: + - conda-forge +dependencies: + - python>=3.7 + - crest=2.12 diff --git a/devtools/install_all.sh b/devtools/install_all.sh index c958fdd548..c9de207ef7 100644 --- a/devtools/install_all.sh +++ b/devtools/install_all.sh @@ -26,6 +26,8 @@ run_devtool () { bash "$DEVTOOLS_DIR/$1" "${@:2}"; } SKIP_CLEAN=false SKIP_EXT=false SKIP_ARC=false +SKIP_RMG=false +ARC_INSTALLED=false RMG_ARGS=() ARC_ARGS=() EXT_ARGS=() @@ -36,6 +38,7 @@ while [[ $# -gt 0 ]]; do --no-clean) SKIP_CLEAN=true ;; --no-ext) SKIP_EXT=true ;; --no-arc) SKIP_ARC=true ;; + --no-rmg) SKIP_RMG=true ;; --rmg-*) RMG_ARGS+=("--${1#--rmg-}") ;; --arc-*) ARC_ARGS+=("--${1#--arc-}") ;; --ext-*) EXT_ARGS+=("--${1#--ext-}") ;; @@ -44,6 +47,7 @@ while [[ $# -gt 0 ]]; do Usage: $0 [global-flags] [--rmg-xxx] [--arc-yyy] [--ext-zzz] --no-clean Skip micromamba/conda cache cleanup --no-ext Skip external tools (AutoTST, KinBot, …) + --no-rmg Skip RMG-Py entirely --rmg-path Forward '--path' to RMG installer --rmg-pip Forward '--pip' to RMG installer ... @@ -67,16 +71,15 @@ echo " EXT sub-flags : ${EXT_ARGS[*]:-(none)}" echo ">>> Beginning full ARC external repo installation…" pushd . >/dev/null -# 1) RMG -echo "=== Installing RMG ===" -run_devtool install_rmg.sh "${RMG_ARGS[@]}" - - - # 2) PyRDL - echo "=== Installing PyRDL ===" - bash devtools/install_pyrdl.sh +# 1) RMG (optional) +if [[ $SKIP_RMG == false ]]; then + echo "=== Installing RMG ===" + run_devtool install_rmg.sh "${RMG_ARGS[@]}" +else + echo "ℹ️ --no-rmg flag set. Skipping RMG installation." +fi -# 3) ARC itself (skip env creation in CI or if user requests it) +# 2) ARC itself (skip env creation in CI or if user requests it) if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then if [[ $SKIP_CLEAN == false ]]; then echo "=== Cleaning up old ARC build artifacts ===" @@ -88,10 +91,23 @@ if [[ "${CI:-false}" != "true" && "${SKIP_ARC:-false}" != "true" ]]; then echo "=== Installing ARC ===" run_devtool install_arc.sh "${ARC_ARGS[@]}" + ARC_INSTALLED=true else + ARC_INSTALLED=false echo ":information_source: CI detected or --no-arc flag set. Skip cleaning ARC installation." 
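+    # ARC_INSTALLED remains false in this branch, so the PyRDL step below is
+    # skipped (except in CI, where PyRDL is installed unconditionally).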
fi +# 3) PyRDL (needs arc_env, but not ARC install) +if [[ "${CI:-false}" == "true" ]]; then + echo "=== Installing PyRDL (CI) ===" + bash devtools/install_pyrdl.sh +elif [[ $ARC_INSTALLED == true ]]; then + echo "=== Installing PyRDL ===" + bash devtools/install_pyrdl.sh +else + echo "ℹ️ Skipping PyRDL install because ARC installation was skipped." +fi + if [[ $SKIP_EXT == false ]]; then # map of friendly names → installer scripts declare -A EXT_INSTALLERS=( @@ -100,6 +116,7 @@ if [[ $SKIP_EXT == false ]]; then [KinBot]=install_kinbot.sh [OpenBabel]=install_ob.sh [xtb]=install_xtb.sh + [CREST]=install_crest.sh [Sella]=install_sella.sh [TorchANI]=install_torchani.sh ) diff --git a/devtools/install_autotst.sh b/devtools/install_autotst.sh index 5e3bc35288..e71e42d035 100644 --- a/devtools/install_autotst.sh +++ b/devtools/install_autotst.sh @@ -31,6 +31,8 @@ done # where "$(pwd)" is the path to the AutoTST repository. write_hook () { local env="$1" repo_path="$2" # repo_path="$(pwd)" in AutoTST + local repo_path_escaped + repo_path_escaped=$(printf '%q' "$repo_path") $COMMAND_PKG env list | awk '{print $1}' | grep -qx "$env" || return 0 # env prefix @@ -50,16 +52,37 @@ write_hook () { # --- activation -------------------------------------------------------- cat >"$act" <>"$act" <<'EOF' +# Remove RMG-Py from PATH/PYTHONPATH to avoid clashes while AutoTST is active. +if [[ -n "${RMG_PY_PATH:-}" ]]; then + export PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + export PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" +fi +EOF + fi + + cat >>"$act" <<'EOF' case ":\$PYTHONPATH:" in *":\$AUTOTST_ROOT:"*) ;; \ *) export PYTHONPATH="\$AUTOTST_ROOT:\${PYTHONPATH:-}" ;; esac EOF # --- de-activation ----------------------------------------------------- cat >"$deact" <<'EOF' -_strip () { local n=":$1:"; local s=":$2:"; echo "${s//$n/:}" | sed 's/^://;s/:$//'; } -export PYTHONPATH=$(_strip "$AUTOTST_ROOT" ":${PYTHONPATH:-}:") -unset AUTOTST_ROOT +export PATH="${AUTOTST_OLD_PATH:-$PATH}" +if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" +else + unset PYTHONPATH +fi +unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH EOF echo "🔗 AutoTST hook refreshed in $env" } @@ -115,12 +138,53 @@ fi if [[ $MODE == "path" ]]; then - AUTO_PATH_LINE="export PYTHONPATH=\"\$PYTHONPATH:$(pwd)\"" - if ! grep -Fqx "$AUTO_PATH_LINE" ~/.bashrc; then - echo "$AUTO_PATH_LINE" >> ~/.bashrc - echo "✔️ Added AutoTST path to ~/.bashrc" + HOOK_SENTINEL="# AutoTST path-mode hook" + if ! grep -Fqx "$HOOK_SENTINEL" ~/.bashrc; then + cat <<'EOF' >> ~/.bashrc +# AutoTST path-mode hook +_strip_path () { + local needle=":$1:" + local haystack=":$2:" + echo "${haystack//$needle/:}" | sed 's/^://;s/:$//' +} + +autotst_on () { + export AUTOTST_ROOT="__AUTOTST_PATH__" + export AUTOTST_OLD_PATH="$PATH" + export AUTOTST_OLD_PYTHONPATH="${PYTHONPATH:-}" + if [[ -n "${RMG_PY_PATH:-}" ]]; then + PATH="$(_strip_path "$RMG_PY_PATH" "$PATH")" + PYTHONPATH="$(_strip_path "$RMG_PY_PATH" "${PYTHONPATH:-}")" + fi + + case ":$PYTHONPATH:" in *":$AUTOTST_ROOT:"*) ;; \ + *) PYTHONPATH="$AUTOTST_ROOT:${PYTHONPATH:-}" ;; esac + export PATH PYTHONPATH +} + +autotst_off () { + export PATH="${AUTOTST_OLD_PATH:-$PATH}" + if [[ -n "${AUTOTST_OLD_PYTHONPATH+x}" ]]; then + export PYTHONPATH="$AUTOTST_OLD_PYTHONPATH" + else + unset PYTHONPATH + fi + unset AUTOTST_ROOT AUTOTST_OLD_PATH AUTOTST_OLD_PYTHONPATH +} + +# Enable AutoTST by default in new shells and keep RMG-Py out of the way. 
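+# Run 'autotst_off' to undo this and restore the saved PATH/PYTHONPATH.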
+autotst_on +EOF + # replace placeholder with actual path (portable across GNU/BSD sed) + AUTOTST_ESCAPED_PATH="$(printf '%q' "$(pwd)" | sed 's#/#\\\\/#g')" + if sed --version >/dev/null 2>&1; then + sed -i "s#__AUTOTST_PATH__#${AUTOTST_ESCAPED_PATH}#" ~/.bashrc + else + sed -i '' "s#__AUTOTST_PATH__#${AUTOTST_ESCAPED_PATH}#" ~/.bashrc + fi + echo "✔️ Added AutoTST path-mode hook to ~/.bashrc" else - echo "ℹ️ AutoTST path already exists in ~/.bashrc" + echo "ℹ️ AutoTST path-mode hook already exists in ~/.bashrc" fi elif [[ $MODE == "conda" ]]; then write_hook tst_env "$(pwd)" diff --git a/devtools/install_crest.sh b/devtools/install_crest.sh new file mode 100644 index 0000000000..1086ec9db2 --- /dev/null +++ b/devtools/install_crest.sh @@ -0,0 +1,64 @@ +#!/bin/bash -l +set -eo pipefail + +if command -v micromamba &> /dev/null; then + echo "✔️ Micromamba is installed." + COMMAND_PKG=micromamba +elif command -v mamba &> /dev/null; then + echo "✔️ Mamba is installed." + COMMAND_PKG=mamba +elif command -v conda &> /dev/null; then + echo "✔️ Conda is installed." + COMMAND_PKG=conda +else + echo "❌ Micromamba, Mamba, or Conda is required. Please install one." + exit 1 +fi + +if [ "$COMMAND_PKG" = "micromamba" ]; then + eval "$(micromamba shell hook --shell=bash)" +else + BASE=$(conda info --base) + . "$BASE/etc/profile.d/conda.sh" +fi + +ENV_FILE="devtools/crest_environment.yml" + +if [ ! -f "$ENV_FILE" ]; then + echo "❌ File not found: $ENV_FILE" + exit 1 +fi + +if $COMMAND_PKG env list | grep -q '^crest_env\s'; then + echo ">>> Updating existing crest_env..." + $COMMAND_PKG env update -n crest_env -f "$ENV_FILE" --prune +else + echo ">>> Creating new crest_env..." + $COMMAND_PKG env create -n crest_env -f "$ENV_FILE" -y +fi + +echo ">>> Checking CREST installation..." + +if [ "$COMMAND_PKG" = "micromamba" ]; then + CREST_RUNNER="micromamba run -n crest_env" + CREST_LISTER="micromamba list -n crest_env" +else + CREST_RUNNER="conda run -n crest_env" + CREST_LISTER="conda list -n crest_env" +fi + +if $CREST_RUNNER crest --version &> /dev/null; then + version_output=$($CREST_RUNNER crest --version 2>&1) + echo "$version_output" + installed_version=$(printf '%s' "$version_output" | tr '\n' ' ' | sed -n 's/.*Version[[:space:]]\+\([0-9.][0-9.]*\).*/\1/p') + if [ "$installed_version" != "2.12" ]; then + echo "❌ CREST version mismatch (expected 2.12)." + exit 1 + fi + echo "✔️ CREST 2.12 is successfully installed." +else + echo "❌ CREST is not found in PATH. Please check the environment." + exit 1 +fi + +echo "✅ Done installing CREST (crest_env)." 
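+
+# Note: ARC's CREST TS adapter (arc/job/adapters/ts/crest.py) discovers this
+# environment via the CREST_PATH / CREST_ENV_PATH settings keys; point
+# CREST_ENV_PATH at an activation line for crest_env (the exact line is
+# machine-specific, e.g. a hypothetical 'conda activate crest_env').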
diff --git a/devtools/install_gcn.sh b/devtools/install_gcn.sh
index 8f83a2cda1..5273353d77 100644
--- a/devtools/install_gcn.sh
+++ b/devtools/install_gcn.sh
@@ -93,12 +93,12 @@ write_hook() {   # env_name  repo_path
   rm -f "$act" "$deact"
 
   # --- activation hook -----------------------------------------------------
-  cat <<'ACTHOOK' >"$act"
+  cat <<ACTHOOK >"$act"
 # TS-GCN hook – $(date +%F)
 export TSGCN_ROOT="$repo"
-case ":$PYTHONPATH:" in
-  *":$TSGCN_ROOT:") ;; \
-  *) export PYTHONPATH="$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
+case ":\$PYTHONPATH:" in
+  *":\$TSGCN_ROOT:"*) ;; \
+  *) export PYTHONPATH="\$TSGCN_ROOT:\${PYTHONPATH:-}" ;;
 esac
 ACTHOOK
@@ -182,46 +182,43 @@ CORE_PKGS=(
 # ── inline env creation & unified PyTorch install --------------------------
 if $COMMAND_PKG env list | awk '{print $1}' | grep -qx ts_gcn; then
-    $COMMAND_PKG env update -n ts_gcn \
+    $COMMAND_PKG install -n ts_gcn \
         -c schrodinger -c conda-forge \
         --channel-priority flexible \
         "${CORE_PKGS[@]}" \
-        --prune -y
+        --yes
 else
-    $COMMAND_PKG env create -n ts_gcn \
+    $COMMAND_PKG create -n ts_gcn \
         -c schrodinger -c conda-forge \
         --channel-priority flexible \
         "${CORE_PKGS[@]}" \
-        -y
+        --yes
 fi
-    # 2) activate it - we set +u to avoid printing variable names
-    # that are not set yet
-    set +u; $COMMAND_PKG activate ts_gcn; set -u
-
-    # 3) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
-    WHEEL=https://download.pytorch.org/whl/torch_stable.html
-    if [[ $CUDA_VERSION == cpu ]]; then
-pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
-    else
-        pip install torch==1.7.1+${CUDA_VERSION} \
-            torchvision==0.8.2+${CUDA_VERSION} \
-            torchaudio==0.7.2+${CUDA_VERSION} \
-            -f $WHEEL
-    fi
-    # for PyG wheels use the official PyG index—with a real '+' in the URL
-    TORCH_VER=1.7.1
-    WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
-
-    # install ONLY the prebuilt binaries, never fall back to source
-    pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
-    pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
-    pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
-    pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
-
-    # finally the meta‐package (this one can install from PyPI)
-    pip install torch-geometric
-    echo "✅ ts_gcn environment ready"
+# 2) pip‐install exactly the CPU or CUDA wheels (no ROCm on that index)
+PIP_RUN=("$COMMAND_PKG" run -n ts_gcn)
+WHEEL=https://download.pytorch.org/whl/torch_stable.html
+if [[ $CUDA_VERSION == cpu ]]; then
+    "${PIP_RUN[@]}" pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f $WHEEL
+else
+    "${PIP_RUN[@]}" pip install torch==1.7.1+${CUDA_VERSION} \
+        torchvision==0.8.2+${CUDA_VERSION} \
+        torchaudio==0.7.2+${CUDA_VERSION} \
+        -f $WHEEL
+fi
+# for PyG wheels use the official PyG index—with a real '+' in the URL
+TORCH_VER=1.7.1
+WHEEL_URL="https://pytorch-geometric.com/whl/torch-${TORCH_VER}+${CUDA_VERSION}.html"
+
+# install ONLY the prebuilt binaries, never fall back to source
+"${PIP_RUN[@]}" pip install torch-scatter -f "$WHEEL_URL" --only-binary torch-scatter
+"${PIP_RUN[@]}" pip install torch-sparse -f "$WHEEL_URL" --only-binary torch-sparse
+"${PIP_RUN[@]}" pip install torch-cluster -f "$WHEEL_URL" --only-binary torch-cluster
+"${PIP_RUN[@]}" pip install torch-spline-conv -f "$WHEEL_URL" --only-binary torch-spline-conv
+
+# finally the meta‐package (this one can install from PyPI)
+"${PIP_RUN[@]}" pip install torch-geometric
+echo "✅
ts_gcn environment ready" # ── write hooks into conda envs if required ------------------------------- if [[ $MODE == conda ]]; then diff --git a/devtools/install_pyrdl.sh b/devtools/install_pyrdl.sh index 2b2cc9340c..87f1ccf454 100644 --- a/devtools/install_pyrdl.sh +++ b/devtools/install_pyrdl.sh @@ -49,8 +49,8 @@ fi # Ensure CMake is installed in the environment if ! command -v cmake &> /dev/null; then - echo "Installing CMake..." - "$COMMAND_PKG" install -y cmake + echo "Installing CMake into arc_env..." + "$COMMAND_PKG" install -n arc_env -c conda-forge -y cmake fi # Clone and build RingDecomposerLib diff --git a/devtools/install_torchani.sh b/devtools/install_torchani.sh index d446b3688e..62719757ed 100644 --- a/devtools/install_torchani.sh +++ b/devtools/install_torchani.sh @@ -2,9 +2,10 @@ set -eo pipefail # Enable tracing of each command, but tee it to a logfile +LOGFILE="tani_env_setup.log" exec 3>&1 4>&2 trap 'exec 2>&4 1>&3' EXIT -exec 1> >(tee .log) 2>&1 +exec 1> >(tee "$LOGFILE") 2>&1 set -x echo ">>> Starting TANI environment setup at $(date)" @@ -53,7 +54,7 @@ fi echo ">>> Creating conda env from $ENV_YAML (name=$ENV_NAME)" if ! $COMMAND_PKG env create -n "$ENV_NAME" -f "$ENV_YAML" -v; then echo "❌ Environment creation failed. Dumping last 200 lines of log:" - tail -n 200 tani_env_setup.log + tail -n 200 "$LOGFILE" echo "---- Disk usage at failure ----" df -h . exit 1 From 52dc30687dbef546cfaf812e2ad1c23d8ecb0555 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:48:33 +0200 Subject: [PATCH 3/9] Adds the CREST adapter for TS search Adds the CREST adapter as an option for transition state (TS) guess generation, particularly for H_Abstraction reactions, improving the exploration of potential TS geometries. Includes CREST in the default list of in-core adapters and allows it to be used in heuristics. The heuristic adapter now passes the local path of the files to the h_abstraction method so the geometries are saved to the appropriate folders. Saves the method used to generate the TS guesses in the file name so the geometries generated with CREST are clearly labeled as such. Refactors TS guess method source handling Updates TS guess generation to correctly handle multiple method sources. Specifically, it normalizes the method sources for TS guesses by aggregating method labels. Also, removes the unused `path` argument from the `h_abstraction` function, streamlining the H_Abstraction TS guess generation. --- arc/job/adapters/common.py | 5 +- arc/job/adapters/ts/__init__.py | 1 + arc/job/adapters/ts/heuristics.py | 412 +++++++++++++++++++----------- arc/species/species.py | 1 + 4 files changed, 265 insertions(+), 154 deletions(-) diff --git a/arc/job/adapters/common.py b/arc/job/adapters/common.py index 8fb331522c..0256a300bf 100644 --- a/arc/job/adapters/common.py +++ b/arc/job/adapters/common.py @@ -41,7 +41,7 @@ 'Cyclic_Ether_Formation': ['kinbot'], 'Cyclopentadiene_scission': ['gcn', 'xtb_gsm'], 'Diels_alder_addition': ['kinbot'], - 'H_Abstraction': ['heuristics', 'autotst'], + 'H_Abstraction': ['heuristics', 'autotst', 'crest'], 'carbonyl_based_hydrolysis': ['heuristics'], 'ether_hydrolysis': ['heuristics'], 'nitrile_hydrolysis': ['heuristics'], @@ -77,7 +77,8 @@ adapters_that_do_not_require_a_level_arg = ['xtb', 'torchani'] # Default is "queue", "pipe" will be called whenever needed. So just list 'incore'. 
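+# 'crest' qualifies as incore: its execute_incore() writes, submits, and
+# monitors its own CREST jobs instead of going through the generic queue.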
-default_incore_adapters = ['autotst', 'gcn', 'heuristics', 'kinbot', 'psi4', 'xtb', 'xtb_gsm', 'torchani', 'openbabel'] +default_incore_adapters = ['autotst', 'crest', 'gcn', 'heuristics', 'kinbot', 'psi4', 'xtb', 'xtb_gsm', 'torchani', + 'openbabel'] def _initialize_adapter(obj: 'JobAdapter', diff --git a/arc/job/adapters/ts/__init__.py b/arc/job/adapters/ts/__init__.py index 29444e0ed4..dab3af7f4d 100644 --- a/arc/job/adapters/ts/__init__.py +++ b/arc/job/adapters/ts/__init__.py @@ -1,4 +1,5 @@ import arc.job.adapters.ts.autotst_ts +import arc.job.adapters.ts.crest import arc.job.adapters.ts.gcn_ts import arc.job.adapters.ts.heuristics import arc.job.adapters.ts.kinbot_ts diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index 9031aa9ec3..3ce88b3d1b 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -21,15 +21,29 @@ import os from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from arc.common import (ARC_PATH, almost_equal_coords, get_angle_in_180_range, get_logger, is_angle_linear, - is_xyz_linear, key_by_val, read_yaml_file) +from arc.common import ( + ARC_PATH, + almost_equal_coords, + get_angle_in_180_range, + get_logger, + is_angle_linear, + is_xyz_linear, + key_by_val, + read_yaml_file, +) from arc.family import get_reaction_family_products from arc.job.adapter import JobAdapter from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter from arc.plotter import save_geo -from arc.species.converter import (compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz, - add_atom_to_xyz_using_internal_coords, sorted_distances_of_atom) +from arc.species.converter import ( + add_atom_to_xyz_using_internal_coords, + compare_zmats, + relocate_zmat_dummy_atoms_to_the_end, + sorted_distances_of_atom, + zmat_from_xyz, + zmat_to_xyz, +) from arc.mapping.engine import map_two_species from arc.molecule.molecule import Molecule from arc.species.species import ARCSpecies, TSGuess, SpeciesError, colliding_atoms @@ -40,9 +54,10 @@ from arc.level import Level from arc.reaction import ARCReaction - -FAMILY_SETS = {'hydrolysis_set_1': ['carbonyl_based_hydrolysis', 'ether_hydrolysis'], - 'hydrolysis_set_2': ['nitrile_hydrolysis']} +FAMILY_SETS = { + 'hydrolysis_set_1': ['carbonyl_based_hydrolysis', 'ether_hydrolysis'], + 'hydrolysis_set_2': ['nitrile_hydrolysis'], +} DIHEDRAL_INCREMENT = 30 @@ -270,24 +285,35 @@ def execute_incore(self): try: tsg = TSGuess(method='Heuristics') tsg.tic() - xyzs, families, indices = hydrolysis(reaction=rxn) + xyzs_raw, families, indices = hydrolysis(reaction=rxn) tsg.tok() - if not xyzs: - logger.warning(f'Heuristics TS search failed to generate any valid TS guesses for {rxn.label}.') + if not xyzs_raw: + logger.warning( + f'Heuristics TS search failed to generate any valid TS guesses for {rxn.label}.' 
+ ) continue + xyzs = [{'xyz': xyz, 'method': 'Heuristics'} for xyz in xyzs_raw] except ValueError: continue - for method_index, xyz in enumerate(xyzs): + for method_index, xyz_entry in enumerate(xyzs): + xyz = xyz_entry.get("xyz") + method_label = xyz_entry.get("method", "Heuristics") + if xyz is None: + continue unique = True for other_tsg in rxn.ts_species.ts_guesses: if almost_equal_coords(xyz, other_tsg.initial_xyz): - if 'heuristics' not in other_tsg.method.lower(): - other_tsg.method += ' and Heuristics' + existing_sources = getattr(other_tsg, "method_sources", None) + if existing_sources is not None: + combined_sources = list(existing_sources) + [method_label] + else: + combined_sources = [other_tsg.method, method_label] + other_tsg.method_sources = TSGuess._normalize_method_sources(combined_sources) unique = False break if unique: - ts_guess = TSGuess(method='Heuristics', + ts_guess = TSGuess(method=method_label, index=len(rxn.ts_species.ts_guesses), method_index=method_index, t0=tsg.t0, @@ -299,15 +325,15 @@ def execute_incore(self): rxn.ts_species.ts_guesses.append(ts_guess) save_geo(xyz=xyz, path=self.local_path, - filename=f'Heuristics_{method_index}', + filename=f'{method_label}_{method_index}', format_='xyz', - comment=f'Heuristics {method_index}, family: {rxn.family}', + comment=f'{method_label} {method_index}, family: {rxn.family}', ) if len(self.reactions) < 5: - successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'heuristics' in tsg.method]) + successes = [tsg for tsg in rxn.ts_species.ts_guesses if tsg.success] if successes: - logger.info(f'Heuristics successfully found {successes} TS guesses for {rxn.label}.') + logger.info(f'Heuristics successfully found {len(successes)} TS guesses for {rxn.label}.') else: logger.info(f'Heuristics did not find any successful TS guesses for {rxn.label}.') @@ -873,7 +899,7 @@ def h_abstraction(reaction: 'ARCReaction', dihedral_increment (int, optional): The dihedral increment to use for B-H-A-C and D-B-H-C dihedral scans. Returns: List[dict] - Entries are Cartesian coordinates of TS guesses for all reactions. + Entries hold Cartesian coordinates of TS guesses and the generating method label. """ xyz_guesses = list() dihedral_increment = dihedral_increment or DIHEDRAL_INCREMENT @@ -952,7 +978,8 @@ def h_abstraction(reaction: 'ARCReaction', else: # This TS is unique, and has no atom collisions. 
zmats.append(zmat_guess) - xyz_guesses.append(xyz_guess) + xyz_guesses.append({"xyz": xyz_guess, "method": "Heuristics"}) + return xyz_guesses @@ -987,9 +1014,11 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] - main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, - product_dict, - is_set_1) + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices( + reaction, + product_dict, + is_set_1, + ) base_xyz_indices = { "a": xyz_indices["a"], "b": xyz_indices["b"], @@ -999,9 +1028,19 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in } adjustments_to_try = [False, True] if dihedrals_to_change_num == 1 else [True] for adjust_dihedral in adjustments_to_try: - chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices(initial_xyz, base_xyz_indices, xyz_indices, - hydrolysis_parameters,reaction_family, water, zmats_total, is_set_1, is_set_2, - dihedrals_to_change_num, should_adjust_dihedral=adjust_dihedral) + chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices( + initial_xyz, + base_xyz_indices, + xyz_indices, + hydrolysis_parameters, + reaction_family, + water, + zmats_total, + is_set_1, + is_set_2, + dihedrals_to_change_num, + should_adjust_dihedral=adjust_dihedral, + ) max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) if xyz_guesses: xyz_guesses_total.extend(xyz_guesses) @@ -1015,8 +1054,8 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in condition_met = len(xyz_guesses_total) > 0 nitrile_in_inputs = any( - (pd.get("family") == "nitrile_hydrolysis") or - (isinstance(pd.get("family"), list) and "nitrile_hydrolysis" in pd.get("family")) + (pd.get("family") == "nitrile_hydrolysis") + or (isinstance(pd.get("family"), list) and "nitrile_hydrolysis" in pd.get("family")) for pd in product_dicts ) nitrile_already_found = any(fam == "nitrile_hydrolysis" for fam in reaction_families) @@ -1032,9 +1071,11 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in is_set_1 = reaction_family in hydrolysis_parameters["family_sets"]["set_1"] is_set_2 = reaction_family in hydrolysis_parameters["family_sets"]["set_2"] - main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices(reaction, - product_dict, - is_set_1) + main_reactant, water, initial_xyz, xyz_indices = extract_reactant_and_indices( + reaction, + product_dict, + is_set_1, + ) base_xyz_indices = { "a": xyz_indices["a"], "b": xyz_indices["b"], @@ -1048,10 +1089,18 @@ def hydrolysis(reaction: 'ARCReaction') -> Tuple[List[dict], List[dict], List[in break dihedrals_to_change_num += 1 chosen_xyz_indices, xyz_guesses, zmats_total, n_dihedrals_found = process_chosen_d_indices( - initial_xyz, base_xyz_indices, xyz_indices, - hydrolysis_parameters, reaction_family, water, zmats_total, is_set_1, is_set_2, - dihedrals_to_change_num, should_adjust_dihedral=True, - allow_nitrile_dihedrals=True + initial_xyz, + base_xyz_indices, + xyz_indices, + hydrolysis_parameters, + reaction_family, + water, + zmats_total, + is_set_1, + is_set_2, + dihedrals_to_change_num, + should_adjust_dihedral=True, + allow_nitrile_dihedrals=True, ) max_dihedrals_found = max(max_dihedrals_found, n_dihedrals_found) @@ -1083,11 +1132,13 @@ def 
get_products_and_check_families(reaction: 'ARCReaction') -> Tuple[List[dict] consider_arc_families=True, ) carbonyl_based_present = any( - "carbonyl_based_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + "carbonyl_based_hydrolysis" + in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) for d in product_dicts ) ether_present = any( - "ether_hydrolysis" in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) + "ether_hydrolysis" + in (d.get("family", []) if isinstance(d.get("family"), list) else [d.get("family")]) for d in product_dicts ) @@ -1118,9 +1169,11 @@ def has_carbonyl_based_hydrolysis(reaction_families: List[dict]) -> bool: return any(family == "carbonyl_based_hydrolysis" for family in reaction_families) -def extract_reactant_and_indices(reaction: 'ARCReaction', - product_dict: dict, - is_set_1: bool) -> Tuple[ARCSpecies, ARCSpecies, dict, dict]: +def extract_reactant_and_indices( + reaction: 'ARCReaction', + product_dict: dict, + is_set_1: bool, +) -> Tuple[ARCSpecies, ARCSpecies, dict, dict]: """ Extract the reactant molecules and relevant atomic indices (a,b,e,d,o,h1) for the hydrolysis reaction. @@ -1163,11 +1216,13 @@ def extract_reactant_and_indices(reaction: 'ARCReaction', main_reactant, a_xyz_index, b_xyz_index, - two_neighbors + two_neighbors, ) except ValueError as e: - raise ValueError(f"Failed to determine neighbors by electronegativity for atom {a_xyz_index} " - f"in species {main_reactant.label}: {e}") + raise ValueError( + f"Failed to determine neighbors by electronegativity for atom {a_xyz_index} " + f"in species {main_reactant.label}: {e}" + ) o_index = len(main_reactant.mol.atoms) h1_index = o_index + 1 @@ -1178,25 +1233,26 @@ def extract_reactant_and_indices(reaction: 'ARCReaction', "e": e_xyz_index, "d": d_xyz_indices, "o": o_index, - "h1": h1_index + "h1": h1_index, } return main_reactant, water, initial_xyz, xyz_indices -def process_chosen_d_indices(initial_xyz: dict, - base_xyz_indices: dict, - xyz_indices: dict, - hydrolysis_parameters: dict, - reaction_family: str, - water: 'ARCSpecies', - zmats_total: List[dict], - is_set_1: bool, - is_set_2: bool, - dihedrals_to_change_num: int, - should_adjust_dihedral: bool, - allow_nitrile_dihedrals: bool = False - ) -> Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]], int]: +def process_chosen_d_indices( + initial_xyz: dict, + base_xyz_indices: dict, + xyz_indices: dict, + hydrolysis_parameters: dict, + reaction_family: str, + water: 'ARCSpecies', + zmats_total: List[dict], + is_set_1: bool, + is_set_2: bool, + dihedrals_to_change_num: int, + should_adjust_dihedral: bool, + allow_nitrile_dihedrals: bool = False, +) -> Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]], int]: """ Iterates over the 'd' indices to process TS guess generation. @@ -1214,7 +1270,6 @@ def process_chosen_d_indices(initial_xyz: dict, should_adjust_dihedral (bool): Whether to adjust dihedral angles. allow_nitrile_dihedrals (bool, optional): Force-enable dihedral adjustments for nitriles. Defaults to False. - Returns: Tuple[Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]: - Chosen indices for TS generation. 
@@ -1224,11 +1279,18 @@ def process_chosen_d_indices(initial_xyz: dict, """ max_dihedrals_found = 0 for d_index in xyz_indices.get("d", []) or [None]: - chosen_xyz_indices = {**base_xyz_indices, "d": d_index} if d_index is not None else {**base_xyz_indices, - "d": None} + chosen_xyz_indices = {**base_xyz_indices, "d": d_index} if d_index is not None else { + **base_xyz_indices, + "d": None, + } current_zmat, zmat_indices = setup_zmat_indices(initial_xyz, chosen_xyz_indices) - matches = get_matching_dihedrals(current_zmat, zmat_indices['a'], zmat_indices['b'], - zmat_indices['e'], zmat_indices['d']) + matches = get_matching_dihedrals( + current_zmat, + zmat_indices['a'], + zmat_indices['b'], + zmat_indices['e'], + zmat_indices['d'], + ) max_dihedrals_found = max(max_dihedrals_found, len(matches)) if should_adjust_dihedral and dihedrals_to_change_num > len(matches): continue @@ -1246,22 +1308,28 @@ def process_chosen_d_indices(initial_xyz: dict, zmat_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors) if zmat_variants: adjusted_zmats.extend(zmat_variants) - if not adjusted_zmats: - pass - else: + if adjusted_zmats: zmats_to_process = adjusted_zmats ts_guesses_list = [] for zmat_to_process in zmats_to_process: ts_guesses, updated_zmats = process_family_specific_adjustments( - is_set_1, is_set_2, reaction_family, hydrolysis_parameters, - zmat_to_process, water, chosen_xyz_indices, zmats_total) + is_set_1, + is_set_2, + reaction_family, + hydrolysis_parameters, + zmat_to_process, + water, + chosen_xyz_indices, + zmats_total, + ) zmats_total = updated_zmats ts_guesses_list.extend(ts_guesses) if attempted_dihedral_adjustments and not ts_guesses_list and ( - reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals): - flipped_zmats= [] + reaction_family != 'nitrile_hydrolysis' or allow_nitrile_dihedrals + ): + flipped_zmats = [] adjustment_factors = [15, 25, 35, 45, 55] for indices in indices_list: flipped_variants = generate_dihedral_variants(current_zmat, indices, adjustment_factors, flip=True) @@ -1269,8 +1337,14 @@ def process_chosen_d_indices(initial_xyz: dict, for zmat_to_process in flipped_zmats: ts_guesses, updated_zmats = process_family_specific_adjustments( - is_set_1, is_set_2, reaction_family, hydrolysis_parameters, - zmat_to_process, water, chosen_xyz_indices, zmats_total + is_set_1, + is_set_2, + reaction_family, + hydrolysis_parameters, + zmat_to_process, + water, + chosen_xyz_indices, + zmats_total, ) zmats_total = updated_zmats ts_guesses_list.extend(ts_guesses) @@ -1311,10 +1385,12 @@ def get_main_reactant_and_water_from_hydrolysis_reaction(reaction: 'ARCReaction' return arc_reactant, water -def get_neighbors_by_electronegativity(spc: 'ARCSpecies', - atom_index: int, - exclude_index: int, - two_neighbors: bool = True) -> Tuple[int, List[int]]: +def get_neighbors_by_electronegativity( + spc: 'ARCSpecies', + atom_index: int, + exclude_index: int, + two_neighbors: bool = True, +) -> Tuple[int, List[int]]: """ Retrieve the top two neighbors of a given atom in a species, sorted by their effective electronegativity, excluding a specified neighbor. @@ -1340,8 +1416,11 @@ def get_neighbors_by_electronegativity(spc: 'ARCSpecies', Raises: ValueError: If the atom has no valid neighbors. 
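+
+        Example:
+            For a carbonyl carbon whose candidate neighbors are a double-bonded O
+            and a single-bonded C, the O neighbor scores higher (electronegativity
+            scaled by bond order, e.g. Pauling-like 3.44 * 2 vs. 2.55 * 1), so its
+            index is returned first.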
""" - neighbors = [neighbor for neighbor in spc.mol.atoms[atom_index].edges.keys() - if spc.mol.atoms.index(neighbor) != exclude_index] + neighbors = [ + neighbor + for neighbor in spc.mol.atoms[atom_index].edges.keys() + if spc.mol.atoms.index(neighbor) != exclude_index + ] if not neighbors: raise ValueError(f"Atom at index {atom_index} has no valid neighbors.") @@ -1355,12 +1434,17 @@ def get_neighbor_total_electronegativity(neighbor: 'Atom') -> float: float: The total electronegativity of the neighbor """ return sum( - ELECTRONEGATIVITIES[n.symbol] * neighbor.edges[n].order - for n in neighbor.edges.keys() + ELECTRONEGATIVITIES[n.symbol] * neighbor.edges[n].order for n in neighbor.edges.keys() ) - effective_electronegativities = [(ELECTRONEGATIVITIES[n.symbol] * spc.mol.atoms[atom_index].edges[n].order, - get_neighbor_total_electronegativity(n), n ) for n in neighbors] + effective_electronegativities = [ + ( + ELECTRONEGATIVITIES[n.symbol] * spc.mol.atoms[atom_index].edges[n].order, + get_neighbor_total_electronegativity(n), + n, + ) + for n in neighbors + ] effective_electronegativities.sort(reverse=True, key=lambda x: (x[0], x[1])) sorted_neighbors = [spc.mol.atoms.index(n[2]) for n in effective_electronegativities] most_electronegative = sorted_neighbors[0] @@ -1368,8 +1452,7 @@ def get_neighbor_total_electronegativity(neighbor: 'Atom') -> float: return most_electronegative, remaining_neighbors -def setup_zmat_indices(initial_xyz: dict, - xyz_indices: dict) -> Tuple[dict, dict]: +def setup_zmat_indices(initial_xyz: dict, xyz_indices: dict) -> Tuple[dict, dict]: """ Convert XYZ coordinates to Z-matrix format and set up corresponding indices. @@ -1387,26 +1470,28 @@ def setup_zmat_indices(initial_xyz: dict, 'a': key_by_val(initial_zmat.get('map', {}), xyz_indices['a']), 'b': key_by_val(initial_zmat.get('map', {}), xyz_indices['b']), 'e': key_by_val(initial_zmat.get('map', {}), xyz_indices['e']), - 'd': key_by_val(initial_zmat.get('map', {}), xyz_indices['d']) if xyz_indices['d'] is not None else None + 'd': key_by_val(initial_zmat.get('map', {}), xyz_indices['d']) if xyz_indices['d'] is not None else None, } return initial_zmat, zmat_indices -def generate_dihedral_variants(zmat: dict, - indices: List[int], - adjustment_factors: List[float], - flip: bool = False, - tolerance_degrees: float = 10.0) -> List[dict]: +def generate_dihedral_variants( + zmat: dict, + indices: List[int], + adjustment_factors: List[float], + flip: bool = False, + tolerance_degrees: float = 10.0, +) -> List[dict]: """ - Create variants of a Z-matrix by adjusting dihedral angles using multiple adjustment factors. + Create variants of a Z-matrix by adjusting dihedral angles using multiple adjustment factors. This function creates variants of the Z-matrix using different adjustment factors: - 1. Retrieve the current dihedral value and normalize it to the (-180°, 180°] range. - 2. For each adjustment factor, slightly push the angle away from 0° or ±180° to avoid - unstable, boundary configurations. - 3. If `flip=True`, the same procedure is applied starting from a flipped - (180°-shifted) baseline angle. - 4. Each adjusted or flipped variant is deep-copied to ensure independence. + 1. Retrieve the current dihedral value and normalize it to the (-180°, 180°] range. + 2. For each adjustment factor, slightly push the angle away from 0° or ±180° to avoid + unstable, boundary configurations. + 3. If `flip=True`, the same procedure is applied starting from a flipped + (180°-shifted) baseline angle. + 4. 
Each adjusted or flipped variant is deep-copied to ensure independence. Args: zmat (dict): The initial Z-matrix. @@ -1414,7 +1499,8 @@ def generate_dihedral_variants(zmat: dict, adjustment_factors (List[float], optional): List of factors to try. flip (bool, optional): Whether to start from a flipped (180°) baseline dihedral angle. Defaults to False. - tolerance_degrees (float, optional): Tolerance (in degrees) for detecting angles near 0° or ±180°. Defaults to 10.0. + tolerance_degrees (float, optional): Tolerance (in degrees) for detecting angles near 0° or ±180°. + Defaults to 10.0. Returns: List[dict]: List of Z-matrix variants with adjusted dihedral angles. @@ -1440,8 +1526,9 @@ def push_up_dihedral(val: float, adj_factor: float) -> float: seed_value = normalized_value if flip: seed_value = get_angle_in_180_range(normalized_value + 180.0) - boundary_like = ((abs(seed_value) < tolerance_degrees) - or (180 - tolerance_degrees <= abs(seed_value) <= 180+tolerance_degrees)) + boundary_like = (abs(seed_value) < tolerance_degrees) or ( + 180 - tolerance_degrees <= abs(seed_value) <= 180 + tolerance_degrees + ) if boundary_like: for factor in adjustment_factors: variant = copy.deepcopy(zmat) @@ -1450,11 +1537,13 @@ def push_up_dihedral(val: float, adj_factor: float) -> float: return variants -def get_matching_dihedrals(zmat: dict, - a: int, - b: int, - e: int, - d: Optional[int]) -> List[List[int]]: +def get_matching_dihedrals( + zmat: dict, + a: int, + b: int, + e: int, + d: Optional[int], +) -> List[List[int]]: """ Retrieve all dihedral angles in the Z-matrix that match the given atom indices. This function scans the Z-matrix for dihedral parameters (keys starting with 'D_' or 'DX_') @@ -1484,11 +1573,13 @@ def get_matching_dihedrals(zmat: dict, return matches -def stretch_ab_bond(initial_zmat: 'dict', - xyz_indices: 'dict', - zmat_indices: 'dict', - hydrolysis_parameters: 'dict', - reaction_family: str) -> None: +def stretch_ab_bond( + initial_zmat: dict, + xyz_indices: dict, + zmat_indices: dict, + hydrolysis_parameters: dict, + reaction_family: str, +) -> None: """ Stretch the bond between atoms a and b in the Z-matrix based on the reaction family parameters. @@ -1519,16 +1610,18 @@ def stretch_ab_bond(initial_zmat: 'dict', stretch_zmat_bond(zmat=initial_zmat, indices=indices, stretch=stretch_degree) -def process_family_specific_adjustments(is_set_1: bool, - is_set_2: bool, - reaction_family: str, - hydrolysis_parameters: dict, - initial_zmat: dict, - water: 'ARCSpecies', - xyz_indices: dict, - zmats_total: List[dict]) -> Tuple[List[dict], List[dict]]: +def process_family_specific_adjustments( + is_set_1: bool, + is_set_2: bool, + reaction_family: str, + hydrolysis_parameters: dict, + initial_zmat: dict, + water: 'ARCSpecies', + xyz_indices: dict, + zmats_total: List[dict], +) -> Tuple[List[dict], List[dict]]: """ - Process specific adjustments for different hydrolysis reaction families if needed, then generate TS guesses . + Process specific adjustments for different hydrolysis reaction families if needed, then generate TS guesses. Args: is_set_1 (bool): Whether the reaction belongs to parameter set 1. @@ -1546,38 +1639,52 @@ def process_family_specific_adjustments(is_set_1: bool, Raises: ValueError: If the reaction family is not supported. 
""" - a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices.values() + a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz = xyz_indices.values() r_atoms = [a_xyz, o_xyz, o_xyz] a_atoms = [[b_xyz, a_xyz], [a_xyz, o_xyz], [h1_xyz, o_xyz]] - d_atoms = ([[e_xyz, d_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] - if d_xyz is not None else - [[e_xyz, b_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]]) + d_atoms = ( + [[e_xyz, d_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] + if d_xyz is not None + else [[e_xyz, b_xyz, a_xyz], [b_xyz, a_xyz, o_xyz], [a_xyz, h1_xyz, o_xyz]] + ) r_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['r_value'] a_value = hydrolysis_parameters['family_parameters'][str(reaction_family)]['a_value'] d_values = hydrolysis_parameters['family_parameters'][str(reaction_family)]['d_values'] if is_set_1 or is_set_2: initial_xyz = zmat_to_xyz(initial_zmat) - return generate_hydrolysis_ts_guess(initial_xyz, xyz_indices.values(), water, r_atoms, a_atoms, d_atoms, - r_value, a_value, d_values, zmats_total, is_set_1, - threshold=0.6 if reaction_family == 'nitrile_hydrolysis' else 0.8) + return generate_hydrolysis_ts_guess( + initial_xyz, + xyz_indices.values(), + water, + r_atoms, + a_atoms, + d_atoms, + r_value, + a_value, + d_values, + zmats_total, + is_set_1, + threshold=0.6 if reaction_family == 'nitrile_hydrolysis' else 0.8, + ) else: raise ValueError(f"Family {reaction_family} not supported for hydrolysis TS guess generation.") -def generate_hydrolysis_ts_guess(initial_xyz: dict, - xyz_indices: List[int], - water: 'ARCSpecies', - r_atoms: List[int], - a_atoms: List[List[int]], - d_atoms: List[List[int]], - r_value: List[float], - a_value: List[float], - d_values: List[List[float]], - zmats_total: List[dict], - is_set_1: bool, - threshold: float - ) -> Tuple[List[dict], List[dict]]: +def generate_hydrolysis_ts_guess( + initial_xyz: dict, + xyz_indices: List[int], + water: 'ARCSpecies', + r_atoms: List[int], + a_atoms: List[List[int]], + d_atoms: List[List[int]], + r_value: List[float], + a_value: List[float], + d_values: List[List[float]], + zmats_total: List[dict], + is_set_1: bool, + threshold: float, +) -> Tuple[List[dict], List[dict]]: """ Generate Z-matrices and Cartesian coordinates for transition state (TS) guesses. 
@@ -1600,7 +1707,7 @@ def generate_hydrolysis_ts_guess(initial_xyz: dict, """ xyz_guesses = [] - for index, d_value in enumerate(d_values): + for d_value in d_values: xyz_guess = copy.deepcopy(initial_xyz) for i in range(3): xyz_guess = add_atom_to_xyz_using_internal_coords( @@ -1611,23 +1718,22 @@ def generate_hydrolysis_ts_guess(initial_xyz: dict, d_indices=d_atoms[i], r_value=r_value[i], a_value=a_value[i], - d_value=d_value[i] + d_value=d_value[i], ) - a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz= xyz_indices - are_valid_bonds=check_ts_bonds(xyz_guess, [o_xyz, h1_xyz, h1_xyz+1, a_xyz, b_xyz]) - colliding=colliding_atoms(xyz_guess, threshold=threshold) + a_xyz, b_xyz, e_xyz, o_xyz, h1_xyz, d_xyz = xyz_indices + are_valid_bonds = check_ts_bonds(xyz_guess, [o_xyz, h1_xyz, h1_xyz + 1, a_xyz, b_xyz]) + colliding = colliding_atoms(xyz_guess, threshold=threshold) duplicate = any(compare_zmats(existing, xyz_to_zmat(xyz_guess)) for existing in zmats_total) if is_set_1: - dihedral_edao=[e_xyz, d_xyz, a_xyz, o_xyz] - dao_is_linear=check_dao_angle(dihedral_edao, xyz_guess) + dihedral_edao = [e_xyz, d_xyz, a_xyz, o_xyz] + dao_is_linear = check_dao_angle(dihedral_edao, xyz_guess) else: - dao_is_linear=False + dao_is_linear = False if xyz_guess is not None and not colliding and not duplicate and are_valid_bonds and not dao_is_linear: xyz_guesses.append(xyz_guess) zmats_total.append(xyz_to_zmat(xyz_guess)) - return xyz_guesses, zmats_total @@ -1644,7 +1750,7 @@ def check_dao_angle(d_indices: List[int], xyz_guess: dict) -> bool: """ angle_indices = [d_indices[1], d_indices[2], d_indices[3]] angle_value = calculate_angle(xyz_guess, angle_indices) - norm_value=(angle_value + 180) % 180 + norm_value = (angle_value + 180) % 180 return (norm_value < 10) or (norm_value > 170) @@ -1659,7 +1765,7 @@ def check_ts_bonds(transition_state_xyz: dict, tested_atom_indices: list) -> boo Returns: bool: Whether the transition state guess has the expected water-related bonds. 
""" - oxygen_index, h1_index, h2_index, a_index, b_index= tested_atom_indices + oxygen_index, h1_index, h2_index, a_index, b_index = tested_atom_indices oxygen_bonds = sorted_distances_of_atom(transition_state_xyz, oxygen_index) h1_bonds = sorted_distances_of_atom(transition_state_xyz, h1_index) h2_bonds = sorted_distances_of_atom(transition_state_xyz, h2_index) @@ -1678,10 +1784,12 @@ def check_oxygen_bonds(bonds): return rel_error <= 0.1 return False - oxygen_has_valid_bonds = (oxygen_bonds[0][0] == h2_index and check_oxygen_bonds(oxygen_bonds)) - h1_has_valid_bonds = (h1_bonds[0][0] in {oxygen_index, b_index}and h1_bonds[1][0] in {oxygen_index, b_index}) + oxygen_has_valid_bonds = oxygen_bonds[0][0] == h2_index and check_oxygen_bonds(oxygen_bonds) + h1_has_valid_bonds = (h1_bonds[0][0] in {oxygen_index, b_index}) and ( + h1_bonds[1][0] in {oxygen_index, b_index} + ) h2_has_valid_bonds = h2_bonds[0][0] == oxygen_index return oxygen_has_valid_bonds and h1_has_valid_bonds and h2_has_valid_bonds -register_job_adapter('heuristics', HeuristicsAdapter) +register_job_adapter("heuristics", HeuristicsAdapter) diff --git a/arc/species/species.py b/arc/species/species.py index 0fe014d080..9e4947d7fb 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1556,6 +1556,7 @@ def cluster_tsgs(self): for tsg in self.ts_guesses: for cluster_tsg in cluster_tsgs: if cluster_tsg.almost_equal_tsgs(tsg): + logger.info(f"Similar TSGuesses found: {tsg.index} is similar to {cluster_tsg.index}") cluster_tsg.cluster.append(tsg.index) if tsg.method not in cluster_tsg.method: cluster_tsg.method += f' + {tsg.method}' From 1afcd040d6bd7f92618eb7cb0e615d078b173547 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:48:49 +0200 Subject: [PATCH 4/9] Adds XYZ string reordering function Adds a function to reorder XYZ strings between atom-first and coordinate-first formats, with optional unit conversion. Also introduces a backward compatible wrapper with a deprecation warning. Uses constants for unit conversion factors Replaces hardcoded unit conversion factors with values from the `constants` module. Also, determines atom ordering more robustly by inspecting only the first line. Corrects angstrom to bohr conversion factor The conversion factor between angstrom and bohr was inverted, which led to incorrect geometry conversions. This commit corrects the conversion factor to ensure proper unit conversion when dealing with xyz coordinates. --- arc/species/converter.py | 98 +++++++++++++++++++++++++++++++++++ arc/species/converter_test.py | 19 +++++++ 2 files changed, 117 insertions(+) diff --git a/arc/species/converter.py b/arc/species/converter.py index ce0d484541..9d19a4b13d 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -5,6 +5,7 @@ import math import numpy as np import os +import warnings from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union from ase import Atoms @@ -48,6 +49,103 @@ DIST_PRECISION = 0.01 # Angstrom ANGL_PRECISION = 0.1 # rad (for both bond angle and dihedral) +def reorder_xyz_string(xyz_str: str, + reverse_atoms: bool = False, + units: str = 'angstrom', + convert_to: str = 'angstrom', + project_directory: Optional[str] = None + ) -> str: + """ + Reorder an XYZ string between ``ATOM X Y Z`` and ``X Y Z ATOM`` with optional unit conversion. + + Args: + xyz_str (str): The string xyz format to be converted. + reverse_atoms (bool, optional): Whether to reverse the atoms and coordinates. 
+        units (str, optional): Units of the input coordinates ('angstrom' or 'bohr').
+        convert_to (str, optional): The units to convert to (either 'angstrom' or 'bohr').
+        project_directory (str, optional): The path to the project directory.
+
+    Raises:
+        ConverterError: If xyz_str is not a string (after tuple/list coercion), if it is empty,
+            if a non-empty line does not have four space-separated entries,
+            or if the units are not 'angstrom' or 'bohr'.
+
+    Returns: str
+        The converted string xyz format.
+    """
+    if isinstance(xyz_str, tuple):
+        xyz_str = '\n'.join(xyz_str)
+    if isinstance(xyz_str, list):
+        xyz_str = '\n'.join(xyz_str)
+    if not isinstance(xyz_str, str):
+        raise ConverterError(f'Expected a string input, got {type(xyz_str)}')
+    if project_directory is not None:
+        file_path = os.path.join(project_directory, xyz_str)
+        if os.path.isfile(file_path):
+            with open(file_path, 'r') as f:
+                xyz_str = f.read()
+
+    if units.lower() == 'angstrom' and convert_to.lower() == 'angstrom':
+        conversion_factor = 1
+    elif units.lower() == 'bohr' and convert_to.lower() == 'bohr':
+        conversion_factor = 1
+    elif units.lower() == 'angstrom' and convert_to.lower() == 'bohr':
+        conversion_factor = constants.angstrom_to_bohr
+    elif units.lower() == 'bohr' and convert_to.lower() == 'angstrom':
+        conversion_factor = constants.bohr_to_angstrom
+    else:
+        raise ConverterError("Invalid units. Both 'units' and 'convert_to' must be either 'angstrom' or 'bohr'.")
+
+    processed_lines = list()
+    # Split the string into lines
+    lxyz = xyz_str.strip().splitlines()
+    if not lxyz:
+        raise ConverterError('Got an empty xyz_str.')
+    # Determine whether the atom label appears first or last by inspecting the first line only
+    first_line_tokens = lxyz[0].strip().split()
+    atom_first = not is_str_float(first_line_tokens[0])
+
+    for item in lxyz:
+        parts = item.strip().split()
+        if len(parts) != 4:
+            raise ConverterError(f'xyz_str has an incorrect format, expected 4 elements in each line, '
+                                 f'got "{item}" in:\n{xyz_str}')
+        if atom_first:
+            atom, x_str, y_str, z_str = parts
+        else:
+            x_str, y_str, z_str, atom = parts
+
+        try:
+            x = float(x_str) * conversion_factor
+            y = float(y_str) * conversion_factor
+            z = float(z_str) * conversion_factor
+        except ValueError as e:
+            raise ConverterError(f'Could not convert {x_str}, {y_str}, or {z_str} to floats.') from e
+
+        # reverse_atoms flips whichever column order the input used.
+        if atom_first != reverse_atoms:
+            formatted_line = f'{atom} {x} {y} {z}'
+        else:
+            formatted_line = f'{x} {y} {z} {atom}'
+        processed_lines.append(formatted_line)
+
+    return '\n'.join(processed_lines)
+
+
+def str_to_str(*args, **kwargs) -> str:
+    """
+    Backwards compatible wrapper for reorder_xyz_string.
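+
+    Forwards all positional and keyword arguments to reorder_xyz_string
+    unchanged, after emitting a DeprecationWarning.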
+ """ + warnings.warn( + "str_to_str was renamed to reorder_xyz_string and will be removed in a future ARC release", + DeprecationWarning, + stacklevel=2, + ) + return reorder_xyz_string(*args, **kwargs) + def str_to_xyz(xyz_str: str, project_directory: Optional[str] = None, diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index 0d1345d94c..2a1265917a 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -700,6 +700,25 @@ def test_str_to_xyz(self): xyz = converter.str_to_xyz(xyz_format) self.assertEqual(xyz, expected_xyz) + def test_reorder_xyz_string_atom_first(self): + """Test reordering atom-first XYZ strings with unit conversion""" + xyz_format = "C 0.0 1.0 2.0\nH -1.0 0.5 0.0" + converted = converter.reorder_xyz_string(xyz_str=xyz_format, reverse_atoms=True, convert_to="bohr") + expected = "0.0 1.8897259886 3.7794519772 C\n-1.8897259886 0.9448629943 0.0 H" + self.assertEqual(converted, expected) + + def test_reorder_xyz_string_coordinate_first(self): + """Test reordering coordinate-first XYZ strings back to atom-last order with conversion""" + xyz_format = "0.0 0.0 0.0 N\n1.0 0.0 0.0 H" + converted = converter.reorder_xyz_string( + xyz_str=xyz_format, + reverse_atoms=False, + units="bohr", + convert_to="angstrom", + ) + expected = "0.0 0.0 0.0 N\n0.529177 0.0 0.0 H" + self.assertEqual(converted, expected) + def test_xyz_to_str(self): """Test converting an ARC xyz format to a string xyz format""" xyz_str1 = converter.xyz_to_str(xyz_dict=self.xyz1['dict']) From 50e0256019f95ce3e5ddb18bcd893ccbf527ad6a Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:49:20 +0200 Subject: [PATCH 5/9] Removes files for server time limit test Removes input files and submit script related to a specific server time limit test case. The `err.txt` file is renamed to `wall_exceeded.txt` and moved to the `trsh` folder. 
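
For reference, a minimal sketch of staging the shared fixture into a
job's local path (this mirrors the adapter_test.py change below; the
`job` object is a hypothetical JobAdapter instance, and ARC_PATH comes
from arc.common):

    fixture_path = os.path.join(ARC_PATH, 'arc', 'testing', 'trsh', 'wall_exceeded.txt')
    with open(fixture_path, 'r') as f:
        log_content = f.read()
    os.makedirs(job.local_path, exist_ok=True)
    with open(os.path.join(job.local_path, 'out.txt'), 'w') as f:
        f.write(log_content)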
--- arc/job/adapter_test.py | 6 +++ .../calcs/Species/spc1/spc1/input.gjf | 12 ------ .../calcs/Species/spc1/spc1/submit.sub | 37 ------------------- .../spc1/err.txt => trsh/wall_exceeded.txt} | 0 4 files changed, 6 insertions(+), 49 deletions(-) delete mode 100644 arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf delete mode 100644 arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub rename arc/testing/{test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt => trsh/wall_exceeded.txt} (100%) diff --git a/arc/job/adapter_test.py b/arc/job/adapter_test.py index 429bf31110..aa90ad84b4 100644 --- a/arc/job/adapter_test.py +++ b/arc/job/adapter_test.py @@ -208,6 +208,12 @@ def setUpClass(cls): server='server3', testing=True, ) + os.makedirs(cls.job_5.local_path, exist_ok=True) + fixture_path = os.path.join(ARC_PATH, 'arc', 'testing', 'trsh', 'wall_exceeded.txt') + with open(fixture_path, 'r') as f: + log_content = f.read() + with open(os.path.join(cls.job_5.local_path, 'out.txt'), 'w') as f: + f.write(log_content) cls.job_6 = GaussianAdapter(execution_type='queue', job_name='spc1', job_type='opt', diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf deleted file mode 100644 index 36f9d855ac..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/input.gjf +++ /dev/null @@ -1,12 +0,0 @@ -%chk=check.chk -%mem=14336mb -%NProcShared=8 - -#P opt=(calcfc) cbs-qb3 IOp(2/9=2000) - -spc1 - -0 3 -O 0.00000000 0.00000000 1.00000000 - - diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub b/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub deleted file mode 100644 index 00b840cd67..0000000000 --- a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/submit.sub +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -l -#SBATCH -p normal -#SBATCH -J server1 -#SBATCH -N 1 -#SBATCH -n 8 -#SBATCH --time=120:00:00 -#SBATCH --mem-per-cpu=15770 -#SBATCH -o out.txt -#SBATCH -e err.txt - -export g16root=/home/gridsan/groups/GRPAPI/Software -export PATH=$g16root/g16/:$g16root/gv:$PATH -which g16 - -echo "============================================================" -echo "Job ID : $SLURM_JOB_ID" -echo "Job Name : $SLURM_JOB_NAME" -echo "Starting on : $(date)" -echo "Running on node : $SLURMD_NODENAME" -echo "Current directory : $(pwd)" -echo "============================================================" - -touch initial_time - -GAUSS_SCRDIR=/state/partition1/user//$SLURM_JOB_NAME-$SLURM_JOB_ID -export $GAUSS_SCRDIR -. $g16root/g16/bsd/g16.profile - -mkdir -p $GAUSS_SCRDIR - -g16 < input.gjf > input.log - -rm -rf $GAUSS_SCRDIR - -touch final_time - - \ No newline at end of file diff --git a/arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt b/arc/testing/trsh/wall_exceeded.txt similarity index 100% rename from arc/testing/test_JobAdapter_ServerTimeLimit/calcs/Species/spc1/spc1/err.txt rename to arc/testing/trsh/wall_exceeded.txt From 9e8669ed1cad0df2cc37c076a60e399d4bd13e57 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 11:49:51 +0200 Subject: [PATCH 6/9] Adds CREST adapter to transition state search Enables the usage of CREST as a TS adapter. The implementation includes: - Adding 'crest' to the list of available TS adapters. 
- Implementing a function to automatically detect the CREST executable
  in various locations (standalone builds, conda environments, PATH).
- Providing instructions for activating the CREST environment, if
  necessary.

This allows ARC to leverage CREST for enhanced transition state search
capabilities.

Improves crest executable search logic

Refactors the crest executable search to more reliably locate the
correct version by normalizing path joining. Updates the documentation
for clarity.

Makes crest standalone directory configurable

Allows users to configure the directory where standalone crest builds
are located. This enables greater flexibility in managing crest
installations, especially in environments where the default location
(/Local/ce_dana) is not suitable.

Exports all settings variables

Makes all settings variables available for import in other modules,
which is needed by the CREST adapter.
---
 arc/settings/settings.py | 179 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 178 insertions(+), 1 deletion(-)

diff --git a/arc/settings/settings.py b/arc/settings/settings.py
index ea2c90a9cc..34a8ff2a03 100644
--- a/arc/settings/settings.py
+++ b/arc/settings/settings.py
@@ -9,6 +9,7 @@
 import os
 import string
 import sys
+import re
+import shutil
 
 # Users should update the following server dictionary.
 # Instructions for RSA key generation can be found here:
@@ -88,7 +89,7 @@
 supported_ess = ['cfour', 'gaussian', 'mockter', 'molpro', 'orca', 'qchem', 'terachem', 'onedmin', 'xtb', 'torchani', 'openbabel']
 
 # TS methods to try when appropriate for a reaction (other than user guesses which are always allowed):
-ts_adapters = ['heuristics', 'AutoTST', 'GCN', 'xtb_gsm']
+ts_adapters = ['heuristics', 'AutoTST', 'GCN', 'xtb_gsm', 'crest']
 
 # List here job types to execute by default
 default_job_types = {'conf_opt': True,  # defaults to True if not specified
@@ -427,3 +428,179 @@ def add_rmg_db_candidates(prefix: str) -> None:
     if path and os.path.isdir(path):
         RMG_DB_PATH = path
         break
+
+
+def parse_version(folder_name):
+    """
+    Parses the version from the folder name and returns a tuple for comparison.
+    Supports versions like: 3.0.2, v212, 2.1, 2.
+    Returns (0, 0, 0) if no version-like token is found.
+    """
+    version_regex = re.compile(r"(?:v?(\d+)(?:\.(\d+))?(?:\.(\d+))?)", re.IGNORECASE)
+    match = version_regex.search(folder_name)
+    if not match:
+        return (0, 0, 0)
+
+    major = int(match.group(1)) if match.group(1) else 0
+    minor = int(match.group(2)) if match.group(2) else 0
+    patch = int(match.group(3)) if match.group(3) else 0
+
+    # Example: v212 → (2, 1, 2)
+    if major >= 100 and match.group(2) is None and match.group(3) is None:
+        s = str(major).rjust(3, "0")
+        major = int(s[0])
+        minor = int(s[1])
+        patch = int(s[2])
+
+    return (major, minor, patch)
+
+
+def find_highest_version_in_directory(directory, name_contains):
+    """
+    Find the crest executable with the highest version in a directory.
+
+    This function searches the given directory for subdirectories whose names contain
+    the specified string, interprets their names as versioned folders (e.g., "crest-3.0.2",
+    "v212", "2.1"), and within each candidate folder looks for an executable named
+    ``crest``. It returns the full path to the crest executable located in the folder
+    with the highest parsed version.
+
+    Args:
+        directory (str): The path to the directory to search.
+        name_contains (str): A substring that must be present in the folder name.
+
+    Returns:
+        Optional[str]: The full path to the crest executable with the highest version,
+                       or None if no suitable executable is found.
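+
+    Example:
+        With sibling folders ``crest-3.0.2`` and ``crest-v212`` under
+        ``directory`` (a hypothetical layout), the executable inside
+        ``crest-3.0.2`` is returned, since (3, 0, 2) > (2, 1, 2).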
+ """ + if not os.path.exists(directory): + return None + + highest_version_path = None + highest_version = () + + for folder in os.listdir(directory): + file_path = os.path.join(directory, folder) + if name_contains.lower() in folder.lower() and os.path.isdir(file_path): + crest_path = os.path.join(file_path, "crest") + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + version = parse_version(folder) + if highest_version == () or version > highest_version: + highest_version = version + highest_version_path = crest_path + return highest_version_path + + +def find_crest_executable(): + """ + Returns (crest_path, env_cmd): + + - crest_path: full path to 'crest' + - env_cmd: shell snippet to activate its environment (may be "") + """ + # Priority 1: standalone builds in a configurable directory (default: /Local/ce_dana) + standalone_dir = os.getenv("ARC_CREST_STANDALONE_DIR", "/Local/ce_dana") + crest_path = find_highest_version_in_directory(standalone_dir, "crest") + if crest_path and os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # Standalone binary: no env activation needed + return crest_path, "" + + # Priority 2: Conda/Mamba/Micromamba envs + home = os.path.expanduser("~") + potential_env_paths = [ + os.path.join(home, "anaconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniconda3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "miniforge3", "envs", "crest_env", "bin", "crest"), + os.path.join(home, ".conda", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "mambaforge", "envs", "crest_env", "bin", "crest"), + os.path.join(home, "micromamba", "envs", "crest_env", "bin", "crest"), + ] + + # Also check the current env's bin + current_env_bin = os.path.dirname(sys.executable) + potential_env_paths.insert(0, os.path.join(current_env_bin, "crest")) + + for crest_path in potential_env_paths: + if os.path.isfile(crest_path) and os.access(crest_path, os.X_OK): + # env_root = .../anaconda3 or .../miniforge3 or .../mambaforge etc. 
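+            # A conda-style layout, <root>/envs/crest_env/bin/crest, is
+            # assumed; splitting on the marker recovers <root> for activation.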
+            env_marker = os.path.join("envs", "crest_env") + os.path.sep
+            if env_marker not in crest_path:
+                # Found in the current environment's bin (or some other bare
+                # prefix), so no activation command is needed.
+                return crest_path, ""
+            env_root = crest_path.split(env_marker)[0].rstrip(os.path.sep)
+            if "micromamba" in crest_path:
+                env_cmd = (
+                    f"source {env_root}/etc/profile.d/micromamba.sh && "
+                    f"micromamba activate crest_env"
+                )
+            elif any(
+                name in env_root
+                for name in ("anaconda3", "miniconda3", "miniforge3", "mambaforge", ".conda")
+            ):
+                env_cmd = (
+                    f"source {env_root}/etc/profile.d/conda.sh && "
+                    f"conda activate crest_env"
+                )
+            else:
+                # If for some reason it's just a random prefix with crest in bin
+                env_cmd = ""
+            return crest_path, env_cmd
+
+    # Priority 3: PATH
+    crest_in_path = shutil.which("crest")
+    if crest_in_path:
+        return crest_in_path, ""
+
+    return None, None
+
+
+CREST_PATH, CREST_ENV_PATH = find_crest_executable()
+
+__all__ = [
+    "servers",
+    "global_ess_settings",
+    "supported_ess",
+    "ts_adapters",
+    "default_job_types",
+    "levels_ess",
+    "check_status_command",
+    "submit_command",
+    "delete_command",
+    "list_available_nodes_command",
+    "submit_filenames",
+    "t_max_format",
+    "input_filenames",
+    "output_filenames",
+    "default_levels_of_theory",
+    "orca_default_options_dict",
+    "tani_default_options_dict",
+    "ob_default_settings",
+    "xtb_gsm_settings",
+    "valid_chars",
+    "rotor_scan_resolution",
+    "maximum_barrier",
+    "minimum_barrier",
+    "inconsistency_az",
+    "inconsistency_ab",
+    "max_rotor_trsh",
+    "preserve_params_in_scan",
+    "workers_coeff",
+    "default_job_settings",
+    "ARC_FAMILIES_PATH",
+    "home",
+    "TANI_PYTHON",
+    "OB_PYTHON",
+    "TS_GCN_PYTHON",
+    "AUTOTST_PYTHON",
+    "ARC_PYTHON",
+    "RMG_ENV_NAME",
+    "RMG_PYTHON",
+    "XTB",
+    "exported_rmg_path",
+    "exported_rmg_db_path",
+    "gw",
+    "find_executable",
+    "add_rmg_db_candidates",
+    "parse_version",
+    "find_highest_version_in_directory",
+    "find_crest_executable",
+    "CREST_PATH",
+    "CREST_ENV_PATH",
+]

From e0bc106c27f4c8d54081e623f53502c15a04c800 Mon Sep 17 00:00:00 2001
From: Calvin Pieters
Date: Mon, 2 Feb 2026 15:02:43 +0200
Subject: [PATCH 7/9] Introduces method sources list for TSGuess

Adds a `method_sources` attribute to the `TSGuess` class to track all
methods/sources that generated a particular transition state guess,
allowing for more comprehensive provenance information. This improves
the clustering of similar TS guesses.

Normalizes method sources to a unique, ordered, lowercase list.
---
 arc/species/species.py      | 30 +++++++++++++++++++++++++---
 arc/species/species_test.py | 11 +++++++----
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/arc/species/species.py b/arc/species/species.py
index 9e4947d7fb..49b7b18116 100644
--- a/arc/species/species.py
+++ b/arc/species/species.py
@@ -1558,9 +1558,9 @@ def cluster_tsgs(self):
             if cluster_tsg.almost_equal_tsgs(tsg):
                 logger.info(f"Similar TSGuesses found: {tsg.index} is similar to {cluster_tsg.index}")
                 cluster_tsg.cluster.append(tsg.index)
-                if tsg.method not in cluster_tsg.method:
-                    cluster_tsg.method += f' + {tsg.method}'
-                cluster_tsg.execution_time = f'{cluster_tsg.execution_time} + {tsg.execution_time}'
+                cluster_tsg.method_sources = TSGuess._normalize_method_sources(
+                    (cluster_tsg.method_sources or []) + (tsg.method_sources or [])
+                )
                 break
@@ -2194,6 +2194,7 @@ class TSGuess(object):
         initial_xyz (dict): The 3D coordinates guess.
         opt_xyz (dict): The 3D coordinates after optimization at the ts_guesses level.
         method (str): The method/source used for the xyz guess.
+        method_sources (List[str]): All methods/sources that produced an equivalent xyz guess.
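+            Normalized to a unique, ordered, lowercase list by _normalize_method_sources().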
method_index (int): A subindex, used for cases where a single method generates several guesses. Counts separately for each direction, 'F' and 'R'. method_direction (str): The reaction direction used for generating the guess ('F' or 'R'). @@ -2238,6 +2239,7 @@ def __init__(self, # Not reading from a dictionary self.index = index self.method = method.lower() if method is not None else 'user guess' + self.method_sources = self._normalize_method_sources([self.method]) self.method_index = method_index self.method_direction = method_direction self.constraints = constraints @@ -2294,6 +2296,22 @@ def opt_xyz(self, value): """Allow setting the initial coordinate guess""" self._opt_xyz = check_xyz_dict(value) + @staticmethod + def _normalize_method_sources(method_sources: Optional[List[str]]) -> List[str]: + """ + Normalize method_sources to a unique, ordered, lowercase list. + """ + if not method_sources: + return [] + normalized = [] + for method in method_sources: + if method is None: + continue + method = method.lower() + if method not in normalized: + normalized.append(method) + return normalized + def as_dict(self, for_report: bool = False) -> dict: """ A helper function for dumping this object as a dictionary. @@ -2307,6 +2325,8 @@ def as_dict(self, for_report: bool = False) -> dict: """ ts_dict = dict() ts_dict['method'] = self.method + if self.method_sources: + ts_dict['method_sources'] = list(self.method_sources) ts_dict['method_index'] = self.method_index if self.method_direction is not None: ts_dict['method_direction'] = self.method_direction @@ -2355,6 +2375,10 @@ def from_dict(self, ts_dict: dict): and isinstance(ts_dict['execution_time'], str) \ else ts_dict['execution_time'] if 'execution_time' in ts_dict else None self.method = ts_dict['method'].lower() if 'method' in ts_dict else 'user guess' + if 'method_sources' in ts_dict and isinstance(ts_dict['method_sources'], list): + self.method_sources = self._normalize_method_sources(ts_dict['method_sources']) + else: + self.method_sources = self._normalize_method_sources([self.method]) self.method_index = ts_dict['method_index'] if 'method_index' in ts_dict else None self.method_direction = ts_dict['method_direction'] if 'method_direction' in ts_dict else None self.imaginary_freqs = ts_dict['imaginary_freqs'] if 'imaginary_freqs' in ts_dict else None diff --git a/arc/species/species_test.py b/arc/species/species_test.py index 6cb6cfded9..330b18ec64 100644 --- a/arc/species/species_test.py +++ b/arc/species/species_test.py @@ -2225,8 +2225,9 @@ def test_cluster_tsgs(self): self.assertEqual(len(spc_1.ts_guesses), 4) spc_1.cluster_tsgs() self.assertEqual(len(spc_1.ts_guesses), 2) - self.assertEqual(spc_1.ts_guesses[0].method, 'user guess 0 + kinbot') - self.assertEqual(spc_1.ts_guesses[0].execution_time, '00:00:02 + 00:00:02') + self.assertEqual(spc_1.ts_guesses[0].method, 'user guess 0') + self.assertEqual(spc_1.ts_guesses[0].method_sources, ['user guess 0', 'kinbot']) + self.assertEqual(spc_1.ts_guesses[0].execution_time, '00:00:02') self.assertEqual(spc_1.ts_guesses[0].index, 0) self.assertEqual(spc_1.ts_guesses[1].method, 'gcn') self.assertEqual(spc_1.ts_guesses[1].execution_time, '00:00:02') @@ -2888,6 +2889,7 @@ def test_as_dict(self): """Test TSGuess.as_dict()""" tsg_dict = self.tsg1.as_dict() expected_dict = {'method': 'autotst', + 'method_sources': ['autotst'], 'conformer_index': None, 'family': 'H_Abstraction', 'index': None, @@ -2906,9 +2908,10 @@ def test_from_dict(self): ts_dict = self.tsg1.as_dict() tsg = 
TSGuess(ts_dict=ts_dict) self.assertEqual(tsg.method, 'autotst') + self.assertEqual(tsg.method_sources, ['autotst']) ts_dict_for_report = self.tsg1.as_dict(for_report=True) - self.assertEqual(list(ts_dict_for_report.keys()), ['method', 'method_index', 'success', 'index', - 'conformer_index', 'initial_xyz', 'opt_xyz']) + self.assertEqual(list(ts_dict_for_report.keys()), ['method', 'method_sources', 'method_index', 'success', + 'index', 'conformer_index', 'initial_xyz', 'opt_xyz']) def test_process_xyz(self): """Test the process_xyz() method""" From fc54bc3990eefdf845c77d6a73b47cca3aeaa7ec Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Mon, 2 Feb 2026 16:30:47 +0200 Subject: [PATCH 8/9] Adds angstrom to bohr conversion factor Adds the angstrom to bohr conversion factor. This conversion factor is required for the CREST adapter. --- arc/constants.pxd | 2 +- arc/constants.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arc/constants.pxd b/arc/constants.pxd index 4a50c72602..9fc3b9127d 100644 --- a/arc/constants.pxd +++ b/arc/constants.pxd @@ -1 +1 @@ -cdef double pi, Na, kB, R, h, hbar, c, e, m_e, m_p, m_n, amu, a0, E_h, F, E_h_kJmol, bohr_to_angstrom +cdef double pi, Na, kB, R, h, hbar, c, e, m_e, m_p, m_n, amu, a0, E_h, F, E_h_kJmol, bohr_to_angstrom, angstrom_to_bohr diff --git a/arc/constants.py b/arc/constants.py index fef8e8f167..82c30212ae 100644 --- a/arc/constants.py +++ b/arc/constants.py @@ -79,6 +79,7 @@ epsilon_0 = 8.8541878128 bohr_to_angstrom = 0.529177 +angstrom_to_bohr = 1.8897259886 # Cython does not automatically place module-level variables into the module # symbol table when in compiled mode, so we must do this manually so that we @@ -102,4 +103,5 @@ 'F': F, 'epsilon_0': epsilon_0, 'bohr_to_angstrom': bohr_to_angstrom, + 'angstrom_to_bohr': angstrom_to_bohr, }) From 6f36b414f0c46fb9f9e702e28d3351dcaa722472 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Tue, 3 Feb 2026 17:37:14 +0200 Subject: [PATCH 9/9] Avoids project name collisions in parallel tests Introduces a function to generate unique project names for parallel test execution using xdist. This prevents collisions when cleaning up project directories, ensuring reliable test execution. --- functional/restart_test.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/functional/restart_test.py b/functional/restart_test.py index fa3407b863..7b8998cfaf 100644 --- a/functional/restart_test.py +++ b/functional/restart_test.py @@ -16,6 +16,14 @@ from arc.main import ARC +def _project_name(base: str) -> str: + """Return a per-xdist-worker project name to avoid parallel cleanup collisions.""" + worker_id = os.environ.get('PYTEST_XDIST_WORKER') + if worker_id: + return f'{base}_{worker_id}' + return base + + class TestRestart(unittest.TestCase): """ Contains unit tests for restarting ARC. 
@@ -36,7 +44,7 @@ def test_restart_thermo(self): """ restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_thermo' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_thermo') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True) @@ -55,7 +63,7 @@ def test_restart_thermo(self): break self.assertTrue(thermo_dft_ccsdtf12_bac) - with open(os.path.join(project_directory, 'arc_project_for_testing_delete_after_usage_restart_thermo.info'), 'r') as f: + with open(os.path.join(project_directory, f'{project}.info'), 'r') as f: sts, n2h3, oet, lot, ap = False, False, False, False, False for line in f.readlines(): if 'Considered the following species and TSs:' in line: @@ -66,7 +74,7 @@ def test_restart_thermo(self): oet = True elif 'Levels of theory used:' in line: lot = True - elif 'ARC project arc_project_for_testing_delete_after_usage_restart_thermo' in line: + elif f'ARC project {project}' in line: ap = True self.assertTrue(sts) self.assertTrue(n2h3) @@ -133,7 +141,7 @@ def test_restart_rate_1(self): """Test restarting ARC and attaining a reaction rate coefficient""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_rate_1' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -154,7 +162,7 @@ def test_restart_rate_1(self): def test_restart_rate_2(self): """Test restarting ARC and attaining a reaction rate coefficient""" - project = 'arc_project_for_testing_delete_after_usage_restart_rate_2' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2') project_directory = os.path.join(ARC_PATH, 'Projects', project) base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1') restart_path = os.path.join(base_path, 'restart.yml') @@ -183,7 +191,7 @@ def test_restart_bde (self): """Test restarting ARC and attaining a BDE for anilino_radical.""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'test_restart_bde' + project = _project_name('test_restart_bde') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -192,7 +200,7 @@ def test_restart_bde (self): arc1 = ARC(**input_dict) arc1.execute() - report_path = os.path.join(ARC_PATH, 'Projects', 'test_restart_bde', 'output', 'BDE_report.txt') + report_path = os.path.join(ARC_PATH, 'Projects', project, 'output', 'BDE_report.txt') with open(report_path, 'r') as f: lines = f.readlines() self.assertIn(' BDE report for anilino_radical:\n', lines) @@ -218,10 +226,10 @@ def tearDownClass(cls): A function that is run ONCE after all unit tests in 
this class. Delete all project directories created during these unit tests """ - projects = ['arc_project_for_testing_delete_after_usage_restart_thermo', - 'arc_project_for_testing_delete_after_usage_restart_rate_1', - 'arc_project_for_testing_delete_after_usage_restart_rate_2', - 'test_restart_bde', + projects = [_project_name('arc_project_for_testing_delete_after_usage_restart_thermo'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2'), + _project_name('test_restart_bde'), ] for project in projects: project_directory = os.path.join(ARC_PATH, 'Projects', project)