diff --git a/.gitignore b/.gitignore index 881fb92..f27c67d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ dist tmp .ipynb_checkpoints /tests/non_rdm_repo/ +.vscode diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7a910a2..df03fed 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,23 +5,19 @@ # Required version: 2 -# Set the version of Python and other tools you might need +# Set the version of Python and other tools that might be needed build: - os: ubuntu-22.04 + os: ubuntu-lts-latest tools: - python: "mambaforge-4.10" + python: "mambaforge-latest" + jobs: + install: + - pip install .[all] --group docs # Build documentation in the docs/ directory with Sphinx sphinx: - configuration: docs/source/conf.py + configuration: docs/source/conf.py -# Optionally declare the Python requirements required to build your docs +# Optionally declare the Python requirements required to build the docs conda: environment: environment.yml - -python: - install: - - method: pip - path: . 
- extra_requirements: - - docs diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000..92c994f --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,8 @@ +# Contributors + +* [Ronald Jäpel](https://github.com/ronald-jaepel) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Hannah Lanzrath](https://github.com/hannahlanzrath) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Johannes Schmölder](https://github.com/schmoelder) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Katharina Paul](https://github.com/katharinapaul2403) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Daniel Klauß](https://github.com/daklauss) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) +* [Eric von Lieres](https://github.com/lieres) (Forschungszentrum Juelich GmbH, IBG-1: Biotechnology, Juelich, Germany) \ No newline at end of file diff --git a/CITATION.bib b/CITATION.bib new file mode 100644 index 0000000..c88f646 --- /dev/null +++ b/CITATION.bib @@ -0,0 +1,2 @@ +% As an open-source project, CADET-RDM relies on the support and recognition from users and researchers to thrive. +% Therefore, we kindly ask that any publications or projects leveraging the capabilities of CADET-RDM acknowledge its creators and their contributions by citing an adequate selection of our publications. 
\ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..81bf3d9 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, 
threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +cadet@fz-juelich.de. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. 
This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/README.md b/README.md index 4a0d94d..41023c9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Welcome to CADET-RDM, a project by the Forschungszentrum Jülich. 
-This tool aims to help track and version control: +This toolbox aims to help track and version control: - input data - code - software versions @@ -12,4 +12,5 @@ and allow for easy sharing, integration, and reproduction of generated results. ## Documentation -You can read the documentation [here](https://cadet-rdm.readthedocs.io). \ No newline at end of file +The documentation contains a user guide with helpful information on how to install CADET-RDM, how to quickly start working with it and a more detailed explanation of its tools. +The documentation can be found [here](https://cadet-rdm.readthedocs.io). \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..3fa3663 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,17 @@ +# Security Policy + +## 1. No Network Dependencies + +CADET operates entirely offline and does not require any network access, nor does it handle sensitive information such as personal identifiers. This eliminates common attack vectors like network exploits or data breaches. + +## 2. Code Integrity and Contribution Reviews + +While there are no significant security risks associated with using CADET, we maintain strict controls over our source code. Any external contributions to the project are thoroughly reviewed and must meet our contribution guidelines. All pull requests are checked to ensure they do not introduce vulnerabilities. + +## 3. Dependency Management + +CADET does not rely on third-party libraries that introduce network or verification components. We carefully manage dependencies to ensure they are up-to-date and secure. + +## 4. Reporting Vulnerabilities + +Though CADET does not involve typical security risks, we encourage users and contributors to report any unexpected behavior or potential vulnerabilities they may discover. Please contact us via cadet@fz-juelich.de if you believe you've found a security issue. 
diff --git a/cadetrdm/cli_integration.py b/cadetrdm/cli_integration.py index fc0abd4..2583b1b 100644 --- a/cadetrdm/cli_integration.py +++ b/cadetrdm/cli_integration.py @@ -13,8 +13,8 @@ def cli(): @cli.command(help="Create an empty CADET-RDM repository or initialize over an existing git repo.") -@click.option('--output_repo_name', default="output", - help='Name of the folder where the tracked output should be stored. Optional. Default: "output".') +@click.option('--output_directory_name', default="output", + help='Name of the directory where the tracked output should be stored. Optional. Default: "output".') @click.option('--gitignore', default=None, help='List of files to be added to the gitignore file. Optional.') @click.option('--gitattributes', default=None, @@ -22,21 +22,21 @@ def cli(): @click.option('--cookiecutter', default=None, help='URL or path to cookiecutter template. Optional.') @click.argument('path_to_repo', required=False) -def init(path_to_repo: str = None, output_repo_name: (str | bool) = "output", gitignore: list = None, +def init(path_to_repo: str = None, output_directory_name: (str | bool) = "output", gitignore: list = None, gitattributes: list = None, cookiecutter: str = None, output_repo_kwargs: dict = None): if path_to_repo is None: path_to_repo = "." 
from cadetrdm.initialize_repo import initialize_repo as initialize_git_repo_implementation - initialize_git_repo_implementation(path_to_repo, output_repo_name, gitignore, + initialize_git_repo_implementation(path_to_repo, output_directory_name, gitignore, gitattributes, output_repo_kwargs, cookiecutter) -@cli.command(help="Clone a repository into a new d^irectory.") +@cli.command(help="Clone a repository into a new empty directory.") @click.argument('project_url') -@click.argument('dest', required=False) -def clone(project_url, dest: str = None): +@click.argument('destination_path', required=False) +def clone(project_url, destination_path: str = None): from cadetrdm import ProjectRepo - repo = ProjectRepo.clone(url=project_url, to_path=dest) + repo = ProjectRepo.clone(url=project_url, to_path=destination_path) del repo diff --git a/cadetrdm/container/ApptainerAdapter.py b/cadetrdm/container/ApptainerAdapter.py index ae5b3bd..e0a1617 100644 --- a/cadetrdm/container/ApptainerAdapter.py +++ b/cadetrdm/container/ApptainerAdapter.py @@ -61,7 +61,7 @@ # # ssh_location = Path.home() / ".ssh" # if not ssh_location.exists(): -# raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") +# raise FileNotFoundError("No ssh directory found. 
Please report this on GitHub/CADET/CADET-RDM") # # container = self.client.containers.run_yml( # image=image, diff --git a/cadetrdm/container/containerAdapter.py b/cadetrdm/container/containerAdapter.py index 34536a9..b3858f8 100644 --- a/cadetrdm/container/containerAdapter.py +++ b/cadetrdm/container/containerAdapter.py @@ -45,7 +45,7 @@ def _prepare_case_command(case, command, container_options_filename): if command_install is not None: commands.append(command_install) - # pull the study from the URL into a "study" folder + # pull the study from the URL into a "study" repository command_pull = f"rdm clone {case.project_repo.url} study" # cd into the "study" folder command_cd = "cd study" diff --git a/cadetrdm/container/dockerAdapter.py b/cadetrdm/container/dockerAdapter.py index 10e9d8c..6faeea2 100644 --- a/cadetrdm/container/dockerAdapter.py +++ b/cadetrdm/container/dockerAdapter.py @@ -72,7 +72,7 @@ def _run_command(self, full_command, image, mounts=None): ssh_location = Path.home() / ".ssh" if not ssh_location.exists(): - raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") + raise FileNotFoundError("No ssh directory found. Please report this on GitHub/CADET/CADET-RDM") volumes = { f"{Path.home()}/.ssh": {'bind': "/root/.ssh_host_os", 'mode': "ro"}, diff --git a/cadetrdm/container/podmanAdapter.py b/cadetrdm/container/podmanAdapter.py index 8927bf9..2a6f479 100644 --- a/cadetrdm/container/podmanAdapter.py +++ b/cadetrdm/container/podmanAdapter.py @@ -69,7 +69,7 @@ def _run_command(self, full_command, image, mounts=None): ssh_location = Path.home() / ".ssh" if not ssh_location.exists(): - raise FileNotFoundError("No ssh folder found. Please report this on GitHub/CADET/CADET-RDM") + raise FileNotFoundError("No ssh directory found. 
Please report this on GitHub/CADET/CADET-RDM") full_command = full_command.replace('"', "'") @@ -82,7 +82,7 @@ def _run_command(self, full_command, image, mounts=None): podman_command = ( f'podman run ' '--rm ' # remove container after run_yml (to keep space usage low) - f'-v {ssh_location}:/root/.ssh_host_os:ro ' # mount ssh folder for the container to access + f'-v {ssh_location}:/root/.ssh_host_os:ro ' # mount ssh directory for the container to access f'{volume_mounts}' # mount options file f'{image} ' # specify image name f'bash -c "{full_command}"' # run_yml command in bash shell diff --git a/cadetrdm/initialize_repo.py b/cadetrdm/initialize_repo.py index e9852dd..886a52f 100644 --- a/cadetrdm/initialize_repo.py +++ b/cadetrdm/initialize_repo.py @@ -18,22 +18,24 @@ from cadetrdm.io_utils import write_lines_to_file, wait_for_user, init_lfs, test_for_lfs import cadetrdm.templates.dockerfile_template as dockerfile_template -def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "output", gitignore: list = None, +def initialize_repo(path_to_repo: str | Path, output_directory_name: (str | bool) = "output", gitignore: list = None, gitattributes: list = None, output_repo_kwargs: dict = None, cookiecutter_template: str = None): """ Initialize a git repository at the given path with an optional included output results repository. :param path_to_repo: - Path to main repository. - :param output_folder_name: + Path to main repository. If set to ".", the repository is initialized in the root directory without creating a new directory. If given as a relative path (e.g. 'repository_name'), a new directory with that name is created inside the root directory. If given as an absolute path (e.g. "C:\\User\\name\\project"), a new directory is created at the specified location. + :param output_directory_name: Name for the output repository. :param gitignore: List of files to be added to the gitignore file. 
:param gitattributes: List of lines to be added to the gitattributes file :param output_repo_kwargs: - kwargs to be given to the creation of the output repo initalization function. + kwargs to be given to the creation of the output repository initialization function. Include gitignore, gitattributes, and lfs_filetypes kwargs. + :param cookiecutter_template: + Path to cookiecutter template to include files created by cookiecutter at repository initialization. """ test_for_lfs() @@ -43,8 +45,8 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = if gitignore is None: gitignore = get_default_gitignore() + ["*.ipynb", "*.h5"] - gitignore.append(f"/{output_folder_name}/") - gitignore.append(f"/{output_folder_name}_cached/") + gitignore.append(f"/{output_directory_name}/") + gitignore.append(f"/{output_directory_name}_cached/") if gitattributes is not None: write_lines_to_file(path=".gitattributes", lines=gitattributes, open_type="a") @@ -74,7 +76,7 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "is_project_repo": True, "is_output_repo": False, "project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid, "cadet_rdm_version": cadetrdm.__version__, - "output_remotes": {"output_folder_name": output_folder_name, "output_remotes": {}} + "output_remotes": {"output_directory_name": output_directory_name, "output_remotes": {}} } with open(".cadet-rdm-data.json", "w") as f: json.dump(rdm_data, f, indent=2) @@ -87,7 +89,7 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: (str | bool) = "commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc" }}, f, indent=2) - initialize_output_repo(output_folder_name, project_repo_uuid=project_repo_uuid, + initialize_output_repo(output_directory_name, project_repo_uuid=project_repo_uuid, output_repo_uuid=output_repo_uuid, **output_repo_kwargs) repo = ProjectRepo(".") @@ -115,12 +117,14 @@ def initialize_repo(path_to_repo: str | Path, output_folder_name: 
(str | bool) = def init_cookiecutter(cookiecutter_template, path_to_repo): """ - Initialize from cookiecutter template. Because cookiecutter can only create the files in a sub-directory - but cadet-rdm init can be called from within a folder with "path_to_repo" == ".", we copy the files from the - generated_dir folder into the path_to_repo folder afterwards. + Initialize from cookiecutter template. Because cookiecutter can only create the file structure in a sub-directory + but cadet-rdm init can be called from within the target directory itself with "path_to_repo" == ".", we copy the files from the + generated_dir directory into the path_to_repo directory afterwards. This means that only the internal contents, the directory layout and files are copied into the path_to_repo. The surrounding top-level directory itself is not carried over. :param cookiecutter_template: + str, Path to cookiecutter template which creates a file structure that is copied into "path_to_repo". :param path_to_repo: + str, Path to main repository. If set to ".", the repository will be initialized in the current directory without creating an additional subdirectory. 
""" generated_dir = cookiecutter(cookiecutter_template, output_dir=path_to_repo) file_names = os.listdir(generated_dir) @@ -138,10 +142,10 @@ def init_cookiecutter(cookiecutter_template, path_to_repo): # # repo = ProjectRepo(".") # -# if Path(repo._output_folder).exists(): -# raise RuntimeError(f"Output repo at {repo._output_folder} already exists.") +# if Path(repo._output_directory).exists(): +# raise RuntimeError(f"Output repo at {repo._output_directory} already exists.") # -# initialize_output_repo(repo._output_folder, project_repo_uuid=repo._project_uuid, +# initialize_output_repo(repo._output_directory, project_repo_uuid=repo._project_uuid, # output_repo_uuid=repo._output_uuid, **output_repo_kwargs) # # os.chdir(starting_directory) @@ -154,8 +158,8 @@ def initialize_git(folder="."): try: repo = git.Repo(".") - proceed = wait_for_user('The target directory already contains a git repo.\n' - 'Please commit or stash all changes to the repo before continuing.\n' + proceed = wait_for_user('The target directory already contains a git repository.\n' + 'Please commit or stash all changes to the repository before continuing.\n' 'Proceed?') if not proceed: raise KeyboardInterrupt @@ -174,13 +178,13 @@ def get_default_lfs_filetypes(): return ["*.jpg", "*.png", "*.xlsx", "*.h5", "*.ipynb", "*.pdf", "*.docx", "*.zip", "*.html", "*.csv"] -def initialize_output_repo(output_folder_name, gitignore: list = None, +def initialize_output_repo(output_directory_name, gitignore: list = None, gitattributes: list = None, lfs_filetypes: list = None, project_repo_uuid: str = None, output_repo_uuid: str = None): """ Initialize a git repository at the given path with an optional included output results repository. - :param output_folder_name: + :param output_directory_name: Name for the output repository. :param gitignore: List of files to be added to the gitignore file. 
@@ -190,8 +194,8 @@ def initialize_output_repo(output_folder_name, gitignore: list = None, List of filetypes to be handled by git lfs. """ starting_directory = os.getcwd() - os.makedirs(output_folder_name, exist_ok=True) - os.chdir(output_folder_name) + os.makedirs(output_directory_name, exist_ok=True) + os.chdir(output_directory_name) if gitignore is None: gitignore = get_default_gitignore() @@ -244,7 +248,7 @@ def create_output_readme(): readme_lines = ["# Output repository for Example Simulation with CADET", "This repository stores the simulation results for RDM-Example. `CADET-RDM` automatically tracks all simulations that are started by running `main.py` from the corresponding project repository.", "", - "Each simulation run creates a dedicated branch in this output repository. The results are saved within the `src` folder of the respective branch. Additionally, a `log.tsv` file in the main branch records metadata for all runs, uniquely linking each output branch to its originating run in the project repository.", + "Each simulation run creates a dedicated branch in this output repository. The results are saved within the `src` directory of the respective branch. Additionally, a `log.tsv` file in the main branch records metadata for all runs, uniquely linking each output branch to its originating run in the project repository.", "", "## Project Repository", "", diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py index 595e79f..23292c4 100644 --- a/cadetrdm/repositories.py +++ b/cadetrdm/repositories.py @@ -555,7 +555,7 @@ def add_remote(self, remote_url, remote_name=None): remote_name = "origin" self._git_repo.create_remote(remote_name, url=remote_url) if self._metadata["is_project_repo"]: - # This folder is a project repo. Use a project repo class to easily access the output repo. + # This directory is a project repository. Use a project repo class to easily access the output repo. 
output_repo = ProjectRepo(self.path).output_repo if output_repo.active_branch != output_repo.main_branch: @@ -566,7 +566,7 @@ def add_remote(self, remote_url, remote_name=None): output_repo.add("README.md") output_repo.commit("Add remote for project repo", verbosity=0, add_all=False) if self._metadata["is_output_repo"]: - # This folder is an output repo + # This directory is an output repository. project_repo = ProjectRepo(self.path.parent) project_repo.update_output_remotes_json() project_repo.add_list_of_remotes_in_readme_file("Link to Output Repository", self.remote_urls) @@ -587,7 +587,7 @@ def import_remote_repo(self, source_repo_location, source_repo_branch, target_re Branch of the source repo to check out. :param target_repo_location: - Place to store the repo. If None, the external_cache folder is used. + Place to store the repo. If None, the external_cache directory is used. :return: Path to the cloned repository @@ -758,7 +758,7 @@ def add_list_of_remotes_in_readme_file(self, repo_identifier: str, remotes_url_l class ProjectRepo(BaseRepo): - def __init__(self, path=None, output_folder=None, + def __init__(self, path=None, output_directory=None, search_parent_directories=True, suppress_lfs_warning=False, url=None, branch=None, options=None, *args, **kwargs): @@ -768,7 +768,7 @@ def __init__(self, path=None, output_folder=None, :param path: Path to the root of the git repository. - :param output_folder: + :param output_directory: Deprecated: Path to the root of the output repository. :param search_parent_directories: if True, all parent directories will be searched for a valid repo as well. @@ -797,21 +797,21 @@ def __init__(self, path=None, output_folder=None, if not suppress_lfs_warning: test_for_lfs() - if output_folder is not None: - print("Deprecation Warning. Setting the outputfolder manually during repo instantiation is deprecated" + if output_directory is not None: + print("Deprecation Warning. 
Setting the output directory manually during repo instantiation is deprecated" " and will be removed in a future update.") if not self.data_json_path.exists(): - raise RuntimeError(f"Folder {self.path} does not appear to be a CADET-RDM repository.") + raise RuntimeError(f"Directory {self.path} does not appear to be a CADET-RDM repository.") self._project_uuid = self._metadata["project_uuid"] self._output_uuid = self._metadata["output_uuid"] - self._output_folder = self._metadata["output_remotes"]["output_folder_name"] + self._output_directory = self._metadata["output_remotes"]["output_directory_name"] self.options = options - if not (self.path / self._output_folder).exists(): + if not (self.path / self._output_directory).exists(): print("Output repository was missing, cloning now.") self._clone_output_repo() - self.output_repo = OutputRepo(self.path / self._output_folder) + self.output_repo = OutputRepo(self.path / self._output_directory) if self._metadata["cadet_rdm_version"] != cadetrdm.__version__: self._update_version(self._metadata, cadetrdm.__version__) @@ -888,7 +888,7 @@ def fix_gitattributes_log_tsv(self): def _clone_output_repo(self, multi_options: List[str] = None): metadata = self.load_metadata() output_remotes = metadata["output_remotes"] - output_path = self.path / output_remotes["output_folder_name"] + output_path = self.path / output_remotes["output_directory_name"] ssh_remotes = list(output_remotes["output_remotes"].values()) if len(ssh_remotes) == 0: warnings.warn("No output remotes configured in .cadet-rdm-data.json") @@ -1046,7 +1046,7 @@ def _convert_csv_to_tsv_if_necessary(self): if self.output_log_file.exists(): return - csv_filepath = self.path / self._output_folder / "log.csv" + csv_filepath = self.path / self._output_directory / "log.csv" if not csv_filepath.exists(): # We have just initialized the repo and neither tsv nor csv exist. 
return @@ -1178,7 +1178,7 @@ def update_output_remotes_json(self): metadata = json.load(file_handle) remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes} - metadata["output_remotes"] = {"output_folder_name": self._output_folder, "output_remotes": remotes_dict} + metadata["output_remotes"] = {"output_directory_name": self._output_directory, "output_remotes": remotes_dict} with open(self.data_json_path, "w", encoding="utf-8") as file_handle: json.dump(metadata, file_handle, indent=2) @@ -1201,11 +1201,11 @@ def download_file(self, url, file_path): def input_data(self, branch_name: str) -> Path: """ Load previously generated results to iterate upon. Copies entire branch of output repo - to the output_cached / branch_name folder. + to the output_cached / branch_name directory. :param branch_name: Name of the branch of the output repository in which the results are stored. :return: - Absolute path to the newly copied folder. + Absolute path to the newly copied directory. """ cached_branch_path = self.copy_data_to_cache(branch_name) @@ -1231,8 +1231,8 @@ def remove_cached_files(self): """ Delete all previously cached results. """ - if (self.path / (self._output_folder + "_cached")).exists(): - delete_path(self.path / (self._output_folder + "_cached")) + if (self.path / (self._output_directory + "_cached")).exists(): + delete_path(self.path / (self._output_directory + "_cached")) def import_static_data(self, source_path: Path | str, commit_message): """ @@ -1330,32 +1330,32 @@ def _get_new_output_branch(self, force=False): def cache_folder_for_branch(self, branch_name=None): """ - Returns the path to the cache folder for the given branch + Returns the path to the cache directory for the given branch :param branch_name: optional branch name, if None, current branch is used. 
:return Path: - Path to folder in cache + Path to directory in cache """ branch_name_path = branch_name.replace("/", "_") - # Define the target folder - cache_folder = self.path / f"{self._output_folder}_cached" / str(branch_name_path) + # Define the target directory + cache_folder = self.path / f"{self._output_directory}_cached" / str(branch_name_path) return cache_folder def copy_data_to_cache(self, branch_name=None, target_folder=None): """ - Copy all existing output results into a cached folder and make it read-only. + Copy all existing output results into a cached directory and make it read-only. :param branch_name: optional branch name, if None, current branch is used. :param target_folder: - optional target directory, if None, default cache folder is used. + optional target directory, if None, default cache directory is used. :return Path: - Path to folder in cache + Path to directory in cache """ # Determine the branch name if not provided if branch_name is None: @@ -1370,7 +1370,7 @@ def copy_data_to_cache(self, branch_name=None, target_folder=None): if branch_name not in local_branches: self.output_repo.checkout(branch_name) - # Create the target folder if it doesn't exist + # Create the target directory if it doesn't exist if not target_folder.exists(): target_folder.mkdir(parents=True, exist_ok=True) @@ -1437,7 +1437,7 @@ def _commit_output_data(self, message, output_dict): commit_return = self.output_repo._git.commit("-m", message) self.copy_data_to_cache() self.update_output_main_logs(output_dict) - main_cach_path = self.path / (self._output_folder + "_cached") / self.output_repo.main_branch + main_cach_path = self.path / (self._output_directory + "_cached") / self.output_repo.main_branch if main_cach_path.exists(): delete_path(main_cach_path) self.copy_data_to_cache(self.output_repo.main_branch) diff --git a/docs/Makefile b/docs/Makefile index f18b212..d568ce7 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,7 +1,7 @@ # Minimal makefile for 
Sphinx documentation # -# You can set these variables from the command line. +# These variables can be set from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = CADET-RDM diff --git a/docs/README.md b/docs/README.md index 71c4d02..8681d57 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,7 +7,7 @@ pip install -e .[docs] ``` from the CADET-RDM root directory. -Then, in the `docs` folder run: +Then, in the `docs` directory run: ``` sphinx-build -b html source build diff --git a/docs/source/bibliography.md b/docs/source/bibliography.md index 814c151..dce5e8c 100644 --- a/docs/source/bibliography.md +++ b/docs/source/bibliography.md @@ -7,6 +7,6 @@ ``` ```{bibliography} ./references.bib +:all: :style: unsrt -``` - +``` \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 8e5a1e4..6249797 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -30,7 +30,7 @@ # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# extensions coming with Sphinx (named 'sphinx.ext.*') or custom # ones. 
# Extensions @@ -43,7 +43,7 @@ '.rst': 'restructuredtext', '.ipynb': 'myst-nb', '.myst': 'myst-nb', - '.md': 'myst-nb', + '.md': 'myst-nb' } ## Numpydoc @@ -74,12 +74,15 @@ ## Viewcode extensions.append("sphinx.ext.viewcode") +## View figures +extensions.append("sphinx_subfigure") + ## Copy Button extensions.append("sphinx_copybutton") ## BibTeX extensions.append("sphinxcontrib.bibtex") -bibtex_bibfiles = ['references.bib'] +bibtex_bibfiles = ["references.bib"] # -- Internationalization ------------------------------------------------ # specifying the natural language populates some key tags @@ -94,6 +97,9 @@ sitemap_locales = [None] sitemap_url_scheme = "{link}" +### Figure +extensions.append("sphinx_subfigure") + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -107,7 +113,7 @@ myst_enable_extensions = [ "dollarmath", "amsmath", - "colon_fence", + "colon_fence" ] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/index.md b/docs/source/index.md index be0fd70..e5d9ae9 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,4 +1,4 @@ -```{include} ../../README.md +```{include} ./user_guide/introduction.md ``` ```{toctree} @@ -6,9 +6,10 @@ :caption: User guide :hidden: +user_guide/introduction user_guide/installation user_guide/getting-started -user_guide/CLI-interface +user_guide/command-line-interface user_guide/python-interface user_guide/jupyter-interface ``` diff --git a/docs/source/user_guide/CLI-interface.md b/docs/source/user_guide/CLI-interface.md deleted file mode 100644 index e27a1b8..0000000 --- a/docs/source/user_guide/CLI-interface.md +++ /dev/null @@ -1,115 +0,0 @@ - -# CLI Interface - -## Initialize Project Repository - -Create a new project repository or convert an existing repository into a CADET-RDM repo: - -```bash -rdm init -``` - - -The `output_folder_name` can be given optionally. It defaults to `output`. 
- - -## Executing scripts - -You can execute python files or arbitray commands using the CLI: - -```bash -cd path/to/your/project -rdm run_yml python "commit message for the results" -rdm run_yml command "command as it would be run" "commit message for the results" -``` - -For the run-command option, the command must be given in quotes, so: - -```bash -rdm run_yml command "python example_file.py" "commit message for the results" -``` - -## Re-using results from previous iterations - -Each result stored with CADET-RDM is given a unique branch name, formatted as: -`__"from"__` - -With this branch name, previously generated data can be loaded in as input data for -further calculations. The following command will copy the contents of the `branch_name` branch to the -cache folder at `project_root/output_cached/branch_name`. - -```bash -rdm data cache branch_name -``` - - -## Using results from another repository - -You can load in results from another repository to use in your project using the CLI: - -```bash -cd path/to/your/project -rdm data import -rdm data import --target_repo_location -``` - -This will store the URL, branch_name and location in the .cadet-rdm-cache.json file, like this: - -```json -{ - "__example/path/to/repo__": { - "source_repo_location": "git@jugit.fz-juelich.de:IBG-1/ModSim/cadet/agile_cadet_rdm_presentation_output.git", - "branch_name": "output_from_master_3910c84_2023-10-25_00-17-23", - "commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc" - } -} -``` - -You can use this file to load the remote repositories based on the cache.json with - -```bash -rdm data fetch -``` - -## Cloning from remote - -You should use `cadet-rdm clone` instead of `git clone` to clone the repo to a new location. - -```bash -rdm clone -``` - - -## Sharing Results - -To share your project code and results with others, you need to create remote repositories on e.g. -[GitHub](https://github.com/) or GitLab. 
You need to create a remote for both the _project_ repo and the -_results_ repo. - -Once created, the remotes need to be added to the local repositories. - -```bash -rdm remote add git@:.git -cd output -rdm remote add git@:_output.git -``` - -Once remotes are configured, you can push all changes to the project repo and the results repos with the -command - -```bash -rdm push -``` - -## Migrating a repository - -If you want to migrate a repository to another remote, the easiest way to do that at the moment is to create the remote -repositories on GitHub or GitLab and change the `origin` URL for the project and output repositories with: - -```bash -rdm remote set-url origin git@:.git -cd output -rdm remote set-url origin git@:_output.git -cd .. -rdm push -``` diff --git a/docs/source/user_guide/command-line-interface.md b/docs/source/user_guide/command-line-interface.md new file mode 100644 index 0000000..05808bc --- /dev/null +++ b/docs/source/user_guide/command-line-interface.md @@ -0,0 +1,107 @@ + +# Command line interface (CLI) + +## Initialize Project Repository + +Create a new project repository or convert an existing repository into a CADET-RDM repository: + +```bash +rdm init +``` +- If no `` is provided, the repository is initialized in the root directory without creating a new directory. +- If `` is given as a relative path (e.g. "repository_name"), a new directory with that name is created inside the root directory. +- If `` is given as an absolute path (e.g. C:\Users\me\projects\myrepo), a new directory is created at the specified location. + +The `output_directory_name` can be given optionally. It defaults to `output`. 
+ + +## Executing scripts + +Python files or arbitrary commands can be executed using the CLI: + +```bash +cd path/to/project_repository +rdm run_yml python "commit message for the results" +rdm run_yml command "command as it would be run" "commit message for the results" +``` + +For the run-command option, the command must be given in quotes, so: + +```bash +rdm run_yml command "python example_file.py" "commit message for the results" +``` + +## Re-using results from previous iterations + +Each result stored with CADET-RDM is given a unique branch name within the output directory, formatted as: +`__` + +With this branch name, previously generated data can be loaded in as input data for +further calculations. The following command will copy the contents of the `branch_name` branch to the +cache directory at `project_root/output_cached/branch_name`. + +```bash +rdm data cache branch_name +``` + +## Using results from another repository + +The Project repository URL, branch_name and location of results can be stored in the .cadet-rdm-cache.json file, like this: + +```json +{ + "__example/path/to/repo__": { + "source_repo_location": "git@jugit.fz-juelich.de:IBG-1/ModSim/cadet/agile_cadet_rdm_presentation_output.git", + "branch_name": "output_from_master_3910c84_2023-10-25_00-17-23", + "commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc" + } +} +``` + +This cache.json file can be used to load remote repositories. + +```bash +rdm data fetch +``` + +## Cloning rdm repositories + +The command `rdm clone` should be used instead of `git clone` to clone an existing rdm repository to a new location. The destination directory must be empty. + +```bash +rdm clone +``` + + +## Sharing Results + +To share the project code and results (`output`) with others, remote repositories have to be configured on e.g. +[GitHub](https://github.com/) or GitLab. Remotes for both the _project_ repository and the +_output_ repository have to be created. 
+ +Once created, the remotes need to be added to the local repositories. + +```bash +rdm remote add git@:.git +cd output +rdm remote add git@:_output.git +``` + +Once remotes are configured, all changes to the project repository and the output repository can be pushed with the following command from within the project repository: + +```bash +rdm push +``` + +## Migrating a repository + +The easiest way to migrate a repository to another remote, is to create the remote +repositories on GitHub or GitLab and change the `origin` URL for the project and output repositories with: + +```bash +rdm remote set-url origin git@:.git +cd output +rdm remote set-url origin git@:_output.git +cd .. +rdm push +``` diff --git a/docs/source/user_guide/figures/RDM-output-commits.png b/docs/source/user_guide/figures/RDM-output-commits.png new file mode 100644 index 0000000..a8b8c1d Binary files /dev/null and b/docs/source/user_guide/figures/RDM-output-commits.png differ diff --git a/docs/source/user_guide/figures/RDM-project-commits.png b/docs/source/user_guide/figures/RDM-project-commits.png new file mode 100644 index 0000000..69074d7 Binary files /dev/null and b/docs/source/user_guide/figures/RDM-project-commits.png differ diff --git a/docs/source/user_guide/figures/RDM-structure.png b/docs/source/user_guide/figures/RDM-structure.png new file mode 100644 index 0000000..5285750 Binary files /dev/null and b/docs/source/user_guide/figures/RDM-structure.png differ diff --git a/docs/source/user_guide/getting-started.md b/docs/source/user_guide/getting-started.md index 5be80c9..33db9c1 100644 --- a/docs/source/user_guide/getting-started.md +++ b/docs/source/user_guide/getting-started.md @@ -1,10 +1,13 @@ # Getting started -## Initialize Project Repository +A CADET-RDM repository typically consists of a `project repository` which contains the code and an `output repository` which contains the results generated by the project code. 
By initializing an RDM repository, the necessary file structure for tracking and version-controlling the project and the output will be created. -Create a new project repository or convert an existing repository into a CADET-RDM repo: +The following paragraphs contain a short guide to quickly start working with CADET-RDM. Almost all tools can be used either by running specific command line prompts or by executing code in python. +## Initialize RDM-Repositories + +Start using CADET-RDM by creating a new project repository or converting an existing repository into a CADET-RDM repository. ```bash rdm init ``` @@ -16,16 +19,53 @@ from cadetrdm import initialize_repo initialize_repo(path_to_repo) ``` +- If no `` is provided, the repository is initialized in the root directory without creating a new directory. +- If `` is given as a relative path (e.g. "repository_name"), a new directory with that name is created inside the root directory. +- If `` is given as an absolute path (e.g. C:\Users\me\projects\myrepo), a new directory is created at the specified location. + +The initialization of the RDM repository will generate a **project repository** containing the following contents, should they not have previously existed within the directory: + +```bash +.git +output +.cadet-rdm-cache.json +.cadet-rdm-data.json +.gitignore +Dockerfile +environment.yml +jupytext.yml +README.md +``` + +The **output repository** for storing and tracking the results of the project is also generated at initialization. It is created as a directory inside of the project repository. Both the project repository and the output repository are their own separate git repositories. +The `output_directory_name` can be specified optionally. It defaults to `output`. 
The output directory is initialized with the following contents: + +```bash +.git +.cadet-rdm-data.json +.gitattributes +.gitignore +README.md +``` +With every run of the project code, a new output branch with the results of the run is created. At the same time, the main branch of the output repository tracks these runs in the `run_history` directory. Every branch has its own sub-directory in the run_history directory. Within this branch, the following files are stored for every run: -The `output_folder_name` can be given optionally. It defaults to `output`. + conda_environment.yml + conda_independent_environment.yml + metadata.json + pip_independent_requirements.txt + pip_requirements.txt + +The information stored in these files can be used to reproduce the specific output created by running the project code with these configurations. ## Cookiecutter support [Cookiecutter](https://github.com/cookiecutter/cookiecutter) can be used to set a template as a starting position for the repository initialization. +If `` is given as an absolute or relative path, it overwrites the name which may be given to the directory in the cookiecutter prompt. If no `` is provided, the repository is initialized in the root directory. No new directory is created, even if the cookiecutter template would normally do so. +From the command line ```bash -rdm init --cookiecutter template_url +rdm init --cookiecutter ``` or from python @@ -37,27 +77,27 @@ initialize_repo(path_to_repo, cookiecutter_template="template_url") ``` -## Creating and adding remotes +## Creating remote repositories automatically -You can create remotes for both the project and the output repository with one command, using the GitLab or GitHub API. +Remotes for both the project and the output repository can be created with one command, using the GitLab or GitHub API. 
-You need to create a -[GitLab Personal Access Token (PAT)](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html) or [GitHub PAT](https://github.com/settings/tokens?type=beta) with api access rights -and store it in the Python `keyring` using an interactive Python session: +A +[GitLab Personal Access Token (PAT)](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html) or [GitHub PAT](https://github.com/settings/tokens?type=beta) with API access rights has to be created +and stored in the Python `keyring` using an interactive Python session: ```python import keyring -keyring.set_password("e.g. https://jugit.fz-juelich.de/", username, token) +keyring.set_password("e.g. https://jugit.fz-juelich.de/", "username", "token") ``` or in a command line -````commandline -keyring set "e.g. https://jugit.fz-juelich.de/" username +````bash +keyring set "e.g. https://jugit.fz-juelich.de/" ```` -Then you can run: +Then the following code is run to automatically create remotes for both the project repository and the output repository at the chosen URL: ```python from cadetrdm import ProjectRepo @@ -75,29 +115,50 @@ repo.create_remotes( or in a command line ```bash -rdm remote create url namespace name username +rdm remote create +``` +e.g. +```bash rdm remote create https://jugit.fz-juelich.de/ r.jaepel API_test_project r.jaepel ``` +The name of the created output remote is derived from the respective project repository name by appending `_output`, "e.g. API_test_project_output". -## Extending GIT-LFS scope -Several common datatypes are included in GIT-LFS by default. These currently are -`"*.jpg", "*.png", "*.xlsx", "*.h5", "*.ipynb", "*.pdf", "*.docx", "*.zip", "*.html"` +## Adding remote repositories -You can add datatypes you require by running: +To share the project code and results (*output*) with others, remote repositories have to be configured on e.g. +[GitHub](https://github.com/) or GitLab. 
Remotes for both the `project_repository` and the +`output_repository` have to be created. -````python -from cadetrdm import ProjectRepo +Once created, the remotes need to be added to the local repositories. Run the following commands in both repositories with their respective `remote_url`s. -repo = ProjectRepo() +```bash +rdm remote add +``` +e.g. -repo.output_repo.add_filetype_to_lfs("*.npy") -```` +```bash +cd project +rdm remote add git@:.git +cd output +rdm remote add git@:_output.git +``` + +## Staging, committing and pushing changes to the remote repositories +To check that all remotes are set correctly and that the metadata is consistent, and to stage all changes within the project and output repositories, run the following command in the project repository: -or from within the output folder in a command line: +```bash +rdm check +``` +To commit the staged changes, run the following command in the project repository: ```bash -rdm lfs add *.npy +rdm commit -m ``` +To then push all commits from the project and output repositories, run the following command in the project repository: + +```bash +rdm push +``` \ No newline at end of file diff --git a/docs/source/user_guide/installation.md b/docs/source/user_guide/installation.md index 010ad5f..98c4b18 100644 --- a/docs/source/user_guide/installation.md +++ b/docs/source/user_guide/installation.md @@ -26,7 +26,7 @@ dependencies: and then run -```commandline +```bash mamba env create -f rdm_environment.yml ``` @@ -37,6 +37,50 @@ python==3.11 cadet-rdm>=0.0.15 ``` -```commandline +```bash pip install -r rdm_requirements.txt ``` + + +## Git-LFS +Running `cadet-rdm` requires [**Git LFS**](https://git-lfs.com/), which needs to be installed separately. 
+ +* **Ubuntu/Debian**: + ```bash + sudo apt-get install git-lfs + git lfs install + ``` + + * **macOS** (with Homebrew): + + ```bash + brew install git-lfs + git lfs install + ``` + + * **Windows**: + Download and install from [https://git-lfs.com](https://git-lfs.com) + + + +## Extending GIT-LFS scope + +Several common datatypes are included in GIT-LFS by default. These currently are +`"*.jpg", "*.png", "*.xlsx", "*.h5", "*.ipynb", "*.pdf", "*.docx", "*.zip", "*.html"` + +Additional datatypes can be added if required by running: + +````python +from cadetrdm import ProjectRepo + +repo = ProjectRepo() + +repo.output_repo.add_filetype_to_lfs("*.npy") +```` + + +or from within the output directory in a command line: + +```bash +rdm lfs add *.npy +``` \ No newline at end of file diff --git a/docs/source/user_guide/introduction.md b/docs/source/user_guide/introduction.md new file mode 100644 index 0000000..d867dc3 --- /dev/null +++ b/docs/source/user_guide/introduction.md @@ -0,0 +1,68 @@ +# Introduction + +Welcome to CADET-Research Data Management, a project by the Forschungszentrum Jülich. + +This toolbox aims to help track and version control: + +* input data + +* code + +* software versions + +* configurations + +* metadata + +* output data + +and allow for easy sharing, integration, and reproduction of the generated results. + + +The tools of CADET-RDM can be applied to any project with the structure of an RDM project. + + +## RDM repository architecture + +CADET-RDM projects are structured into two distinct repositories. + +1. The **project repository** that contains the input data, code, software and configurations to execute the computations. The output repository is a directory within the project repository. +2. The **output repository** that contains the results of these computations, including all calculations, models and figures created by running the project code. Also stored in the output directory is the metadata used to create the specific result. 
This includes e.g. the software versions and requirements. + +:::{figure} figures/RDM-structure.png +:width: 295 +:alt: Structure of a CADET-RDM project + +Structure of a CADET-RDM project: the output repository is a directory within the project repository. +::: + + +Both the **project** and the **output** repository are their own git repositories. The commit architecture of CADET-RDM allows for easy tracking and reproducing of results and their respective project code. + +## RDM commit architecture + +Every run of the project code creates a new output branch (*result branch*) in the **output directory**. The repository on this new branch uniquely contains the files created by the execution of the project code.
At the same time, for every run of the project code the `run_history` directory on the master branch of the output repository is updated. This directory is unique to the master branch and contains the metadata and software specifications for every branch in the output repository. This directory also links the results in the output branch to the corresponding commit in the project repository used to create them. For transparency and easy accessibility, the most important specifications for every result branch are also documented in the `log.tsv` on the master branch of the output repository. + +```{eval-rst} +.. subfigure:: AB + :gap: 8px + :subcaptions: below + + .. image:: figures/RDM-project-commits.png + :alt: Commits in the project repository + :width: 300px + + .. image:: figures/RDM-output-commits.png + :alt: Commits in the output repository + :width: 420px + + Commits in the project repository (A) and the output repository (B). +``` + +Because of this simultaneous log of the metadata and the environment used to create a specific output, results can be reproduced easily. + +## User function + +The tools of CADET-RDM can be used through the command line interface (CLI) or by executing scripts in Python or in Jupyter. + +The following documentation contains an installation guide, a user guide to quickly start using CADET-RDM and more detailed descriptions on using the command line interface, python interface and jupyter interface. \ No newline at end of file diff --git a/docs/source/user_guide/jupyter-interface.md b/docs/source/user_guide/jupyter-interface.md index 4ff80d4..6d44fcf 100644 --- a/docs/source/user_guide/jupyter-interface.md +++ b/docs/source/user_guide/jupyter-interface.md @@ -17,8 +17,8 @@ into a `.py` files, with the markdown cells included as block comments. All `.ip version control through the `.gitignore` file and only changes in the `.py` files are tracked. The `.py` files are automatically created and updated whenever a `.ipynb` file is saved. 
-Please ensure, that `juyptext` is working for you and that a `.py` file is created after saving your notebook, otherwise -your code will not be version-controlled. +Please ensure that `jupytext` is working and that a `.py` file is created after saving the notebook, otherwise +the code will not be version-controlled. ### Reproducibility @@ -28,11 +28,11 @@ all previous outputs are cleared and all cells are executed sequentially from top to bottom and then committed to the output repository. To maintain the link between Markdown annotation, code, and inline graphs, the final notebook is also saved as -a `.html` webpage into the output folder for future inspection. +a `.html` webpage into the output directory for future inspection. ## Tracking Results -To use `CADET-RDM` from within an `.ipynb` file, please include this at the top of your file. +To use `CADET-RDM` from within an `.ipynb` file, please include this at the top of the file. ```python from cadetrdm.repositories import JupyterInterfaceRepo @@ -40,7 +40,7 @@ from cadetrdm.repositories import JupyterInterfaceRepo repo = JupyterInterfaceRepo() ``` -Then, at the end of your file, run: +Then, at the end of the file, run: ```python repo.commit_nb_output( "path-to-the-current-notebook.ipynb", @@ -51,9 +51,9 @@ repo.commit_nb_output( This will re-run the `.ipynb` file from the start, save a html version of the completed notebook into the output repo and commit all changes to the output repo. 
-## Committing changes to your code +## Committing changes to the code -You can commit all current changes to your code directly from Jupyter by running +All current changes to the code can be committed directly from Jupyter by running: ```python from cadetrdm.repositories import JupyterInterfaceRepo diff --git a/docs/source/user_guide/python-interface.md b/docs/source/user_guide/python-interface.md index 48860af..2b00fb3 100644 --- a/docs/source/user_guide/python-interface.md +++ b/docs/source/user_guide/python-interface.md @@ -7,7 +7,7 @@ from cadetrdm import ProjectRepo """ -Your imports and function declarations +Imports and function declarations e.g. generate_data(), write_data_to_file(), analyse_data() and plot_analysis_results() """ @@ -15,25 +15,25 @@ if __name__ == '__main__': # Instantiate CADET-RDM ProjectRepo handler repo = ProjectRepo() - # If you've made changes to the code, commit the changes + # Commit all changes to the code repo.commit("Add code to generate and analyse example data") - # Everything written to the output_folder within this context manager gets tracked - # The method repo.output_data() generates full paths to within your output_folder + # Everything written to the output_directory within this context manager gets tracked + # The method repo.output_data() generates full paths to within the output_directory with repo.track_results(results_commit_message="Generate and analyse example data"): data = generate_data() - write_data_to_file(data, output_folder=repo.output_folder) + write_data_to_file(data, output_directory=repo.output_directory) analysis_results = analyse_data(data) - plot_analysis_results(analysis_results, figure_path=repo.output_folder / "analysis" / "regression.png") + plot_analysis_results(analysis_results, figure_path=repo.output_directory / "analysis" / "regression.png") ``` ## Sharing Results -To share your project code and results with others, you need to create remote repositories on e.g. 
-[GitHub](https://github.com/) or GitLab. You need to create a remote for both the _project_ repo and the -_results_ repo. +To share the project code and results (`output`) with others, remote repositories have to be configured on e.g. +[GitHub](https://github.com/) or GitLab. Remotes for both the _project_ repository and the +_output_ repository have to be created. Once created, the remotes need to be added to the local repositories. @@ -43,8 +43,7 @@ repo.add_remote("git@:.git") repo.output_repo.add_remote("git@:_output.git") ``` -Once remotes are configured, you can push all changes to the project repo and the results repos with the -command +Once remotes are configured, all changes to the project repository and the output repository can be pushed with the following command from within the project repository: ```python # push all changes to the Project and Output repositories with one command: @@ -54,7 +53,7 @@ repo.push() ## Re-using results from previous iterations Each result stored with CADET-RDM is given a unique branch name, formatted as: -`__"from"__` +`__` With this branch name, previously generated data can be loaded in as input data for further calculations. 
@@ -76,12 +75,12 @@ cached_folder_path = repo.input_data(branch_name=branch_name) ## Using results from another repository -You can load in results from another repository to use in your project using the CLI: +The results from another repository can be used by loading them into the target project with: ```python repo.import_remote_repo(source_repo_location="", source_repo_branch="") repo.import_remote_repo(source_repo_location="", source_repo_branch="", - target_repo_location="") + target_repo_location="") ``` This will store the URL, branch_name and location in the .cadet-rdm-cache.json file, like this: @@ -96,7 +95,7 @@ This will store the URL, branch_name and location in the .cadet-rdm-cache.json f } ``` -You can use this file to load the remote repositories based on the cache.json with +This file can be used to load remote repositories based on the cache.json with ```python repo.fill_data_from_cadet_rdm_json() @@ -104,10 +103,10 @@ repo.fill_data_from_cadet_rdm_json() ## Cloning from remote -You should use `cadetrdm.ProjectRepo.clone()` instead of `git clone` to clone the repo to a new location. +The method `cadetrdm.ProjectRepo.clone()` should be used instead of `git clone` to clone an rdm repository to a new location. 
```python from cadetrdm import ProjectRepo -ProjectRepo.clone("") +ProjectRepo.clone(url="<URL>", to_path="<path>") ``` diff --git a/pyproject.toml b/pyproject.toml index 6dc452c..2f8ae52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,5 +86,13 @@ markers = [ "container: marks tests as using containerization interfaces such as Docker or Apptainer" ] +[tool.setuptools] +package-dir = {"" = "."} + +[tool.setuptools.packages.find] +include = ["cadetrdm*"] +exclude = ["tmp*", "batch_repos*"] + + [tool.setuptools.dynamic] version = { attr = "cadetrdm.__version__" } diff --git a/tests/case.yml b/tests/case.yml index 7f597b9..3636853 100644 --- a/tests/case.yml +++ b/tests/case.yml @@ -1,6 +1,6 @@ ProjectRepo: path: tmp/template - url: git@github.com:ronald-jaepel/rdm_testing_template.git + url: git@github.com:cadet/RDM-Testing-Template.git branch: main Options: commit_message: Trying out new things diff --git a/tests/test_container_docker.py b/tests/test_container_docker.py index 484ca51..8624200 100644 --- a/tests/test_container_docker.py +++ b/tests/test_container_docker.py @@ -12,7 +12,7 @@ def test_run_dockered(): rdm_example = ProjectRepo( path=WORK_DIR / 'template', - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + url="git@github.com:cadet/RDM-Testing-Template.git", suppress_lfs_warning=True ) diff --git a/tests/test_container_podman.py b/tests/test_container_podman.py index 65d5c19..31d3afc 100644 --- a/tests/test_container_podman.py +++ b/tests/test_container_podman.py @@ -10,14 +10,14 @@ @pytest.mark.container def test_run_in_podman(): - # You need to install passt on your system and add it to the path + # passt needs to be installed and added to the path # os.environ["PATH"] += os.pathsep + "/home/bin/passt" WORK_DIR = Path.cwd() / "tmp" WORK_DIR.mkdir(parents=True, exist_ok=True) rdm_example = ProjectRepo( path=WORK_DIR / 'template', - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + 
url="git@github.com:cadet/RDM-Testing-Template.git", 
branch="main", suppress_lfs_warning=True ) @@ -69,7 +69,7 @@ def test_run_in_podman(): @pytest.mark.slow @pytest.mark.container def test_pytest_in_podman(): - # You need to install passt on your system and add it to the path + # Passt needs to be installed and added to the path # os.environ["PATH"] += os.pathsep + "/home/bin/passt" WORK_DIR = Path.cwd() / "tmp" WORK_DIR.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_git_adapter.py b/tests/test_git_adapter.py index f53f343..fd196ed 100644 --- a/tests/test_git_adapter.py +++ b/tests/test_git_adapter.py @@ -34,9 +34,9 @@ def count_commit_number(repo): return current_commit_number -def example_generate_results_array(path_to_repo, output_folder): +def example_generate_results_array(path_to_repo, output_directory): results_array = np.random.random((500, 3)) - np.savetxt(path_to_repo / output_folder / "result.csv", results_array, delimiter=",") + np.savetxt(path_to_repo / output_directory / "result.csv", results_array, delimiter=",") return results_array @@ -86,7 +86,7 @@ def try_commit_results_data(path_to_repo): repo = ProjectRepo(path_to_repo) current_commit_number = count_commit_number(repo.output_repo) with repo.track_results(results_commit_message="Add array") as output_branch: - example_generate_results_array(path_to_repo, output_folder=repo.output_path) + example_generate_results_array(path_to_repo, output_directory=repo.output_path) updated_commit_number = count_commit_number(repo.output_repo) assert current_commit_number <= updated_commit_number assert str(repo.output_repo.active_branch) == output_branch @@ -129,7 +129,7 @@ def try_commit_results_with_uncommitted_code_changes(path_to_repo): modify_code(path_to_repo) with pytest.raises(Exception): with repo.track_results(results_commit_message="Add array"): - example_generate_results_array(path_to_repo, output_folder=repo.output_path) + example_generate_results_array(path_to_repo, output_directory=repo.output_path) repo.commit("add code to print 
random number", add_all=True) @@ -158,7 +158,7 @@ def try_initialize_from_remote(): if Path("test_repo_from_remote").exists(): delete_path("test_repo_from_remote") ProjectRepo.clone( - url="git@github.com:ronald-jaepel/rdm_testing_template.git", + url="git@github.com:cadet/RDM-Testing-Template.git", to_path="test_repo_from_remote" ) assert try_init_gitpython_repo("test_repo_from_remote") @@ -408,7 +408,7 @@ def test_with_detached_head(): # repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name) # repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name, # target_repo_location="foo/bar/repo") -# # delete folder and reload +# # delete directory and reload # delete_path("foo/bar/repo") # # with pytest.raises(Exception):