From ddc29a37b0358d04713d011a8057818c2f592e58 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 19:30:40 +0530 Subject: [PATCH 1/8] ruff changes and version updates --- .pre-commit-config.yaml | 23 +++++++---------------- pyproject.toml | 6 +----- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0987bad90..b21e2deb8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,26 +1,21 @@ default_language_version: python: python3 -files: | - (?x)^( - openml| - tests - )/.*\.py$ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.10 + rev: v0.15.1 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --no-cache] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.13.0 + rev: v1.19.1 hooks: - id: mypy additional_dependencies: - types-requests - types-python-dateutil - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.29.4 + rev: 0.36.2 hooks: - id: check-github-workflows files: '^github/workflows/.*\.ya?ml$' @@ -28,21 +23,17 @@ repos: - id: check-dependabot files: '^\.github/dependabot\.ya?ml$' - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-added-large-files - files: ".*" - id: check-case-conflict - files: ".*" - id: check-merge-conflict - files: ".*" - id: check-yaml - files: ".*" - id: end-of-file-fixer - files: ".*" - types: ["yaml"] - id: check-toml - files: ".*" types: ["toml"] - id: debug-statements files: '^src/.*\.py$' + - id: mixed-line-ending + args: ['--fix=lf'] + - id: trailing-whitespace diff --git a/pyproject.toml b/pyproject.toml index 93a6ffbfa..409dec482 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "pyarrow", "tqdm", # For MinIO download progress bars ] -requires-python = ">=3.10,<3.15" +requires-python = ">=3.10,<3.15" maintainers = [ { name = "Pieter Gijsbers", email="p.gijsbers@tue.nl"}, { name = "Lennart Purucker"}, @@ -144,13 +144,9 @@ markers = [ target-version = "py310" line-length = 100 output-format = "grouped" -src = ["openml", "tests", "examples"] unsafe-fixes = true exclude = [ - # TODO(eddiebergman): Tests should be re-enabled after the refactor - "tests", - # ".bzr", ".direnv", ".eggs", From 1adc5112c885d38dc52613b197158b2c40a924c0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 19:34:51 +0530 Subject: [PATCH 2/8] ruff changes and version updates --- pyproject.toml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 409dec482..b30e1bfb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -323,14 +323,3 @@ no_implicit_optional = true check_untyped_defs = true warn_return_any = true - - -[[tool.mypy.overrides]] -module = ["tests.*", "openml.extensions.sklearn.*"] - -# TODO(eddiebergman): This should be re-enabled after tests get refactored -ignore_errors = true -#disallow_untyped_defs = false # Sometimes we just want to ignore verbose types -#disallow_untyped_decorators = false # Test decorators are not properly typed -#disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types -#disable_error_code = ["var-annotated"] From 4a9534122792dec0f409ceb8dd28c5eaf919dfbd Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 19:37:33 +0530 Subject: [PATCH 3/8] mypy changes --- openml/_api_calls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 9e53bd9fa..2e09fc157 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -9,7 +9,7 @@ import shutil import time import urllib.parse -import xml +import xml.parsers.expat import zipfile from pathlib import Path From 0102a7352ab59cc7b3372e963703af3201afdcc0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:22:01 +0530 Subject: [PATCH 4/8] save --- pyproject.toml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b30e1bfb9..569d400a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,6 +147,9 @@ output-format = "grouped" unsafe-fixes = true exclude = [ + # TODO(eddiebergman): Tests should be re-enabled after the refactor + "tests", + # ".bzr", ".direnv", ".eggs", @@ -270,7 +273,6 @@ ignore = [ "S101", # Use of assert detected. "W292", # No newline at end of file "PLC1901", # "" can be simplified to be falsey - "TC003", # Move stdlib import into TYPE_CHECKING "COM812", # Trailing comma missing (handled by linter, ruff recommend disabling if using formatter) "N803", # Argument should be lowercase (but we accept things like `X`) "PLC0415", # Allow imports inside functions / non-top-level scope @@ -306,7 +308,7 @@ convention = "numpy" [tool.mypy] python_version = "3.10" -packages = ["openml", "tests"] +packages = ["openml"] show_error_codes = true @@ -323,3 +325,13 @@ no_implicit_optional = true check_untyped_defs = true warn_return_any = true + +[[tool.mypy.overrides]] +module = ["tests.*", "openml.extensions.sklearn.*"] + +# TODO(eddiebergman): This should be re-enabled after tests get refactored +ignore_errors = true +#disallow_untyped_defs = false # Sometimes we just want to ignore verbose types +#disallow_untyped_decorators = false # Test decorators are not properly typed +#disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types +#disable_error_code = ["var-annotated"] From 2533dce3b08b2e7016894565dd90a3eb5536c516 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:31:57 +0530 Subject: [PATCH 5/8] reverts --- pyproject.toml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b30e1bfb9..569d400a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,6 +147,9 @@ output-format = "grouped" unsafe-fixes = true exclude = [ + # TODO(eddiebergman): Tests should be re-enabled after the refactor + "tests", + # ".bzr", ".direnv", ".eggs", @@ -270,7 +273,6 @@ ignore = [ "S101", # Use of assert detected. "W292", # No newline at end of file "PLC1901", # "" can be simplified to be falsey - "TC003", # Move stdlib import into TYPE_CHECKING "COM812", # Trailing comma missing (handled by linter, ruff recommend disabling if using formatter) "N803", # Argument should be lowercase (but we accept things like `X`) "PLC0415", # Allow imports inside functions / non-top-level scope @@ -306,7 +308,7 @@ convention = "numpy" [tool.mypy] python_version = "3.10" -packages = ["openml", "tests"] +packages = ["openml"] show_error_codes = true @@ -323,3 +325,13 @@ no_implicit_optional = true check_untyped_defs = true warn_return_any = true + +[[tool.mypy.overrides]] +module = ["tests.*", "openml.extensions.sklearn.*"] + +# TODO(eddiebergman): This should be re-enabled after tests get refactored +ignore_errors = true +#disallow_untyped_defs = false # Sometimes we just want to ignore verbose types +#disallow_untyped_decorators = false # Test decorators are not properly typed +#disallow_incomplete_defs = false # Sometimes we just want to ignore verbose types +#disable_error_code = ["var-annotated"] From 7e46acc9d7c4186a3288dc0d4c751999fd217655 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:54:16 +0530 Subject: [PATCH 6/8] test From af29976ed52faeead2f3b648a921bb991ecbe4ad Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:57:08 +0530 Subject: [PATCH 7/8] pre-commit run across all files --- .github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md | 1 - .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/release_docker.yaml | 2 +- .github/workflows/test.yml | 2 +- CONTRIBUTING.md | 22 +- LICENSE | 6 +- README.md | 2 +- docker/readme.md | 24 +- docker/startup.sh | 2 +- docs/details.md | 7 +- docs/extensions.md | 6 +- docs/index.md | 10 +- examples/_external_or_deprecated/README.md | 4 +- openml/base.py | 5 +- openml/cli.py | 5 +- openml/config.py | 6 +- openml/datasets/data_feature.py | 3 +- openml/extensions/extension_interface.py | 3 +- openml/flows/flow.py | 6 +- openml/runs/run.py | 3 +- openml/runs/trace.py | 6 +- openml/setups/functions.py | 6 +- openml/study/study.py | 6 +- openml/tasks/split.py | 6 +- openml/tasks/task.py | 5 +- openml/utils.py | 3 +- tests/conftest.py | 4 +- tests/files/misc/trace.arff | 6 +- .../datasets/data_delete_not_owned.xml | 2 +- .../datasets/data_description_61.xml | 16 +- .../org/openml/test/datasets/-1/qualities.xml | 1 - .../org/openml/test/datasets/2/dataset.arff | 20 +- .../openml/test/datasets/2/description.xml | 26 +-- .../org/openml/test/datasets/2/qualities.xml | 212 +++++++++--------- .../org/openml/test/setups/1/description.xml | 1 - .../org/openml/test/tasks/1/datasplits.arff | 2 +- .../openml/test/tasks/1882/datasplits.arff | 2 +- .../org/openml/test/tasks/3/datasplits.arff | 2 +- tests/test_datasets/test_dataset.py | 8 +- 39 files changed, 237 insertions(+), 219 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md index 11290dc66..c4df7182f 100644 --- a/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE/ISSUE_TEMPLATE.md @@ -46,4 +46,3 @@ import openml; print("OpenML", openml.__version__) - diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5584e6438..99b766cd9 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,7 +19,7 @@ Please make sure that: * Change Log Entry: -#### Details +#### Details - diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index fcea357e4..a242cead8 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -61,6 +61,6 @@ jobs: repository: openml/openml-python short-description: "pre-installed openml-python environment" readme-filepath: ./docker/readme.md - + - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b10721f55..b408cb643 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -87,7 +87,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }} - + if [ "${{ matrix.pandas-version }}" != "" ]; then echo "Installing specific pandas version: ${{ matrix.pandas-version }}" pip install "pandas==${{ matrix.pandas-version }}" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 35ab30b4a..d47582ddb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,8 +28,8 @@ Great! You've decided you want to help out. Now what? All contributions should be linked to issues on the [GitHub issue tracker](https://github.com/openml/openml-python/issues). In particular for new contributors, the *good first issue* label should help you find issues which are suitable for beginners. Resolving these issues allows you to start -contributing to the project without much prior knowledge. Your assistance in this area -will be greatly appreciated by the more experienced developers as it helps free up +contributing to the project without much prior knowledge. Your assistance in this area +will be greatly appreciated by the more experienced developers as it helps free up their time to concentrate on other issues. If you encounter a particular part of the documentation or code that you want to improve, @@ -39,16 +39,16 @@ This is important since you can first get feedback or pointers from experienced To let everyone know you are working on an issue, please leave a comment that states you will work on the issue (or, if you have the permission, *assign* yourself to the issue). This avoids double work! -## Contributing Workflow Overview +## Contributing Workflow Overview To contribute to the openml-python package, follow these steps: 0. Determine how you want to contribute (see above). 1. Set up your local development environment. - 1. Fork and clone the `openml-python` repository. Then, create a new branch from the ``develop`` branch. If you are new to `git`, see our [detailed documentation](#basic-git-workflow), or rely on your favorite IDE. + 1. Fork and clone the `openml-python` repository. Then, create a new branch from the ``develop`` branch. If you are new to `git`, see our [detailed documentation](#basic-git-workflow), or rely on your favorite IDE. 2. [Install the local dependencies](#install-local-dependencies) to run the tests for your contribution. 3. [Test your installation](#testing-your-installation) to ensure everything is set up correctly. 4. Implement your contribution. If contributing to the documentation, see [here](#contributing-to-the-documentation). -5. [Create a pull request](#pull-request-checklist). +5. [Create a pull request](#pull-request-checklist). ### Install Local Dependencies @@ -56,11 +56,11 @@ We recommend following the instructions below to install all requirements locall However, it is also possible to use the [openml-python docker image](https://github.com/openml/openml-python/blob/main/docker/readme.md) for testing and building documentation. Moreover, feel free to use any alternative package managers, such as `pip`. -1. To ensure a smooth development experience, we recommend using the `uv` package manager. Thus, first install `uv`. If any Python version already exists on your system, follow the steps below, otherwise see [here](https://docs.astral.sh/uv/getting-started/installation/). +1. To ensure a smooth development experience, we recommend using the `uv` package manager. Thus, first install `uv`. If any Python version already exists on your system, follow the steps below, otherwise see [here](https://docs.astral.sh/uv/getting-started/installation/). ```bash pip install uv ``` -2. Create a virtual environment using `uv` and activate it. This will ensure that the dependencies for `openml-python` do not interfere with other Python projects on your system. +2. Create a virtual environment using `uv` and activate it. This will ensure that the dependencies for `openml-python` do not interfere with other Python projects on your system. ```bash uv venv --seed --python 3.8 ~/.venvs/openml-python source ~/.venvs/openml-python/bin/activate @@ -108,7 +108,7 @@ Drafts often benefit from the inclusion of a [task list](https://github.com/blog/1375-task-lists-in-gfm-issues-pulls-comments) in the PR description. ---- +--- # Appendix @@ -146,7 +146,7 @@ local disk: git checkout -b feature/my-feature ``` - Always use a ``feature`` branch. It's good practice to never work on the ``main`` or ``develop`` branch! + Always use a ``feature`` branch. It's good practice to never work on the ``main`` or ``develop`` branch! To make the nature of your pull request easily visible, please prepend the name of the branch with the type of changes you want to merge, such as ``feature`` if it contains a new feature, ``fix`` for a bugfix, ``doc`` for documentation and ``maint`` for other maintenance on the package. 4. Develop the feature on your feature branch. Add changed files using ``git add`` and then ``git commit`` files: @@ -178,7 +178,7 @@ Before each commit, it will automatically run: if the resulting code is very bloated, consider a (small) refactor. - [mypy](https://mypy.readthedocs.io/en/stable/) a static type checker. In particular, make sure each function you work on has type hints. - + If you want to run the pre-commit tests without doing a commit, run: ```bash $ make check @@ -203,4 +203,4 @@ When dependencies are installed, run ```bash mkdocs serve ``` -This will open a preview of the website. \ No newline at end of file +This will open a preview of the website. diff --git a/LICENSE b/LICENSE index e08aa862b..7c0204d1c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2014-2019, Matthias Feurer, Jan van Rijn, Andreas Müller, +Copyright (c) 2014-2019, Matthias Feurer, Jan van Rijn, Andreas Müller, Joaquin Vanschoren and others. All rights reserved. @@ -29,7 +29,7 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -License of the files CONTRIBUTING.md, ISSUE_TEMPLATE.md and +License of the files CONTRIBUTING.md, ISSUE_TEMPLATE.md and PULL_REQUEST_TEMPLATE.md: Those files are modifications of the respecting templates in scikit-learn and @@ -52,7 +52,7 @@ modification, are permitted provided that the following conditions are met: c. Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written - permission. + permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" diff --git a/README.md b/README.md index c44e42981..73b5774fe 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ Bibtex entry: We welcome contributions from both new and experienced developers! -If you would like to contribute to OpenML-Python, please read our +If you would like to contribute to OpenML-Python, please read our [Contribution Guidelines](https://github.com/openml/openml-python/blob/develop/CONTRIBUTING.md). If you are new to open-source development, a great way to get started is by diff --git a/docker/readme.md b/docker/readme.md index d0af9d9fe..6a0362a58 100644 --- a/docker/readme.md +++ b/docker/readme.md @@ -1,8 +1,8 @@ # OpenML Python Container This docker container has the latest version of openml-python downloaded and pre-installed. -It can also be used by developers to run unit tests or build the docs in -a fresh and/or isolated unix environment. +It can also be used by developers to run unit tests or build the docs in +a fresh and/or isolated unix environment. This document contains information about: 1. [Usage](#usage): how to use the image and its main modes. @@ -42,7 +42,7 @@ docker run -v PATH/TO/FILE:/openml/MY_SCRIPT.py openml/openml-python MY_SCRIPT.p ### Running unit tests You can run the unit tests by passing `test` as the first argument. -It also requires a local or remote repository to be specified, which is explained +It also requires a local or remote repository to be specified, which is explained [below]((#using-local-or-remote-code). For this example, we specify to test the `develop` branch: @@ -52,8 +52,8 @@ docker run openml/openml-python test develop ### Building documentation -You can build the documentation by passing `doc` as the first argument, -you should [mount]((https://docs.docker.com/storage/bind-mounts/#start-a-container-with-a-bind-mount)) +You can build the documentation by passing `doc` as the first argument, +you should [mount]((https://docs.docker.com/storage/bind-mounts/#start-a-container-with-a-bind-mount)) an output directory in which the docs will be stored. You also need to provide a remote or local repository as explained in [the section below]((#using-local-or-remote-code). In this example, we build documentation for the `develop` branch. @@ -67,7 +67,7 @@ on Linux: ```text docker run --mount type=bind,source="./output",destination="/output" openml/openml-python doc develop ``` - + see [the section below]((#using-local-or-remote-code) for running against local changes or a remote branch. @@ -106,26 +106,26 @@ The branch may be specified by name directly if it exists on the original reposi Where `BRANCH` is the name of the branch for which to generate the documentation. It is also possible to build the documentation from the branch on a fork, -in this case the `BRANCH` should be specified as `GITHUB_NAME#BRANCH` (e.g. +in this case the `BRANCH` should be specified as `GITHUB_NAME#BRANCH` (e.g. `PGijsbers#my_feature_branch`) and the name of the forked repository should be `openml-python`. ## For developers -This section contains some notes about the structure of the image, +This section contains some notes about the structure of the image, intended for those who want to work on it. ### Added Directories The `openml/openml-python` image is built on a vanilla `python:3` image. Additionally, it contains the following files are directories: - - `/openml`: contains the openml-python repository in the state with which the image - was built by default. If working with a `BRANCH`, this repository will be set to + - `/openml`: contains the openml-python repository in the state with which the image + was built by default. If working with a `BRANCH`, this repository will be set to the `HEAD` of `BRANCH`. - `/openml/venv/`: contains the used virtual environment for `doc` and `test`. It has - `openml-python` dependencies pre-installed. When invoked with `doc` or `test`, the + `openml-python` dependencies pre-installed. When invoked with `doc` or `test`, the dependencies will be updated based on the `setup.py` of the `BRANCH` or mounted `/code`. - `/scripts/startup.sh`: the entrypoint of the image. Takes care of the automated features (e.g. `doc` and `test`). ## Building the image To build the image yourself, execute `docker build -f Dockerfile .` from the `docker` -directory of the `openml-python` repository. It will use the `startup.sh` as is, so any +directory of the `openml-python` repository. It will use the `startup.sh` as is, so any local changes will be present in the image. diff --git a/docker/startup.sh b/docker/startup.sh index 34a5c61f3..97ae12b49 100644 --- a/docker/startup.sh +++ b/docker/startup.sh @@ -77,4 +77,4 @@ if [ "$1" == "doc" ]; then if [ -d "/output" ]; then cp -r /openml/doc/build /output fi -fi \ No newline at end of file +fi diff --git a/docs/details.md b/docs/details.md index bf4b0cd2b..634524e9a 100644 --- a/docs/details.md +++ b/docs/details.md @@ -1,7 +1,7 @@ # Advanced User Guide This document highlights some of the more advanced features of -`openml-python`. +`openml-python`. ## Configuration @@ -34,7 +34,7 @@ following keys are defined: trying to reconnect for a longer time, quickly increasing the time between retries. -- connection_n_retries: number of times to retry a request if they fail. +- connection_n_retries: number of times to retry a request if they fail. Default depends on retry_policy (5 for `human`, 50 for `robot`) - verbosity: the level of output: - 0: normal output @@ -43,7 +43,7 @@ Default depends on retry_policy (5 for `human`, 50 for `robot`) This file is easily configurable by the `openml` command line interface. To see where the file is stored, and what its values are, use openml -configure none. +configure none. ## Docker @@ -73,4 +73,3 @@ list of **tasks**. A further explanation is given in the [OpenML user guide](https://docs.openml.org/concepts/). - diff --git a/docs/extensions.md b/docs/extensions.md index 858447440..7f7dd19a6 100644 --- a/docs/extensions.md +++ b/docs/extensions.md @@ -31,7 +31,7 @@ extension interface to allows others to contribute back. Building a suitable extension for therefore requires an understanding of the current OpenML-Python support. -[This tutorial](../examples/Basics/simple_flows_and_runs_tutorial) shows how the scikit-learn +[This tutorial](../examples/Basics/simple_flows_and_runs_tutorial) shows how the scikit-learn extension works with OpenML-Python. #### API @@ -52,7 +52,7 @@ must be called to allow OpenML-Python to interface the new extension. The following methods should get implemented. Although the documentation in the extension interface should always be leading, here -we list some additional information and best practices. +we list some additional information and best practices. Note that most methods are relatively simple and can be implemented in several lines of code. @@ -135,7 +135,7 @@ and can be implemented in several lines of code. ### Hosting the library Each extension created should be a stand-alone repository, compatible -with the [OpenML-Python repository](https://github.com/openml/openml-python). +with the [OpenML-Python repository](https://github.com/openml/openml-python). The extension repository should work off-the-shelf with *OpenML-Python* installed. Create a public Github repo with the following directory structure: diff --git a/docs/index.md b/docs/index.md index 1058c3956..bc708f3e3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,13 +4,13 @@ Welcome to the documentation of the OpenML Python API, a connector to the collaborative machine learning platform -[OpenML.org](https://www.openml.org). +[OpenML.org](https://www.openml.org). OpenML-Python can download or upload data from OpenML, such as datasets and machine learning experiment results. If you are new to OpenML, we recommend checking out the [OpenML documentation](https://docs.openml.org/) -to get familiar with the concepts and features of OpenML. In particular, we recommend -reading more about the [OpenML concepts](https://docs.openml.org/concepts/). +to get familiar with the concepts and features of OpenML. In particular, we recommend +reading more about the [OpenML concepts](https://docs.openml.org/concepts/). ## :joystick: Minimal Examples @@ -78,7 +78,7 @@ Contributing to the OpenML package is highly appreciated. Please see the ## Citing OpenML-Python If you use OpenML-Python in a scientific publication, we would -appreciate a reference to our JMLR-MLOSS paper +appreciate a reference to our JMLR-MLOSS paper ["OpenML-Python: an extensible Python API for OpenML"](https://www.jmlr.org/papers/v22/19-920.html): === "Bibtex" @@ -98,6 +98,6 @@ appreciate a reference to our JMLR-MLOSS paper === "MLA" - Feurer, Matthias, et al. + Feurer, Matthias, et al. "OpenML-Python: an extensible Python API for OpenML." _Journal of Machine Learning Research_ 22.100 (2021):1−5. diff --git a/examples/_external_or_deprecated/README.md b/examples/_external_or_deprecated/README.md index d25a81baa..d37c87fcd 100644 --- a/examples/_external_or_deprecated/README.md +++ b/examples/_external_or_deprecated/README.md @@ -1,5 +1,5 @@ # External or Deprecated Examples -This directory contains examples that are either external or deprecated. They may not be maintained or updated +This directory contains examples that are either external or deprecated. They may not be maintained or updated regularly, and their functionality might not align with the latest version of the library. Moreover, -they are not shown on the documentation website. \ No newline at end of file +they are not shown on the documentation website. diff --git a/openml/base.py b/openml/base.py index a282be8eb..80253df2f 100644 --- a/openml/base.py +++ b/openml/base.py @@ -4,7 +4,7 @@ import re import webbrowser from abc import ABC, abstractmethod -from collections.abc import Iterable, Sequence +from typing import TYPE_CHECKING import xmltodict @@ -13,6 +13,9 @@ from .utils import _get_rest_api_type_alias, _tag_openml_base +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + class OpenMLBase(ABC): """Base object for functionality that is shared across entities.""" diff --git a/openml/cli.py b/openml/cli.py index 0afb089c2..1117b5e5e 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -5,13 +5,16 @@ import argparse import string import sys -from collections.abc import Callable from pathlib import Path +from typing import TYPE_CHECKING from urllib.parse import urlparse from openml import config from openml.__version__ import __version__ +if TYPE_CHECKING: + from collections.abc import Callable + def is_hex(string_: str) -> bool: return all(c in string.hexdigits for c in string_) diff --git a/openml/config.py b/openml/config.py index e6104fd7f..7f62ba8a3 100644 --- a/openml/config.py +++ b/openml/config.py @@ -10,14 +10,16 @@ import platform import shutil import warnings -from collections.abc import Iterator from contextlib import contextmanager from io import StringIO from pathlib import Path -from typing import Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast from typing_extensions import TypedDict from urllib.parse import urlparse +if TYPE_CHECKING: + from collections.abc import Iterator + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py index 0598763b0..960205781 100644 --- a/openml/datasets/data_feature.py +++ b/openml/datasets/data_feature.py @@ -1,10 +1,11 @@ # License: BSD 3-Clause from __future__ import annotations -from collections.abc import Sequence from typing import TYPE_CHECKING, Any, ClassVar if TYPE_CHECKING: + from collections.abc import Sequence + from IPython.lib import pretty diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py index e391d109a..b1c710de8 100644 --- a/openml/extensions/extension_interface.py +++ b/openml/extensions/extension_interface.py @@ -2,11 +2,12 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections import OrderedDict from typing import TYPE_CHECKING, Any # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: + from collections import OrderedDict + import numpy as np import scipy.sparse diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 7dd84fdee..4413cc3de 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -3,9 +3,8 @@ import logging from collections import OrderedDict -from collections.abc import Hashable, Sequence from pathlib import Path -from typing import Any, cast +from typing import TYPE_CHECKING, Any, cast import xmltodict @@ -13,6 +12,9 @@ from openml.extensions import Extension, get_extension_by_flow from openml.utils import extract_xml_tags +if TYPE_CHECKING: + from collections.abc import Hashable, Sequence + class OpenMLFlow(OpenMLBase): """OpenML Flow. Stores machine learning models. diff --git a/openml/runs/run.py b/openml/runs/run.py index eff011408..9d7ce9e5c 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -4,7 +4,6 @@ import pickle import time from collections import OrderedDict -from collections.abc import Callable, Sequence from pathlib import Path from typing import ( TYPE_CHECKING, @@ -31,6 +30,8 @@ ) if TYPE_CHECKING: + from collections.abc import Callable, Sequence + from openml.runs.trace import OpenMLRunTrace diff --git a/openml/runs/trace.py b/openml/runs/trace.py index 708cdd8f1..260dd60c9 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -3,15 +3,17 @@ import json from collections import OrderedDict -from collections.abc import Iterator from dataclasses import dataclass from pathlib import Path -from typing import IO, Any +from typing import IO, TYPE_CHECKING, Any from typing_extensions import Self import arff import xmltodict +if TYPE_CHECKING: + from collections.abc import Iterator + PREFIX = "parameter_" REQUIRED_ATTRIBUTES = [ "repeat", diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 4bf279ed1..263ae6e11 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -2,11 +2,10 @@ from __future__ import annotations from collections import OrderedDict -from collections.abc import Iterable from functools import partial from itertools import chain from pathlib import Path -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import pandas as pd import xmltodict @@ -19,6 +18,9 @@ from .setup import OpenMLParameter, OpenMLSetup +if TYPE_CHECKING: + from collections.abc import Iterable + def setup_exists(flow: OpenMLFlow) -> int: """ diff --git a/openml/study/study.py b/openml/study/study.py index 7a9c80bbe..8b4c8bdf6 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -2,12 +2,14 @@ # TODO(eddiebergman): Begging for dataclassses to shorten this all from __future__ import annotations -from collections.abc import Sequence -from typing import Any +from typing import TYPE_CHECKING, Any from openml.base import OpenMLBase from openml.config import get_server_base_url +if TYPE_CHECKING: + from collections.abc import Sequence + class BaseStudy(OpenMLBase): """ diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 464e41b2a..4429dbbf9 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -3,13 +3,15 @@ import pickle from collections import OrderedDict -from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any from typing_extensions import NamedTuple import arff # type: ignore import numpy as np +if TYPE_CHECKING: + from pathlib import Path + class Split(NamedTuple): """A single split of a dataset.""" diff --git a/openml/tasks/task.py b/openml/tasks/task.py index b297a105c..8072175fc 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -5,9 +5,7 @@ import warnings from abc import ABC -from collections.abc import Sequence from enum import Enum -from pathlib import Path from typing import TYPE_CHECKING, Any from typing_extensions import TypedDict @@ -20,6 +18,9 @@ from .split import OpenMLSplit if TYPE_CHECKING: + from collections.abc import Sequence + from pathlib import Path + import numpy as np import pandas as pd diff --git a/openml/utils.py b/openml/utils.py index 3680bc0ff..2167ddf6a 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -4,7 +4,6 @@ import contextlib import shutil import warnings -from collections.abc import Callable, Mapping, Sized from functools import wraps from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload @@ -23,6 +22,8 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sized + from openml.base import OpenMLBase P = ParamSpec("P") diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..1cc9b88c7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -295,11 +295,11 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): - return Path(__file__).parent / "files" + return Path(__file__).parent / "files" @pytest.fixture def workdir(tmp_path): diff --git a/tests/files/misc/trace.arff b/tests/files/misc/trace.arff index 8690f2ec6..d86872770 100644 --- a/tests/files/misc/trace.arff +++ b/tests/files/misc/trace.arff @@ -514,6 +514,6 @@ 0,9,47,1.0,false,52803.95318629395,0.48840311594449337,2.781026953358017,0.24387504713836486,0,0.0010179658001682542,'\"median\"' 0,9,48,0.916518650089,false,4.212729480184038,-0.4237018370034965,1.3846048401020126,1.2806750694258338,1,0.0013379343290737073,'\"most_frequent\"' 0,9,49,1.0,false,8255.254357596426,0.15512460844121778,2.632883048107603,9.685313873205853,1,9.29186571775498e-06,'\"median\"' -% -% -% \ No newline at end of file +% +% +% diff --git a/tests/files/mock_responses/datasets/data_delete_not_owned.xml b/tests/files/mock_responses/datasets/data_delete_not_owned.xml index 7d412d48e..1548bad36 100644 --- a/tests/files/mock_responses/datasets/data_delete_not_owned.xml +++ b/tests/files/mock_responses/datasets/data_delete_not_owned.xml @@ -1,4 +1,4 @@ 353 Dataset is not owned by you - \ No newline at end of file + diff --git a/tests/files/mock_responses/datasets/data_description_61.xml b/tests/files/mock_responses/datasets/data_description_61.xml index fc25e5861..b510f2144 100644 --- a/tests/files/mock_responses/datasets/data_description_61.xml +++ b/tests/files/mock_responses/datasets/data_description_61.xml @@ -2,22 +2,22 @@ 61 iris 1 - **Author**: R.A. Fisher -**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Iris) - 1936 - Donated by Michael Marshall -**Please cite**: + **Author**: R.A. Fisher +**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Iris) - 1936 - Donated by Michael Marshall +**Please cite**: -**Iris Plants Database** +**Iris Plants Database** This is perhaps the best known database to be found in the pattern recognition literature. Fisher's paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. -Predicted attribute: class of iris plant. -This is an exceedingly simple domain. - +Predicted attribute: class of iris plant. +This is an exceedingly simple domain. + ### Attribute Information: 1. sepal length in cm 2. sepal width in cm 3. petal length in cm 4. petal width in cm - 5. class: + 5. class: -- Iris Setosa -- Iris Versicolour -- Iris Virginica diff --git a/tests/files/org/openml/test/datasets/-1/qualities.xml b/tests/files/org/openml/test/datasets/-1/qualities.xml index 32a27a42c..9cead5382 100644 --- a/tests/files/org/openml/test/datasets/-1/qualities.xml +++ b/tests/files/org/openml/test/datasets/-1/qualities.xml @@ -77,4 +77,3 @@ 0.004999750012499375 - diff --git a/tests/files/org/openml/test/datasets/2/dataset.arff b/tests/files/org/openml/test/datasets/2/dataset.arff index 401cb5e72..b40f4a1e9 100644 --- a/tests/files/org/openml/test/datasets/2/dataset.arff +++ b/tests/files/org/openml/test/datasets/2/dataset.arff @@ -1,21 +1,21 @@ % 1. Title of Database: Annealing Data -% +% % 2. Source Information: donated by David Sterling and Wray Buntine. -% +% % 3. Past Usage: unknown -% +% % 4. Relevant Information: % -- Explanation: I suspect this was left by Ross Quinlan in 1987 at the % 4th Machine Learning Workshop. I'd have to check with Jeff Schlimmer % to double check this. -% +% % 5. Number of Instances: 798 -% +% % 6. Number of Attributes: 38 % -- 6 continuously-valued % -- 3 integer-valued % -- 29 nominal-valued -% +% % 7. Attribute Information: % 1. family: --,GB,GK,GS,TN,ZA,ZF,ZH,ZM,ZS % 2. product-type: C, H, G @@ -56,11 +56,11 @@ % 37. bore: 0000,0500,0600,0760 % 38. packing: -,1,2,3 % classes: 1,2,3,4,5,U -% +% % -- The '-' values are actually 'not_applicable' values rather than % 'missing_values' (and so can be treated as legal discrete % values rather than as showing the absence of a discrete value). -% +% % 8. Missing Attribute Values: Signified with "?" % Attribute: Number of instances missing its value: % 1 0 @@ -102,7 +102,7 @@ % 37 0 % 38 789 % 39 0 -% +% % 9. Distribution of Classes % Class Name: Number of Instances: % 1 8 @@ -113,7 +113,7 @@ % U 34 % --- % 798 -% +% @relation anneal.ORIG @attribute 'family' { GB , GK , GS , TN , ZA , ZF , ZH , ZM , ZS } @attribute 'product-type' { C , H , G } diff --git a/tests/files/org/openml/test/datasets/2/description.xml b/tests/files/org/openml/test/datasets/2/description.xml index 579ef9813..138add198 100644 --- a/tests/files/org/openml/test/datasets/2/description.xml +++ b/tests/files/org/openml/test/datasets/2/description.xml @@ -2,28 +2,28 @@ 2 anneal 1 - **Author**: -**Source**: Unknown - -**Please cite**: + **Author**: +**Source**: Unknown - +**Please cite**: 1. Title of Database: Annealing Data - + 2. Source Information: donated by David Sterling and Wray Buntine. - + 3. Past Usage: unknown - + 4. Relevant Information: -- Explanation: I suspect this was left by Ross Quinlan in 1987 at the 4th Machine Learning Workshop. I'd have to check with Jeff Schlimmer to double check this. - + 5. Number of Instances: 798 - + 6. Number of Attributes: 38 -- 6 continuously-valued -- 3 integer-valued -- 29 nominal-valued - + 7. Attribute Information: 1. family: --,GB,GK,GS,TN,ZA,ZF,ZH,ZM,ZS 2. product-type: C, H, G @@ -64,11 +64,11 @@ 37. bore: 0000,0500,0600,0760 38. packing: -,1,2,3 classes: 1,2,3,4,5,U - + -- The '-' values are actually 'not_applicable' values rather than 'missing_values' (and so can be treated as legal discrete values rather than as showing the absence of a discrete value). - + 8. Missing Attribute Values: Signified with "?" Attribute: Number of instances missing its value: 1 0 @@ -110,7 +110,7 @@ 37 0 38 789 39 0 - + 9. Distribution of Classes Class Name: Number of Instances: 1 8 @@ -124,7 +124,7 @@ ARFF 2014-04-06T23:19:24 Public http://www.openml.org/data/download/1666876/phpFsFYVN - + 1666876 class 1 hallostudy_1uciwelt public active 4eaed8b6ec9d8211024b6c089b064761 diff --git a/tests/files/org/openml/test/datasets/2/qualities.xml b/tests/files/org/openml/test/datasets/2/qualities.xml index c1fb36069..1b7df8bed 100644 --- a/tests/files/org/openml/test/datasets/2/qualities.xml +++ b/tests/files/org/openml/test/datasets/2/qualities.xml @@ -1,530 +1,530 @@ - + ClassCount 6.0 - + ClassEntropy -1.0 - + DecisionStumpAUC 0.8652735384332186 - + DecisionStumpErrRate 0.22828507795100222 - + DecisionStumpKappa 0.4503332218612649 - + DefaultAccuracy 0.7616926503340757 - + Dimensionality 0.043429844097995544 - + EquivalentNumberOfAtts -22.557085312981506 - + HoeffdingAdwin.changes 0.0 - + HoeffdingAdwin.warnings 0.0 - + HoeffdingDDM.changes 1.0 - + HoeffdingDDM.warnings 15.0 - + IncompleteInstanceCount 898.0 - + InstanceCount 898.0 - + J48.00001.AUC 0.5939933958517685 - + J48.00001.ErrRate 0.19710467706013363 - + J48.00001.Kappa 0.267904435979752 - + J48.0001.AUC 0.5939933958517685 - + J48.0001.ErrRate 0.19710467706013363 - + J48.0001.Kappa 0.267904435979752 - + J48.001.AUC 0.5939933958517685 - + J48.001.ErrRate 0.19710467706013363 - + J48.001.Kappa 0.267904435979752 - + JRipAUC 0.9109739092241762 - + JRipErrRate 0.06570155902004454 - + JRipKappa 0.8280056485253776 - + MajorityClassSize 684 - + MaxNominalAttDistinctValues 9.0 - + MeanAttributeEntropy -1.0 - + MeanKurtosisOfNumericAtts 4.607030275019118 - + MeanMeansOfNumericAtts 348.50426818856744 - + MeanMutualInformation 0.0443319686974143 - + MeanNominalAttDistinctValues 2.3125 - + MeanSkewnessOfNumericAtts 2.022468153229902 - + MeanStdDevOfNumericAtts 405.17326983790934 - + MinNominalAttDistinctValues 1.0 - + MinorityClassSize 0 - + NBTreeAUC 0.9931336253518552 - + NBTreeErrRate 0.025612472160356347 - + NBTreeKappa 0.934554534191406 - + NaiveBayesAUC 0.9315907109421729 - + NaiveBayesAdwin.changes 0.0 - + NaiveBayesAdwin.warnings 0.0 - + NaiveBayesDdm.changes 0.0 - + NaiveBayesDdm.warnings 6.0 - + NaiveBayesErrRate 0.24610244988864144 - + NaiveBayesKappa 0.5569590016631507 - + NegativePercentage 0.7616926503340757 - + NoiseToSignalRatio -23.55708531298151 - + NumAttributes 39.0 - + NumBinaryAtts 7.0 - + NumMissingValues 22175.0 - + NumNominalAtts 32.0 - + NumNumericAtts 6.0 - + NumberOfClasses 6 - + NumberOfFeatures 39 - + NumberOfInstances 898 - + NumberOfInstancesWithMissingValues 898 - + NumberOfMissingValues 22175 - + NumberOfNumericFeatures 6 - + NumberOfSymbolicFeatures 32 - + PercentageOfBinaryAtts 0.1794871794871795 - + PercentageOfMissingValues 0.6331734338415853 - + PercentageOfNominalAtts 0.8205128205128205 - + PercentageOfNumericAtts 0.15384615384615385 - + PositivePercentage 0.0 - + REPTreeDepth1AUC 0.5692694611232046 - + REPTreeDepth1ErrRate 0.2182628062360802 - + REPTreeDepth1Kappa 0.14164212004759766 - + REPTreeDepth2AUC 0.7133659706134035 - + REPTreeDepth2ErrRate 0.19265033407572382 - + REPTreeDepth2Kappa 0.30086540149138874 - + REPTreeDepth3AUC 0.822151031071005 - + REPTreeDepth3ErrRate 0.1714922048997773 - + REPTreeDepth3Kappa 0.4189509420009749 - + RandomTreeDepth1AUC 0.6369584297524356 - + RandomTreeDepth1ErrRate 0.2383073496659243 - + RandomTreeDepth1Kappa 0.0 - + RandomTreeDepth2AUC 0.6965727371473568 - + RandomTreeDepth2ErrRate 0.24276169265033407 - + RandomTreeDepth2Kappa -0.005216998377390389 - + RandomTreeDepth3AUC 0.7789906449828373 - + RandomTreeDepth3ErrRate 0.22939866369710468 - + RandomTreeDepth3Kappa 0.09436363903399038 - + SVMe1AUC 0.9304778273868244 - + SVMe1ErrRate 0.1291759465478842 - + SVMe1Kappa 0.6587162215291064 - + SVMe2AUC 0.9433808010825946 - + SVMe2ErrRate 0.10801781737193764 - + SVMe2Kappa 0.7229082765509388 - + SVMe3AUC 0.9304096988871444 - + SVMe3ErrRate 0.11247216035634744 - + SVMe3Kappa 0.7085970582754477 - + SimpleLogisticAUC 0.9442627474199359 - + SimpleLogisticErrRate 0.11358574610244988 - + SimpleLogisticKappa 0.7031828227379664 - + StdvNominalAttDistinctValues 2.070335799473275 - + kNN_1NAUC 0.8721948540771287 - + kNN_1NErrRate 0.06347438752783964 - + kNN_1NKappa 0.8261102938928316 - + kNN_2NAUC 0.903736290844985 - + kNN_2NErrRate 0.07349665924276169 - + kNN_2NKappa 0.79861023802647 - + kNN_3NAUC 0.9295206197723355 - + kNN_3NErrRate 0.08351893095768374 - + kNN_3NKappa 0.7631665008070274 diff --git a/tests/files/org/openml/test/setups/1/description.xml b/tests/files/org/openml/test/setups/1/description.xml index 5717ad9f5..a1ce1c4a8 100644 --- a/tests/files/org/openml/test/setups/1/description.xml +++ b/tests/files/org/openml/test/setups/1/description.xml @@ -22,4 +22,3 @@ 2 - diff --git a/tests/files/org/openml/test/tasks/1/datasplits.arff b/tests/files/org/openml/test/tasks/1/datasplits.arff index f30057749..be142c5c4 100644 --- a/tests/files/org/openml/test/tasks/1/datasplits.arff +++ b/tests/files/org/openml/test/tasks/1/datasplits.arff @@ -8985,4 +8985,4 @@ TEST,302,0,9 TEST,179,0,9 TEST,788,0,9 TEST,588,0,9 -TEST,395,0,9 \ No newline at end of file +TEST,395,0,9 diff --git a/tests/files/org/openml/test/tasks/1882/datasplits.arff b/tests/files/org/openml/test/tasks/1882/datasplits.arff index 1a55f6531..75907131f 100644 --- a/tests/files/org/openml/test/tasks/1882/datasplits.arff +++ b/tests/files/org/openml/test/tasks/1882/datasplits.arff @@ -89805,4 +89805,4 @@ TEST,431,9,9 TEST,731,9,9 TEST,360,9,9 TEST,371,9,9 -TEST,870,9,9 \ No newline at end of file +TEST,870,9,9 diff --git a/tests/files/org/openml/test/tasks/3/datasplits.arff b/tests/files/org/openml/test/tasks/3/datasplits.arff index a22d68898..4ac591246 100644 --- a/tests/files/org/openml/test/tasks/3/datasplits.arff +++ b/tests/files/org/openml/test/tasks/3/datasplits.arff @@ -31965,4 +31965,4 @@ TEST,2224,0,9 TEST,2318,0,9 TEST,1660,0,9 TEST,484,0,9 -TEST,860,0,9 \ No newline at end of file +TEST,860,0,9 diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index b13bac30b..b79a86c74 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -305,7 +305,7 @@ def test_get_feature_with_ontology_data_id_11(): assert len(dataset.features) == 7 assert len(dataset.features[1].ontologies) >= 2 assert len(dataset.features[2].ontologies) >= 1 - assert len(dataset.features[3].ontologies) >= 1 + assert len(dataset.features[3].ontologies) >= 1 @pytest.mark.uses_test_server() def test_add_remove_ontology_to_dataset(): @@ -313,7 +313,7 @@ def test_add_remove_ontology_to_dataset(): feature_index = 1 ontology = "https://www.openml.org/unittest/" + str(time()) openml.datasets.functions.data_feature_add_ontology(did, feature_index, ontology) - openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology) + openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology) @pytest.mark.uses_test_server() def test_add_same_ontology_multiple_features(): @@ -321,7 +321,7 @@ def test_add_same_ontology_multiple_features(): ontology = "https://www.openml.org/unittest/" + str(time()) for i in range(3): - openml.datasets.functions.data_feature_add_ontology(did, i, ontology) + openml.datasets.functions.data_feature_add_ontology(did, i, ontology) @pytest.mark.uses_test_server() @@ -333,7 +333,7 @@ def test_add_illegal_long_ontology(): assert False except openml.exceptions.OpenMLServerException as e: assert e.code == 1105 - + @pytest.mark.uses_test_server() From f3d5d82799e8dee32150d838498e9f1bbe68a579 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:30:27 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_extensions/test_functions.py | 22 +++++++++++----------- tests/test_utils/test_utils.py | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 90fbaa9f1..0e7b9aae0 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -116,16 +116,16 @@ def test_get_extension_by_flow(self): # We replace the global list with a new empty list [] ONLY for this block with patch("openml.extensions.extensions", []): assert get_extension_by_flow(DummyFlow()) is None - + with pytest.raises(ValueError, match="No extension registered which can handle flow:"): get_extension_by_flow(DummyFlow(), raise_if_no_extension=True) - + register_extension(DummyExtension1) assert isinstance(get_extension_by_flow(DummyFlow()), DummyExtension1) - + register_extension(DummyExtension2) assert isinstance(get_extension_by_flow(DummyFlow()), DummyExtension1) - + register_extension(DummyExtension1) with pytest.raises( ValueError, match="Multiple extensions registered which can handle flow:" @@ -136,16 +136,16 @@ def test_get_extension_by_model(self): # Again, we start with a fresh empty list automatically with patch("openml.extensions.extensions", []): assert get_extension_by_model(DummyModel()) is None - + with pytest.raises(ValueError, match="No extension registered which can handle model:"): get_extension_by_model(DummyModel(), raise_if_no_extension=True) - + register_extension(DummyExtension1) assert isinstance(get_extension_by_model(DummyModel()), DummyExtension1) - + register_extension(DummyExtension2) assert isinstance(get_extension_by_model(DummyModel()), DummyExtension1) - + register_extension(DummyExtension1) with pytest.raises( ValueError, match="Multiple extensions registered which can handle model:" @@ -208,7 +208,7 @@ class UnknownModel: with pytest.raises(ValueError, match="No extension registered"): get_extension_by_model(UnknownModel(), raise_if_no_extension=True) - + def test_register_same_extension_twice(): """Test behavior when registering same extension twice.""" # Using a context manager here to isolate the list @@ -225,7 +225,7 @@ def test_register_same_extension_twice(): @patch("openml.extensions.extensions", []) def test_extension_priority_order(): - """Test that extensions are checked in registration order.""" + """Test that extensions are checked in registration order.""" class DummyExtensionA(DummyExtension): pass class DummyExtensionB(DummyExtension): @@ -235,4 +235,4 @@ class DummyExtensionB(DummyExtension): register_extension(DummyExtensionB) assert openml.extensions.extensions[0] is DummyExtensionA - assert openml.extensions.extensions[1] is DummyExtensionB \ No newline at end of file + assert openml.extensions.extensions[1] is DummyExtensionB diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 8dbdd30b5..7ff8d057e 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -161,7 +161,7 @@ def test_get_cache_size(config_mock,tmp_path): This test uses a temporary directory (tmp_path) as the cache location by patching the configuration via config_mock. It verifies two conditions: - empty cache and after dataset fetch. + empty cache and after dataset fetch. Parameters ---------- @@ -170,12 +170,12 @@ def test_get_cache_size(config_mock,tmp_path): tmp_path : pathlib.Path A pytest-provided temporary directory used as an isolated cache location. """ - + config_mock.return_value = tmp_path cache_size = openml.utils.get_cache_size() assert cache_size == 0 sub_dir = tmp_path / "subdir" sub_dir.mkdir() (sub_dir / "nested_file.txt").write_bytes(b"b" * 100) - - assert openml.utils.get_cache_size() == 100 \ No newline at end of file + + assert openml.utils.get_cache_size() == 100