From 0159f474c6bbc15f20d52bc946bd252bd852b196 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 30 Dec 2025 09:11:27 +0500 Subject: [PATCH 01/86] set up folder structure and base code --- openml/_api/__init__.py | 8 +++ openml/_api/config.py | 5 ++ openml/_api/http/__init__.py | 1 + openml/_api/http/client.py | 23 ++++++ openml/_api/http/utils.py | 0 openml/_api/resources/__init__.py | 2 + openml/_api/resources/base.py | 22 ++++++ openml/_api/resources/datasets.py | 13 ++++ openml/_api/resources/tasks.py | 113 ++++++++++++++++++++++++++++++ openml/_api/runtime/core.py | 58 +++++++++++++++ openml/_api/runtime/fallback.py | 5 ++ openml/tasks/functions.py | 8 ++- 12 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 openml/_api/__init__.py create mode 100644 openml/_api/config.py create mode 100644 openml/_api/http/__init__.py create mode 100644 openml/_api/http/client.py create mode 100644 openml/_api/http/utils.py create mode 100644 openml/_api/resources/__init__.py create mode 100644 openml/_api/resources/base.py create mode 100644 openml/_api/resources/datasets.py create mode 100644 openml/_api/resources/tasks.py create mode 100644 openml/_api/runtime/core.py create mode 100644 openml/_api/runtime/fallback.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..5089f94dd --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,8 @@ +from openml._api.runtime.core import APIContext + + +def set_api_version(version: str, strict=False): + api_context.set_version(version=version, strict=strict) + + +api_context = APIContext() diff --git a/openml/_api/config.py b/openml/_api/config.py new file mode 100644 index 000000000..bd93c3cad --- /dev/null +++ b/openml/_api/config.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +API_V1_SERVER = "https://www.openml.org/api/v1/xml" +API_V2_SERVER = "http://127.0.0.1:8001" +API_KEY = "..." 
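As a quick orientation for the entry points this first patch introduces, the snippet below is a rough usage sketch, assuming the patch is applied and the public v1 server is reachable: set_api_version swaps the backend held by the module-level api_context, and resource calls are then routed through api_context.backend. The task id is an arbitrary example value.

    from openml._api import api_context, set_api_version

    set_api_version("v1")  # build and activate the v1 backend
    # Resource access goes through the active backend's resource objects.
    task = api_context.backend.tasks.get(31)  # 31 is an arbitrary example task id

Note that APIContext already builds a default v1 backend at import time, so the explicit set_api_version call is only needed to switch versions or toggle strict mode.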
diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py new file mode 100644 index 000000000..fde2a5b0a --- /dev/null +++ b/openml/_api/http/__init__.py @@ -0,0 +1 @@ +from openml._api.http.client import HTTPClient diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py new file mode 100644 index 000000000..81a9213e3 --- /dev/null +++ b/openml/_api/http/client.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import requests + +from openml.__version__ import __version__ + + +class HTTPClient: + def __init__(self, base_url: str): + self.base_url = base_url + self.headers = {"user-agent": f"openml-python/{__version__}"} + + def get(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.get(url, params=params, headers=self.headers) + + def post(self, path, data=None, files=None): + url = f"{self.base_url}/{path}" + return requests.post(url, data=data, files=files, headers=self.headers) + + def delete(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.delete(url, params=params, headers=self.headers) diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..078fc5998 --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,2 @@ +from openml._api.resources.datasets import DatasetsV1, DatasetsV2 +from openml._api.resources.tasks import TasksV1, TasksV2 diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py new file mode 100644 index 000000000..1fae27665 --- /dev/null +++ b/openml/_api/resources/base.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.http import HTTPClient + + +class ResourceAPI: + def __init__(self, http: HTTPClient): + self._http = http + + +class DatasetsAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... + + +class TasksAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py new file mode 100644 index 000000000..cd1bb595a --- /dev/null +++ b/openml/_api/resources/datasets.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from openml._api.resources.base import DatasetsAPI + + +class DatasetsV1(DatasetsAPI): + def get(self, id): + pass + + +class DatasetsV2(DatasetsAPI): + def get(self, id): + pass diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py new file mode 100644 index 000000000..b0e9afbf8 --- /dev/null +++ b/openml/_api/resources/tasks.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import xmltodict + +from openml._api.resources.base import TasksAPI +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + + +class TasksV1(TasksAPI): + def get(self, id, return_response=False): + path = f"task/{id}" + response = self._http.get(path) + xml_content = response.content + task = self._create_task_from_xml(xml_content) + + if return_response: + return task, response + + return task + + def _create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. 
+ + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? + if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + +class TasksV2(TasksAPI): + def get(self, id): + pass diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py new file mode 100644 index 000000000..80f35587c --- /dev/null +++ b/openml/_api/runtime/core.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from openml._api.config import ( + API_V1_SERVER, + API_V2_SERVER, +) +from openml._api.http.client import HTTPClient +from openml._api.resources import ( + DatasetsV1, + DatasetsV2, + TasksV1, + TasksV2, +) +from openml._api.runtime.fallback import FallbackProxy + + +class APIBackend: + def __init__(self, *, datasets, tasks): + self.datasets = datasets + self.tasks = tasks + + +def build_backend(version: str, strict: bool) -> APIBackend: + v1_http = HTTPClient(API_V1_SERVER) + v2_http = HTTPClient(API_V2_SERVER) + + v1 = APIBackend( + datasets=DatasetsV1(v1_http), + tasks=TasksV1(v1_http), + ) + + if version == "v1": + return v1 + + v2 = APIBackend( + datasets=DatasetsV2(v2_http), + tasks=TasksV2(v2_http), + ) + + if strict: + return v2 + + return APIBackend( + datasets=FallbackProxy(v2.datasets, v1.datasets), + tasks=FallbackProxy(v2.tasks, v1.tasks), + ) + + +class 
APIContext: + def __init__(self): + self._backend = build_backend("v1", strict=False) + + def set_version(self, version: str, strict: bool = False): + self._backend = build_backend(version, strict) + + @property + def backend(self): + return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py new file mode 100644 index 000000000..56e96a966 --- /dev/null +++ b/openml/_api/runtime/fallback.py @@ -0,0 +1,5 @@ +from __future__ import annotations + + +class FallbackProxy: + pass diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index d2bf5e946..91be65965 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,6 +12,7 @@ import openml._api_calls import openml.utils +from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -442,11 +443,12 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") + task, response = api_context.backend.tasks.get(task_id, return_response=True) with xml_file.open("w", encoding="utf8") as fh: - fh.write(task_xml) - return _create_task_from_xml(task_xml) + fh.write(response.text) + + return task def _create_task_from_xml(xml: str) -> OpenMLTask: From 52ef37999fad8509e5e85b8512e442bd9dc69e04 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 5 Jan 2026 12:48:58 +0500 Subject: [PATCH 02/86] fix pre-commit --- openml/_api/__init__.py | 2 +- openml/_api/http/__init__.py | 2 ++ openml/_api/http/client.py | 32 +++++++++++++++++++++++-------- openml/_api/resources/__init__.py | 2 ++ openml/_api/resources/base.py | 13 +++++++++++-- openml/_api/resources/datasets.py | 15 +++++++++++---- openml/_api/resources/tasks.py | 25 +++++++++++++++++++----- openml/_api/runtime/__init__.py | 0 openml/_api/runtime/core.py | 23 +++++++++++----------- openml/_api/runtime/fallback.py | 9 ++++++++- openml/tasks/functions.py | 12 ++++++++---- 11 files changed, 99 insertions(+), 36 deletions(-) create mode 100644 openml/_api/runtime/__init__.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 5089f94dd..881f40671 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -1,7 +1,7 @@ from openml._api.runtime.core import APIContext -def set_api_version(version: str, strict=False): +def set_api_version(version: str, *, strict: bool = False) -> None: api_context.set_version(version=version, strict=strict) diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py index fde2a5b0a..8e6d1e4ce 100644 --- a/openml/_api/http/__init__.py +++ b/openml/_api/http/__init__.py @@ -1 +1,3 @@ from openml._api.http.client import HTTPClient + +__all__ = ["HTTPClient"] diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 81a9213e3..dea5de809 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,23 +1,39 @@ from __future__ import annotations +from typing import Any, Mapping + import requests +from requests import Response from openml.__version__ import __version__ class HTTPClient: - def __init__(self, base_url: str): + def __init__(self, base_url: str) -> None: self.base_url = base_url - self.headers = {"user-agent": f"openml-python/{__version__}"} + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def get(self, path, 
params=None): + def get( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.get(url, params=params, headers=self.headers) + return requests.get(url, params=params, headers=self.headers, timeout=10) - def post(self, path, data=None, files=None): + def post( + self, + path: str, + data: Mapping[str, Any] | None = None, + files: Any = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.post(url, data=data, files=files, headers=self.headers) + return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) - def delete(self, path, params=None): + def delete( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.delete(url, params=params, headers=self.headers) + return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 078fc5998..b1af3c1a8 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,2 +1,4 @@ from openml._api.resources.datasets import DatasetsV1, DatasetsV2 from openml._api.resources.tasks import TasksV1, TasksV2 + +__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index 1fae27665..6fbf8977d 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from requests import Response + from openml._api.http import HTTPClient + from openml.datasets.dataset import OpenMLDataset + from openml.tasks.task import OpenMLTask class ResourceAPI: @@ -14,9 +18,14 @@ def __init__(self, http: HTTPClient): class DatasetsAPI(ResourceAPI, ABC): @abstractmethod - def get(self, id: int) -> dict: ... + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... class TasksAPI(ResourceAPI, ABC): @abstractmethod - def get(self, id: int) -> dict: ... + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... 
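To illustrate the contract the abstract resource classes above define, here is a hypothetical sketch of how an additional resource could plug into them: subclass ResourceAPI together with ABC, declare the abstract get, and let the concrete v1 class route calls through the injected HTTPClient. FlowsAPI and FlowsV1 are invented names used purely for illustration; they are not part of this patch.

    from __future__ import annotations

    from abc import ABC, abstractmethod

    from requests import Response

    from openml._api.resources.base import ResourceAPI


    class FlowsAPI(ResourceAPI, ABC):  # hypothetical, mirrors DatasetsAPI/TasksAPI
        @abstractmethod
        def get(self, flow_id: int) -> Response: ...


    class FlowsV1(FlowsAPI):
        def get(self, flow_id: int) -> Response:
            # self._http is the HTTPClient injected via ResourceAPI.__init__
            return self._http.get(f"flow/{flow_id}")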
diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py index cd1bb595a..9ff1ec278 100644 --- a/openml/_api/resources/datasets.py +++ b/openml/_api/resources/datasets.py @@ -1,13 +1,20 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from openml._api.resources.base import DatasetsAPI +if TYPE_CHECKING: + from responses import Response + + from openml.datasets.dataset import OpenMLDataset + class DatasetsV1(DatasetsAPI): - def get(self, id): - pass + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError class DatasetsV2(DatasetsAPI): - def get(self, id): - pass + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index b0e9afbf8..f494fb9a3 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import xmltodict from openml._api.resources.base import TasksAPI @@ -12,12 +14,20 @@ TaskType, ) +if TYPE_CHECKING: + from requests import Response + class TasksV1(TasksAPI): - def get(self, id, return_response=False): - path = f"task/{id}" + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + path = f"task/{task_id}" response = self._http.get(path) - xml_content = response.content + xml_content = response.text task = self._create_task_from_xml(xml_content) if return_response: @@ -109,5 +119,10 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask: class TasksV2(TasksAPI): - def get(self, id): - pass + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + raise NotImplementedError diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 80f35587c..aa09a69db 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from openml._api.config import ( API_V1_SERVER, API_V2_SERVER, @@ -11,16 +13,18 @@ TasksV1, TasksV2, ) -from openml._api.runtime.fallback import FallbackProxy + +if TYPE_CHECKING: + from openml._api.resources.base import DatasetsAPI, TasksAPI class APIBackend: - def __init__(self, *, datasets, tasks): + def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): self.datasets = datasets self.tasks = tasks -def build_backend(version: str, strict: bool) -> APIBackend: +def build_backend(version: str, *, strict: bool) -> APIBackend: v1_http = HTTPClient(API_V1_SERVER) v2_http = HTTPClient(API_V2_SERVER) @@ -40,19 +44,16 @@ def build_backend(version: str, strict: bool) -> APIBackend: if strict: return v2 - return APIBackend( - datasets=FallbackProxy(v2.datasets, v1.datasets), - tasks=FallbackProxy(v2.tasks, v1.tasks), - ) + return v1 class APIContext: - def __init__(self): + def __init__(self) -> None: self._backend = build_backend("v1", strict=False) - def set_version(self, version: str, strict: bool = False): - self._backend = build_backend(version, strict) + def set_version(self, version: str, *, strict: bool = False) -> None: + self._backend = build_backend(version=version, strict=strict) @property - def backend(self): + def backend(self) -> 
APIBackend: return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py index 56e96a966..1bc99d270 100644 --- a/openml/_api/runtime/fallback.py +++ b/openml/_api/runtime/fallback.py @@ -1,5 +1,12 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + class FallbackProxy: - pass + def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): + self._primary = primary + self._fallback = fallback diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index ef67f75bf..a794ad56d 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -445,10 +445,14 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task, response = api_context.backend.tasks.get(task_id, return_response=True) - - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) + result = api_context.backend.tasks.get(task_id, return_response=True) + + if isinstance(result, tuple): + task, response = result + with xml_file.open("w", encoding="utf8") as fh: + fh.write(response.text) + else: + task = result return task From 5dfcbce55a027d19cd502ea7bb3d521c2b1bca29 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:14:31 +0500 Subject: [PATCH 03/86] refactor --- openml/_api/config.py | 62 +++++++++++++++++++++++++++++++++++-- openml/_api/http/client.py | 18 +++++++---- openml/_api/runtime/core.py | 9 ++---- 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index bd93c3cad..1431f66b1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -1,5 +1,61 @@ from __future__ import annotations -API_V1_SERVER = "https://www.openml.org/api/v1/xml" -API_V2_SERVER = "http://127.0.0.1:8001" -API_KEY = "..." 
+from dataclasses import dataclass +from typing import Literal + +DelayMethod = Literal["human", "robot"] + + +@dataclass +class APIConfig: + server: str + base_url: str + key: str + + +@dataclass +class APISettings: + v1: APIConfig + v2: APIConfig + + +@dataclass +class ConnectionConfig: + retries: int = 3 + delay_method: DelayMethod = "human" + delay_time: int = 1 # seconds + + def __post_init__(self) -> None: + if self.delay_method not in ("human", "robot"): + raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}") + + +@dataclass +class CacheConfig: + dir: str = "~/.openml/cache" + ttl: int = 60 * 60 * 24 * 7 # one week + + +@dataclass +class Settings: + api: APISettings + connection: ConnectionConfig + cache: CacheConfig + + +settings = Settings( + api=APISettings( + v1=APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + key="...", + ), + v2=APIConfig( + server="http://127.0.0.1:8001/", + base_url="", + key="...", + ), + ), + connection=ConnectionConfig(), + cache=CacheConfig(), +) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index dea5de809..74e08c709 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,24 +1,30 @@ from __future__ import annotations -from typing import Any, Mapping +from typing import TYPE_CHECKING, Any, Mapping import requests from requests import Response from openml.__version__ import __version__ +if TYPE_CHECKING: + from openml._api.config import APIConfig + class HTTPClient: - def __init__(self, base_url: str) -> None: - self.base_url = base_url + def __init__(self, config: APIConfig) -> None: + self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _create_url(self, path: str) -> str: + return self.config.server + self.config.base_url + path + def get( self, path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.get(url, params=params, headers=self.headers, timeout=10) def post( @@ -27,7 +33,7 @@ def post( data: Mapping[str, Any] | None = None, files: Any = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) def delete( @@ -35,5 +41,5 @@ def delete( path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index aa09a69db..98b587411 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -2,10 +2,7 @@ from typing import TYPE_CHECKING -from openml._api.config import ( - API_V1_SERVER, - API_V2_SERVER, -) +from openml._api.config import settings from openml._api.http.client import HTTPClient from openml._api.resources import ( DatasetsV1, @@ -25,8 +22,8 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): def build_backend(version: str, *, strict: bool) -> APIBackend: - v1_http = HTTPClient(API_V1_SERVER) - v2_http = HTTPClient(API_V2_SERVER) + v1_http = HTTPClient(config=settings.api.v1) + v2_http = HTTPClient(config=settings.api.v2) v1 = APIBackend( datasets=DatasetsV1(v1_http), From 2acbe9992cf95bfc103ff4fa0c360a58c1842870 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:24:03 +0500 Subject: [PATCH 04/86] implement cache_dir --- 
openml/_api/http/client.py | 74 +++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 74e08c709..49b05c88e 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,36 +1,93 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping +from pathlib import Path +from typing import TYPE_CHECKING, Any +from urllib.parse import urlencode, urljoin, urlparse import requests from requests import Response from openml.__version__ import __version__ +from openml._api.config import settings if TYPE_CHECKING: from openml._api.config import APIConfig -class HTTPClient: +class CacheMixin: + @property + def dir(self) -> str: + return settings.cache.dir + + @property + def ttl(self) -> int: + return settings.cache.ttl + + def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain + path_parts = parsed_url.path.strip("/").split("/") + + # remove api_key and serialize params if any + filtered_params = {k: v for k, v in params.items() if k != "api_key"} + params_part = [urlencode(filtered_params)] if filtered_params else [] + + return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) + + def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 + return None + + def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + return None + + +class HTTPClient(CacheMixin): def __init__(self, config: APIConfig) -> None: self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def _create_url(self, path: str) -> str: - return self.config.server + self.config.base_url + path + @property + def server(self) -> str: + return self.config.server + + @property + def base_url(self) -> str: + return self.config.base_url + + def _create_url(self, path: str) -> Any: + return urljoin(self.server, urljoin(self.base_url, path)) def get( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, + use_cache: bool = False, + use_api_key: bool = False, ) -> Response: url = self._create_url(path) - return requests.get(url, params=params, headers=self.headers, timeout=10) + params = dict(params) if params is not None else {} + + if use_api_key: + params["api_key"] = self.config.key + + if use_cache: + response = self._get_cache_response(url, params) + if response: + return response + + response = requests.get(url, params=params, headers=self.headers, timeout=10) + + if use_cache: + self._set_cache_response(url, params, response) + + return response def post( self, path: str, - data: Mapping[str, Any] | None = None, + *, + data: dict[str, Any] | None = None, files: Any = None, ) -> Response: url = self._create_url(path) @@ -39,7 +96,8 @@ def post( def delete( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, ) -> Response: url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) From af99880a9e16a49833c63084c9e9267c112b6b91 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 23:42:17 +0500 Subject: [PATCH 05/86] refactor --- openml/_api/config.py | 1 + openml/_api/http/client.py | 100 +++++++++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 26 
deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 1431f66b1..848fe8da1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -11,6 +11,7 @@ class APIConfig: server: str base_url: str key: str + timeout: int = 10 # seconds @dataclass diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 49b05c88e..a90e93933 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -23,7 +23,7 @@ def dir(self) -> str: def ttl(self) -> int: return settings.cache.ttl - def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: parsed_url = urlparse(url) netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain path_parts = parsed_url.path.strip("/").split("/") @@ -34,10 +34,10 @@ def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) - def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 - return None + def _get_cache_response(self, cache_dir: Path) -> Response: # noqa: ARG002 + return Response() - def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + def _set_cache_response(self, cache_dir: Path, response: Response) -> None: # noqa: ARG002 return None @@ -54,50 +54,98 @@ def server(self) -> str: def base_url(self) -> str: return self.config.base_url - def _create_url(self, path: str) -> Any: - return urljoin(self.server, urljoin(self.base_url, path)) + @property + def key(self) -> str: + return self.config.key - def get( + @property + def timeout(self) -> int: + return self.config.timeout + + def request( self, + method: str, path: str, *, - params: dict[str, Any] | None = None, use_cache: bool = False, use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - params = dict(params) if params is not None else {} + url = urljoin(self.server, urljoin(self.base_url, path)) + params = request_kwargs.pop("params", {}) + params = params.copy() if use_api_key: - params["api_key"] = self.config.key + params["api_key"] = self.key - if use_cache: - response = self._get_cache_response(url, params) - if response: - return response + headers = request_kwargs.pop("headers", {}) + headers = headers.copy() + headers.update(self.headers) + + timeout = request_kwargs.pop("timeout", self.timeout) + cache_dir = self._get_cache_dir(url, params) - response = requests.get(url, params=params, headers=self.headers, timeout=10) + if use_cache: + try: + return self._get_cache_response(cache_dir) + # TODO: handle ttl expired error + except Exception: + raise + + response = requests.request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + **request_kwargs, + ) if use_cache: - self._set_cache_response(url, params, response) + self._set_cache_response(cache_dir, response) return response - def post( + def get( self, path: str, *, - data: dict[str, Any] | None = None, - files: Any = None, + use_cache: bool = False, + use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) + # TODO: remove override when cache is implemented + use_cache = False + return self.request( + method="GET", + path=path, + use_cache=use_cache, + use_api_key=use_api_key, + **request_kwargs, + ) + + def 
post( + self, + path: str, + **request_kwargs: Any, + ) -> Response: + return self.request( + method="POST", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) def delete( self, path: str, - *, - params: dict[str, Any] | None = None, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - return requests.delete(url, params=params, headers=self.headers, timeout=10) + return self.request( + method="DELETE", + path=path, + use_cache=False, + use_api_key=True, + **request_kwargs, + ) From 4c75e16890a76d8fbc0ddc125a267d23ddaded44 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 15 Jan 2026 14:51:22 +0500 Subject: [PATCH 06/86] undo changes in tasks/functions.py --- openml/tasks/functions.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index a794ad56d..e9b879ae4 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,7 +12,6 @@ import openml._api_calls import openml.utils -from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -445,16 +444,11 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - result = api_context.backend.tasks.get(task_id, return_response=True) + task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") - if isinstance(result, tuple): - task, response = result - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) - else: - task = result - - return task + with xml_file.open("w", encoding="utf8") as fh: + fh.write(task_xml) + return _create_task_from_xml(task_xml) def _create_task_from_xml(xml: str) -> OpenMLTask: From c6033832e8008d0d8f94fa196d519e35f24030c3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 10:47:26 +0500 Subject: [PATCH 07/86] add tests directory --- tests/test_api/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_api/__init__.py diff --git a/tests/test_api/__init__.py b/tests/test_api/__init__.py new file mode 100644 index 000000000..e69de29bb From ff6a8b05314e74bba7ad64388304a3708f83dbf0 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 11:40:23 +0500 Subject: [PATCH 08/86] use enum for delay method --- openml/_api/config.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 848fe8da1..13063df7a 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -1,9 +1,12 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Literal +from enum import Enum -DelayMethod = Literal["human", "robot"] + +class DelayMethod(str, Enum): + HUMAN = "human" + ROBOT = "robot" @dataclass @@ -23,13 +26,9 @@ class APISettings: @dataclass class ConnectionConfig: retries: int = 3 - delay_method: DelayMethod = "human" + delay_method: DelayMethod = DelayMethod.HUMAN delay_time: int = 1 # seconds - def __post_init__(self) -> None: - if self.delay_method not in ("human", "robot"): - raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}") - @dataclass class CacheConfig: From f01898fe88b397b0c981398650664e3ecb3f9b08 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 11:41:33 +0500 Subject: [PATCH 09/86] implement cache --- 
openml/_api/http/client.py | 76 ++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index a90e93933..f76efe5a1 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json +import time from pathlib import Path from typing import TYPE_CHECKING, Any from urllib.parse import urlencode, urljoin, urlparse @@ -34,11 +36,70 @@ def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) - def _get_cache_response(self, cache_dir: Path) -> Response: # noqa: ARG002 - return Response() + def _get_cache_response(self, cache_dir: Path) -> Response: + if not cache_dir.exists(): + raise FileNotFoundError(f"Cache directory not found: {cache_dir}") - def _set_cache_response(self, cache_dir: Path, response: Response) -> None: # noqa: ARG002 - return None + meta_path = cache_dir / "meta.json" + headers_path = cache_dir / "headers.json" + body_path = cache_dir / "body.bin" + + if not (meta_path.exists() and headers_path.exists() and body_path.exists()): + raise FileNotFoundError(f"Incomplete cache at {cache_dir}") + + with meta_path.open("r", encoding="utf-8") as f: + meta = json.load(f) + + created_at = meta.get("created_at") + if created_at is None: + raise ValueError("Cache metadata missing 'created_at'") + + if time.time() - created_at > self.ttl: + raise TimeoutError(f"Cache expired for {cache_dir}") + + with headers_path.open("r", encoding="utf-8") as f: + headers = json.load(f) + + body = body_path.read_bytes() + + response = Response() + response.status_code = meta["status_code"] + response.url = meta["url"] + response.reason = meta["reason"] + response.headers = headers + response._content = body + response.encoding = meta["encoding"] + + return response + + def _set_cache_response(self, cache_dir: Path, response: Response) -> None: + cache_dir.mkdir(parents=True, exist_ok=True) + + # body + (cache_dir / "body.bin").write_bytes(response.content) + + # headers + with (cache_dir / "headers.json").open("w", encoding="utf-8") as f: + json.dump(dict(response.headers), f) + + # meta + meta = { + "status_code": response.status_code, + "url": response.url, + "reason": response.reason, + "encoding": response.encoding, + "elapsed": response.elapsed.total_seconds(), + "created_at": time.time(), + "request": { + "method": response.request.method if response.request else None, + "url": response.request.url if response.request else None, + "headers": dict(response.request.headers) if response.request else None, + "body": response.request.body if response.request else None, + }, + } + + with (cache_dir / "meta.json").open("w", encoding="utf-8") as f: + json.dump(meta, f) class HTTPClient(CacheMixin): @@ -88,7 +149,10 @@ def request( if use_cache: try: return self._get_cache_response(cache_dir) - # TODO: handle ttl expired error + except FileNotFoundError: + pass + except TimeoutError: + pass except Exception: raise @@ -114,8 +178,6 @@ def get( use_api_key: bool = False, **request_kwargs: Any, ) -> Response: - # TODO: remove override when cache is implemented - use_cache = False return self.request( method="GET", path=path, From 5c4511e60b0bc50aba2509bc48bb931082b0caf5 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 21 Jan 2026 13:36:05 +0500 Subject: [PATCH 10/86] refactor clients --- openml/_api/clients/__init__.py | 6 + .../_api/{http/client.py => 
clients/http.py} | 126 +++++++++--------- .../_api/{http/utils.py => clients/minio.py} | 0 openml/_api/config.py | 6 +- openml/_api/http/__init__.py | 3 - openml/_api/runtime/core.py | 37 ++++- 6 files changed, 101 insertions(+), 77 deletions(-) create mode 100644 openml/_api/clients/__init__.py rename openml/_api/{http/client.py => clients/http.py} (61%) rename openml/_api/{http/utils.py => clients/minio.py} (100%) delete mode 100644 openml/_api/http/__init__.py diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py new file mode 100644 index 000000000..8a5ff94e4 --- /dev/null +++ b/openml/_api/clients/__init__.py @@ -0,0 +1,6 @@ +from .http import HTTPCache, HTTPClient + +__all__ = [ + "HTTPCache", + "HTTPClient", +] diff --git a/openml/_api/http/client.py b/openml/_api/clients/http.py similarity index 61% rename from openml/_api/http/client.py rename to openml/_api/clients/http.py index f76efe5a1..4e126ee92 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/clients/http.py @@ -10,42 +10,41 @@ from requests import Response from openml.__version__ import __version__ -from openml._api.config import settings if TYPE_CHECKING: - from openml._api.config import APIConfig + from openml._api.config import DelayMethod -class CacheMixin: - @property - def dir(self) -> str: - return settings.cache.dir +class HTTPCache: + def __init__(self, *, path: Path, ttl: int) -> None: + self.path = path + self.ttl = ttl - @property - def ttl(self) -> int: - return settings.cache.ttl - - def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: + def get_key(self, url: str, params: dict[str, Any]) -> str: parsed_url = urlparse(url) - netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain + netloc_parts = parsed_url.netloc.split(".")[::-1] path_parts = parsed_url.path.strip("/").split("/") - # remove api_key and serialize params if any filtered_params = {k: v for k, v in params.items() if k != "api_key"} params_part = [urlencode(filtered_params)] if filtered_params else [] - return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) + return str(Path(*netloc_parts, *path_parts, *params_part)) + + def _key_to_path(self, key: str) -> Path: + return self.path.joinpath(key) + + def load(self, key: str) -> Response: + path = self._key_to_path(key) - def _get_cache_response(self, cache_dir: Path) -> Response: - if not cache_dir.exists(): - raise FileNotFoundError(f"Cache directory not found: {cache_dir}") + if not path.exists(): + raise FileNotFoundError(f"Cache directory not found: {path}") - meta_path = cache_dir / "meta.json" - headers_path = cache_dir / "headers.json" - body_path = cache_dir / "body.bin" + meta_path = path / "meta.json" + headers_path = path / "headers.json" + body_path = path / "body.bin" if not (meta_path.exists() and headers_path.exists() and body_path.exists()): - raise FileNotFoundError(f"Incomplete cache at {cache_dir}") + raise FileNotFoundError(f"Incomplete cache at {path}") with meta_path.open("r", encoding="utf-8") as f: meta = json.load(f) @@ -55,7 +54,7 @@ def _get_cache_response(self, cache_dir: Path) -> Response: raise ValueError("Cache metadata missing 'created_at'") if time.time() - created_at > self.ttl: - raise TimeoutError(f"Cache expired for {cache_dir}") + raise TimeoutError(f"Cache expired for {path}") with headers_path.open("r", encoding="utf-8") as f: headers = json.load(f) @@ -72,17 +71,15 @@ def _get_cache_response(self, cache_dir: Path) -> Response: return response - def _set_cache_response(self, 
cache_dir: Path, response: Response) -> None: - cache_dir.mkdir(parents=True, exist_ok=True) + def save(self, key: str, response: Response) -> None: + path = self._key_to_path(key) + path.mkdir(parents=True, exist_ok=True) - # body - (cache_dir / "body.bin").write_bytes(response.content) + (path / "body.bin").write_bytes(response.content) - # headers - with (cache_dir / "headers.json").open("w", encoding="utf-8") as f: + with (path / "headers.json").open("w", encoding="utf-8") as f: json.dump(dict(response.headers), f) - # meta meta = { "status_code": response.status_code, "url": response.url, @@ -98,30 +95,33 @@ def _set_cache_response(self, cache_dir: Path, response: Response) -> None: }, } - with (cache_dir / "meta.json").open("w", encoding="utf-8") as f: + with (path / "meta.json").open("w", encoding="utf-8") as f: json.dump(meta, f) -class HTTPClient(CacheMixin): - def __init__(self, config: APIConfig) -> None: - self.config = config - self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - - @property - def server(self) -> str: - return self.config.server - - @property - def base_url(self) -> str: - return self.config.base_url - - @property - def key(self) -> str: - return self.config.key +class HTTPClient: + def __init__( # noqa: PLR0913 + self, + *, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + delay_method: DelayMethod, + delay_time: int, + cache: HTTPCache | None = None, + ) -> None: + self.server = server + self.base_url = base_url + self.api_key = api_key + self.timeout = timeout + self.retries = retries + self.delay_method = delay_method + self.delay_time = delay_time + self.cache = cache - @property - def timeout(self) -> int: - return self.config.timeout + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def request( self, @@ -134,27 +134,25 @@ def request( ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) - params = request_kwargs.pop("params", {}) - params = params.copy() + # prepare params + params = request_kwargs.pop("params", {}).copy() if use_api_key: - params["api_key"] = self.key + params["api_key"] = self.api_key - headers = request_kwargs.pop("headers", {}) - headers = headers.copy() + # prepare headers + headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) timeout = request_kwargs.pop("timeout", self.timeout) - cache_dir = self._get_cache_dir(url, params) - if use_cache: + if use_cache and self.cache is not None: + cache_key = self.cache.get_key(url, params) try: - return self._get_cache_response(cache_dir) - except FileNotFoundError: - pass - except TimeoutError: - pass + return self.cache.load(cache_key) + except (FileNotFoundError, TimeoutError): + pass # cache miss or expired, continue except Exception: - raise + raise # propagate unexpected cache errors response = requests.request( method=method, @@ -165,8 +163,8 @@ def request( **request_kwargs, ) - if use_cache: - self._set_cache_response(cache_dir, response) + if use_cache and self.cache is not None: + self.cache.save(cache_key, response) return response diff --git a/openml/_api/http/utils.py b/openml/_api/clients/minio.py similarity index 100% rename from openml/_api/http/utils.py rename to openml/_api/clients/minio.py diff --git a/openml/_api/config.py b/openml/_api/config.py index 13063df7a..aa153a556 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -13,7 +13,7 @@ class DelayMethod(str, Enum): class APIConfig: server: str base_url: str - key: str + 
api_key: str timeout: int = 10 # seconds @@ -48,12 +48,12 @@ class Settings: v1=APIConfig( server="https://www.openml.org/", base_url="api/v1/xml/", - key="...", + api_key="...", ), v2=APIConfig( server="http://127.0.0.1:8001/", base_url="", - key="...", + api_key="...", ), ), connection=ConnectionConfig(), diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py deleted file mode 100644 index 8e6d1e4ce..000000000 --- a/openml/_api/http/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from openml._api.http.client import HTTPClient - -__all__ = ["HTTPClient"] diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 98b587411..483b74d3d 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -1,9 +1,10 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING +from openml._api.clients import HTTPCache, HTTPClient from openml._api.config import settings -from openml._api.http.client import HTTPClient from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -22,20 +23,42 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): def build_backend(version: str, *, strict: bool) -> APIBackend: - v1_http = HTTPClient(config=settings.api.v1) - v2_http = HTTPClient(config=settings.api.v2) + http_cache = HTTPCache( + path=Path(settings.cache.dir), + ttl=settings.cache.ttl, + ) + v1_http_client = HTTPClient( + server=settings.api.v1.server, + base_url=settings.api.v1.base_url, + api_key=settings.api.v1.api_key, + timeout=settings.api.v1.timeout, + retries=settings.connection.retries, + delay_method=settings.connection.delay_method, + delay_time=settings.connection.delay_time, + cache=http_cache, + ) + v2_http_client = HTTPClient( + server=settings.api.v2.server, + base_url=settings.api.v2.base_url, + api_key=settings.api.v2.api_key, + timeout=settings.api.v2.timeout, + retries=settings.connection.retries, + delay_method=settings.connection.delay_method, + delay_time=settings.connection.delay_time, + cache=http_cache, + ) v1 = APIBackend( - datasets=DatasetsV1(v1_http), - tasks=TasksV1(v1_http), + datasets=DatasetsV1(v1_http_client), + tasks=TasksV1(v1_http_client), ) if version == "v1": return v1 v2 = APIBackend( - datasets=DatasetsV2(v2_http), - tasks=TasksV2(v2_http), + datasets=DatasetsV2(v2_http_client), + tasks=TasksV2(v2_http_client), ) if strict: From 43276d2ac56ba39d195b5d54d72bed2e61da3f79 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 23 Jan 2026 12:17:53 +0500 Subject: [PATCH 11/86] fix import in resources/base.py --- openml/_api/resources/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index 6fbf8977d..54b40a0e0 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from requests import Response - from openml._api.http import HTTPClient + from openml._api.clients import HTTPClient from openml.datasets.dataset import OpenMLDataset from openml.tasks.task import OpenMLTask From 1206f697d09df82ed7f18bfea94a476844e01cb4 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 13:52:20 +0500 Subject: [PATCH 12/86] refactor and add exception handling --- openml/_api/clients/http.py | 241 +++++++++++++++++++++++++++++++++--- openml/_api/config.py | 5 +- openml/_api/runtime/core.py | 6 +- 3 files changed, 229 insertions(+), 23 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 4e126ee92..dc184074d 100644 
--- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -1,18 +1,28 @@ from __future__ import annotations import json +import logging +import math +import random import time +import xml +from collections.abc import Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any from urllib.parse import urlencode, urljoin, urlparse import requests +import xmltodict from requests import Response from openml.__version__ import __version__ - -if TYPE_CHECKING: - from openml._api.config import DelayMethod +from openml._api.config import RetryPolicy +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, + OpenMLServerNoResult, +) class HTTPCache: @@ -108,8 +118,7 @@ def __init__( # noqa: PLR0913 api_key: str, timeout: int, retries: int, - delay_method: DelayMethod, - delay_time: int, + retry_policy: RetryPolicy, cache: HTTPCache | None = None, ) -> None: self.server = server @@ -117,12 +126,194 @@ def __init__( # noqa: PLR0913 self.api_key = api_key self.timeout = timeout self.retries = retries - self.delay_method = delay_method - self.delay_time = delay_time + self.retry_policy = retry_policy self.cache = cache + self.retry_func = ( + self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay + ) self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _robot_delay(self, n: int) -> float: + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + def _human_delay(self, n: int) -> float: + return max(1.0, n) + + def _parse_exception_response( + self, + response: Response, + ) -> tuple[int | None, str]: + content_type = response.headers.get("Content-Type", "").lower() + + if "json" in content_type: + server_exception = response.json() + server_error = server_exception["detail"] + code = server_error.get("code") + message = server_error.get("message") + additional_information = server_error.get("additional_information") + else: + server_exception = xmltodict.parse(response.text) + server_error = server_exception["oml:error"] + code = server_error.get("oml:code") + message = server_error.get("oml:message") + additional_information = server_error.get("oml:additional_information") + + if code is not None: + code = int(code) + + if message and additional_information: + full_message = f"{message} - {additional_information}" + elif message: + full_message = message + elif additional_information: + full_message = additional_information + else: + full_message = "" + + return code, full_message + + def _raise_code_specific_error( + self, + code: int, + message: str, + url: str, + files: Mapping[str, Any] | None, + ) -> None: + if code in [111, 372, 512, 500, 482, 542, 674]: + # 512 for runs, 372 for datasets, 500 for flows + # 482 for tasks, 542 for evaluations, 674 for setups + # 111 for dataset descriptions + raise OpenMLServerNoResult(code=code, message=message, url=url) + + # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) + if code in [163] and files is not None and "description" in files: + # file_elements['description'] is the XML file description of the flow + message = f"\n{files['description']}\n{message}" + + if code in [ + 102, # flow/exists post + 137, # dataset post + 350, # dataset/42 delete + 310, # flow/ post + 320, # flow/42 delete + 400, # run/42 delete + 460, # task/42 delete + ]: + raise OpenMLNotAuthorizedError( + message=( + f"The 
API call {url} requires authentication via an API key.\nPlease configure " + "OpenML-Python to use your API as described in this example:" + "\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication" + ) + ) + + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. + # These are typically caused by high server load, + # which means trying again might resolve the issue. + # DATABASE_CONNECTION_ERRCODE + if code != 107: + raise OpenMLServerException(code=code, message=message, url=url) + + def _validate_response( + self, + method: str, + url: str, + files: Mapping[str, Any] | None, + response: Response, + ) -> Exception | None: + if ( + "Content-Encoding" not in response.headers + or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning(f"Received uncompressed content from OpenML for {url}.") + + if response.status_code == 200: + return None + + if response.status_code == requests.codes.URI_TOO_LONG: + raise OpenMLServerError(f"URI too long! ({url})") + + retry_raise_e: Exception | None = None + + try: + code, message = self._parse_exception_response(response) + + except (requests.exceptions.JSONDecodeError, xml.parsers.expat.ExpatError) as e: + if method != "GET": + extra = f"Status code: {response.status_code}\n{response.text}" + raise OpenMLServerError( + f"Unexpected server error when calling {url}. Please contact the " + f"developers!\n{extra}" + ) from e + + retry_raise_e = e + + except Exception as e: + # If we failed to parse it out, + # then something has gone wrong in the body we have sent back + # from the server and there is little extra information we can capture. + raise OpenMLServerError( + f"Unexpected server error when calling {url}. 
Please contact the developers!\n" + f"Status code: {response.status_code}\n{response.text}", + ) from e + + if code is not None: + self._raise_code_specific_error( + code=code, + message=message, + url=url, + files=files, + ) + + if retry_raise_e is None: + retry_raise_e = OpenMLServerException(code=code, message=message, url=url) + + return retry_raise_e + + def _request( # noqa: PLR0913 + self, + method: str, + url: str, + params: Mapping[str, Any], + headers: Mapping[str, str], + timeout: float | int, + files: Mapping[str, Any] | None, + **request_kwargs: Any, + ) -> tuple[Response | None, Exception | None]: + retry_raise_e: Exception | None = None + response: Response | None = None + + try: + response = requests.request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + except ( + requests.exceptions.ChunkedEncodingError, + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + ) as e: + retry_raise_e = e + + if response is not None: + retry_raise_e = self._validate_response( + method=method, + url=url, + files=files, + response=response, + ) + + return response, retry_raise_e + def request( self, method: str, @@ -133,6 +324,7 @@ def request( **request_kwargs: Any, ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) + retries = max(1, self.retries) # prepare params params = request_kwargs.pop("params", {}).copy() @@ -144,6 +336,9 @@ def request( headers.update(self.headers) timeout = request_kwargs.pop("timeout", self.timeout) + files = request_kwargs.pop("files", None) + + use_cache = False if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) @@ -154,14 +349,28 @@ def request( except Exception: raise # propagate unexpected cache errors - response = requests.request( - method=method, - url=url, - params=params, - headers=headers, - timeout=timeout, - **request_kwargs, - ) + for retry_counter in range(1, retries + 1): + response, retry_raise_e = self._request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + files=files, + **request_kwargs, + ) + + # executed successfully + if retry_raise_e is None: + break + # tries completed + if retry_counter >= retries: + raise retry_raise_e + + delay = self.retry_func(retry_counter) + time.sleep(delay) + + assert response is not None if use_cache and self.cache is not None: self.cache.save(cache_key, response) diff --git a/openml/_api/config.py b/openml/_api/config.py index aa153a556..6cce06403 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -4,7 +4,7 @@ from enum import Enum -class DelayMethod(str, Enum): +class RetryPolicy(str, Enum): HUMAN = "human" ROBOT = "robot" @@ -26,8 +26,7 @@ class APISettings: @dataclass class ConnectionConfig: retries: int = 3 - delay_method: DelayMethod = DelayMethod.HUMAN - delay_time: int = 1 # seconds + retry_policy: RetryPolicy = RetryPolicy.HUMAN @dataclass diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 483b74d3d..25f2649ee 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -33,8 +33,7 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: api_key=settings.api.v1.api_key, timeout=settings.api.v1.timeout, retries=settings.connection.retries, - delay_method=settings.connection.delay_method, - delay_time=settings.connection.delay_time, + retry_policy=settings.connection.retry_policy, cache=http_cache, ) v2_http_client = HTTPClient( @@ -43,8 +42,7 @@ def 
build_backend(version: str, *, strict: bool) -> APIBackend: api_key=settings.api.v2.api_key, timeout=settings.api.v2.timeout, retries=settings.connection.retries, - delay_method=settings.connection.delay_method, - delay_time=settings.connection.delay_time, + retry_policy=settings.connection.retry_policy, cache=http_cache, ) From 4948e991f96821372934c7132f4a695da165d17b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 20:43:32 +0500 Subject: [PATCH 13/86] refactor resources/base/ --- openml/_api/resources/base/__init__.py | 13 ++++++ openml/_api/resources/base/base.py | 41 +++++++++++++++++++ .../resources/{base.py => base/resources.py} | 16 ++++---- openml/_api/resources/base/versions.py | 23 +++++++++++ openml/_api/resources/datasets.py | 6 +-- openml/_api/resources/tasks.py | 6 +-- 6 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 openml/_api/resources/base/__init__.py create mode 100644 openml/_api/resources/base/base.py rename openml/_api/resources/{base.py => base/resources.py} (64%) create mode 100644 openml/_api/resources/base/versions.py diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py new file mode 100644 index 000000000..851cfe942 --- /dev/null +++ b/openml/_api/resources/base/__init__.py @@ -0,0 +1,13 @@ +from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.resources import DatasetsAPI, TasksAPI +from openml._api.resources.base.versions import ResourceV1, ResourceV2 + +__all__ = [ + "APIVersion", + "DatasetsAPI", + "ResourceAPI", + "ResourceType", + "ResourceV1", + "ResourceV2", + "TasksAPI", +] diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py new file mode 100644 index 000000000..8d85d054b --- /dev/null +++ b/openml/_api/resources/base/base.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from enum import Enum +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.clients import HTTPClient + + +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASETS = "datasets" + TASKS = "tasks" + + +class ResourceAPI(ABC): + api_version: APIVersion | None = None + resource_type: ResourceType | None = None + + def __init__(self, http: HTTPClient): + self._http = http + + def _raise_not_implemented_error(self, method_name: str | None = None) -> None: + version = getattr(self.api_version, "name", "Unknown version") + resource = getattr(self.resource_type, "name", "Unknown resource") + method_info = f" Method: {method_name}" if method_name else "" + raise NotImplementedError( + f"{self.__class__.__name__}: {version} API does not support this " + f"functionality for resource: {resource}.{method_info}" + ) + + @abstractmethod + def delete(self) -> None: ... + + @abstractmethod + def publish(self) -> None: ... 
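The ResourceAPI base class added in this patch carries the APIVersion/ResourceType metadata that _raise_not_implemented_error uses to build its message. A minimal sketch of how a concrete subclass is expected to plug into it (the subclass name and the http=None stand-in are invented for illustration; real resources receive an HTTPClient):

from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType


class ExampleTasksV2(ResourceAPI):
    # Hypothetical subclass, only used to show the metadata-driven error message.
    api_version = APIVersion.V2
    resource_type = ResourceType.TASKS

    def delete(self) -> None:
        self._raise_not_implemented_error("delete")

    def publish(self) -> None:
        self._raise_not_implemented_error("publish")


tasks = ExampleTasksV2(http=None)  # a real HTTPClient would normally be injected
try:
    tasks.delete()
except NotImplementedError as err:
    # ExampleTasksV2: V2 API does not support this functionality for resource: TASKS. Method: delete
    print(err)
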
diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base/resources.py similarity index 64% rename from openml/_api/resources/base.py rename to openml/_api/resources/base/resources.py index 54b40a0e0..edb26c91c 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base/resources.py @@ -1,27 +1,27 @@ from __future__ import annotations -from abc import ABC, abstractmethod +from abc import abstractmethod from typing import TYPE_CHECKING +from openml._api.resources.base import ResourceAPI, ResourceType + if TYPE_CHECKING: from requests import Response - from openml._api.clients import HTTPClient from openml.datasets.dataset import OpenMLDataset from openml.tasks.task import OpenMLTask -class ResourceAPI: - def __init__(self, http: HTTPClient): - self._http = http - +class DatasetsAPI(ResourceAPI): + resource_type: ResourceType | None = ResourceType.DATASETS -class DatasetsAPI(ResourceAPI, ABC): @abstractmethod def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... -class TasksAPI(ResourceAPI, ABC): +class TasksAPI(ResourceAPI): + resource_type: ResourceType | None = ResourceType.TASKS + @abstractmethod def get( self, diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py new file mode 100644 index 000000000..8a81517e5 --- /dev/null +++ b/openml/_api/resources/base/versions.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from openml._api.resources.base import APIVersion, ResourceAPI + + +class ResourceV1(ResourceAPI): + api_version: APIVersion | None = APIVersion.V1 + + def delete(self) -> None: + pass + + def publish(self) -> None: + pass + + +class ResourceV2(ResourceAPI): + api_version: APIVersion | None = APIVersion.V2 + + def delete(self) -> None: + self._raise_not_implemented_error("delete") + + def publish(self) -> None: + self._raise_not_implemented_error("publish") diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py index 9ff1ec278..f3a49a84f 100644 --- a/openml/_api/resources/datasets.py +++ b/openml/_api/resources/datasets.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from openml._api.resources.base import DatasetsAPI +from openml._api.resources.base import DatasetsAPI, ResourceV1, ResourceV2 if TYPE_CHECKING: from responses import Response @@ -10,11 +10,11 @@ from openml.datasets.dataset import OpenMLDataset -class DatasetsV1(DatasetsAPI): +class DatasetsV1(ResourceV1, DatasetsAPI): def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: raise NotImplementedError -class DatasetsV2(DatasetsAPI): +class DatasetsV2(ResourceV2, DatasetsAPI): def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: raise NotImplementedError diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index f494fb9a3..a7ca39208 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -4,7 +4,7 @@ import xmltodict -from openml._api.resources.base import TasksAPI +from openml._api.resources.base import ResourceV1, ResourceV2, TasksAPI from openml.tasks.task import ( OpenMLClassificationTask, OpenMLClusteringTask, @@ -18,7 +18,7 @@ from requests import Response -class TasksV1(TasksAPI): +class TasksV1(ResourceV1, TasksAPI): def get( self, task_id: int, @@ -118,7 +118,7 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask: return cls(**common_kwargs) # type: ignore -class TasksV2(TasksAPI): +class TasksV2(ResourceV2, TasksAPI): def get( self, task_id: int, From 
a3541675fd6452e68f268127df7c583bb9c2d0ca Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 26 Jan 2026 21:06:20 +0500 Subject: [PATCH 14/86] implement delete --- openml/_api/resources/base/base.py | 23 +++++--- openml/_api/resources/base/resources.py | 4 +- openml/_api/resources/base/versions.py | 76 ++++++++++++++++++++++--- 3 files changed, 86 insertions(+), 17 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 8d85d054b..9b1803508 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -14,28 +14,37 @@ class APIVersion(str, Enum): class ResourceType(str, Enum): - DATASETS = "datasets" - TASKS = "tasks" + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" class ResourceAPI(ABC): - api_version: APIVersion | None = None - resource_type: ResourceType | None = None + api_version: APIVersion + resource_type: ResourceType def __init__(self, http: HTTPClient): self._http = http - def _raise_not_implemented_error(self, method_name: str | None = None) -> None: + def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown resource") method_info = f" Method: {method_name}" if method_name else "" - raise NotImplementedError( + return ( f"{self.__class__.__name__}: {version} API does not support this " f"functionality for resource: {resource}.{method_info}" ) @abstractmethod - def delete(self) -> None: ... + def delete(self, resource_id: int) -> bool: ... @abstractmethod def publish(self) -> None: ... diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index edb26c91c..55cb95c0d 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -13,14 +13,14 @@ class DatasetsAPI(ResourceAPI): - resource_type: ResourceType | None = ResourceType.DATASETS + resource_type: ResourceType = ResourceType.DATASET @abstractmethod def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... 
class TasksAPI(ResourceAPI): - resource_type: ResourceType | None = ResourceType.TASKS + resource_type: ResourceType = ResourceType.TASK @abstractmethod def get( diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 8a81517e5..ce7b02057 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,23 +1,83 @@ from __future__ import annotations -from openml._api.resources.base import APIVersion, ResourceAPI +import xmltodict + +from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType +from openml.exceptions import ( + OpenMLNotAuthorizedError, + OpenMLServerError, + OpenMLServerException, +) class ResourceV1(ResourceAPI): - api_version: APIVersion | None = APIVersion.V1 + api_version: APIVersion = APIVersion.V1 - def delete(self) -> None: - pass + def delete(self, resource_id: int) -> bool: + if self.resource_type == ResourceType.DATASET: + resource_type = "data" + else: + resource_type = self.resource_type.name + + legal_resources = { + "data", + "flow", + "task", + "run", + "study", + "user", + } + if resource_type not in legal_resources: + raise ValueError(f"Can't delete a {resource_type}") + + url_suffix = f"{resource_type}/{resource_id}" + try: + response = self._http.delete(url_suffix) + result = xmltodict.parse(response.content) + return f"oml:{resource_type}_delete" in result + except OpenMLServerException as e: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if e.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted " + "because it was not uploaded by you." 
+ ), + ) from e + if e.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {e.message}" + ), + ) from e + if e.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from e + raise e def publish(self) -> None: pass class ResourceV2(ResourceAPI): - api_version: APIVersion | None = APIVersion.V2 + api_version: APIVersion = APIVersion.V2 - def delete(self) -> None: - self._raise_not_implemented_error("delete") + def delete(self, resource_id: int) -> bool: + raise NotImplementedError(self._get_not_implemented_message("publish")) def publish(self) -> None: - self._raise_not_implemented_error("publish") + raise NotImplementedError(self._get_not_implemented_message("publish")) From 1fe7e3ed8561945c20e8433603046a35484c37e7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 12:56:35 +0500 Subject: [PATCH 15/86] implement publish and minor refactoring --- openml/_api/clients/http.py | 2 - openml/_api/resources/base/base.py | 15 ++-- openml/_api/resources/base/versions.py | 113 ++++++++++++++++--------- 3 files changed, 82 insertions(+), 48 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dc184074d..1622087c9 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -338,8 +338,6 @@ def request( timeout = request_kwargs.pop("timeout", self.timeout) files = request_kwargs.pop("files", None) - use_cache = False - if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) try: diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 9b1803508..f2d7d1e88 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -5,6 +5,9 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import Mapping + from typing import Any + from openml._api.clients import HTTPClient @@ -34,6 +37,12 @@ class ResourceAPI(ABC): def __init__(self, http: HTTPClient): self._http = http + @abstractmethod + def delete(self, resource_id: int) -> bool: ... + + @abstractmethod + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown resource") @@ -42,9 +51,3 @@ def _get_not_implemented_message(self, method_name: str | None = None) -> str: f"{self.__class__.__name__}: {version} API does not support this " f"functionality for resource: {resource}.{method_info}" ) - - @abstractmethod - def delete(self, resource_id: int) -> bool: ... - - @abstractmethod - def publish(self) -> None: ... 
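ResourceV1.delete (introduced in the previous patch) treats the request as successful when the parsed reply's root element is oml:<endpoint>_delete. A self-contained sketch of that check; the XML snippet below is invented for illustration and is shorter than a real server reply:

import xmltodict

# Fake server reply for a successful task deletion (structure assumed,
# not taken from a captured response).
fake_reply = (
    b'<oml:task_delete xmlns:oml="http://openml.org/openml">'
    b"<oml:id>42</oml:id></oml:task_delete>"
)

parsed = xmltodict.parse(fake_reply)
deleted = "oml:task_delete" in parsed  # ResourceV1.delete returns this boolean
print(deleted)  # True
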
diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index ce7b02057..41f883ebe 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,5 +1,8 @@ from __future__ import annotations +from collections.abc import Mapping +from typing import Any + import xmltodict from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType @@ -13,6 +16,11 @@ class ResourceV1(ResourceAPI): api_version: APIVersion = APIVersion.V1 + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + response = self._http.post(path, files=files) + parsed_response = xmltodict.parse(response.content) + return self._extract_id_from_upload(parsed_response) + def delete(self, resource_id: int) -> bool: if self.resource_type == ResourceType.DATASET: resource_type = "data" @@ -30,54 +38,79 @@ def delete(self, resource_id: int) -> bool: if resource_type not in legal_resources: raise ValueError(f"Can't delete a {resource_type}") - url_suffix = f"{resource_type}/{resource_id}" + path = f"{resource_type}/{resource_id}" try: - response = self._http.delete(url_suffix) + response = self._http.delete(path) result = xmltodict.parse(response.content) return f"oml:{resource_type}_delete" in result except OpenMLServerException as e: - # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php - # Most exceptions are descriptive enough to be raised as their standard - # OpenMLServerException, however there are two cases where we add information: - # - a generic "failed" message, we direct them to the right issue board - # - when the user successfully authenticates with the server, - # but user is not allowed to take the requested action, - # in which case we specify a OpenMLNotAuthorizedError. - by_other_user = [323, 353, 393, 453, 594] - has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] - unknown_reason = [325, 355, 394, 455, 593] - if e.code in by_other_user: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted " - "because it was not uploaded by you." - ), - ) from e - if e.code in has_dependent_entities: - raise OpenMLNotAuthorizedError( - message=( - f"The {resource_type} can not be deleted because " - f"it still has associated entities: {e.message}" - ), - ) from e - if e.code in unknown_reason: - raise OpenMLServerError( - message=( - f"The {resource_type} can not be deleted for unknown reason," - " please open an issue at: https://github.com/openml/openml/issues/new" - ), - ) from e - raise e - - def publish(self) -> None: - pass + self._handle_delete_exception(resource_type, e) + raise + + def _handle_delete_exception( + self, resource_type: str, exception: OpenMLServerException + ) -> None: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. 
+ by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if exception.code in by_other_user: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because it was not uploaded by you." + ), + ) from exception + if exception.code in has_dependent_entities: + raise OpenMLNotAuthorizedError( + message=( + f"The {resource_type} can not be deleted because " + f"it still has associated entities: {exception.message}" + ), + ) from exception + if exception.code in unknown_reason: + raise OpenMLServerError( + message=( + f"The {resource_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from exception + raise exception + + def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: + # reads id from + # sample parsed dict: {"oml:openml": {"oml:upload_flow": {"oml:id": "42"}}} + + # xmltodict always gives exactly one root key + ((_, root_value),) = parsed.items() + + if not isinstance(root_value, Mapping): + raise ValueError("Unexpected XML structure") + + # upload node (e.g. oml:upload_task, oml:study_upload, ...) + ((_, upload_value),) = root_value.items() + + if not isinstance(upload_value, Mapping): + raise ValueError("Unexpected upload node structure") + + # ID is the only leaf value + for v in upload_value.values(): + if isinstance(v, (str, int)): + return int(v) + + raise ValueError("No ID found in upload response") class ResourceV2(ResourceAPI): api_version: APIVersion = APIVersion.V2 - def delete(self, resource_id: int) -> bool: + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: raise NotImplementedError(self._get_not_implemented_message("publish")) - def publish(self) -> None: - raise NotImplementedError(self._get_not_implemented_message("publish")) + def delete(self, resource_id: int) -> bool: + raise NotImplementedError(self._get_not_implemented_message("delete")) From 54a3151932e3c50bda983f6d6609a4740e38a0c7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 14:17:40 +0500 Subject: [PATCH 16/86] implement tag/untag --- openml/_api/clients/http.py | 10 +++- openml/_api/resources/base/base.py | 6 +++ openml/_api/resources/base/versions.py | 63 ++++++++++++++++++++------ openml/_api/resources/tasks.py | 4 +- 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 1622087c9..65d7b2248 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -279,6 +279,7 @@ def _request( # noqa: PLR0913 method: str, url: str, params: Mapping[str, Any], + data: Mapping[str, Any], headers: Mapping[str, str], timeout: float | int, files: Mapping[str, Any] | None, @@ -292,6 +293,7 @@ def _request( # noqa: PLR0913 method=method, url=url, params=params, + data=data, headers=headers, timeout=timeout, files=files, @@ -326,11 +328,16 @@ def request( url = urljoin(self.server, urljoin(self.base_url, path)) retries = max(1, self.retries) - # prepare params params = request_kwargs.pop("params", {}).copy() + data = request_kwargs.pop("data", {}).copy() + if use_api_key: params["api_key"] = self.api_key + if method.upper() in {"POST", "PUT", "PATCH"}: + data = {**params, **data} + params = {} + # prepare headers headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) @@ -352,6 +359,7 @@ def request( method=method, url=url, params=params, + data=data, 
headers=headers, timeout=timeout, files=files, diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index f2d7d1e88..63d4c40eb 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -43,6 +43,12 @@ def delete(self, resource_id: int) -> bool: ... @abstractmethod def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + @abstractmethod + def tag(self, resource_id: int, tag: str) -> list[str]: ... + + @abstractmethod + def untag(self, resource_id: int, tag: str) -> list[str]: ... + def _get_not_implemented_message(self, method_name: str | None = None) -> str: version = getattr(self.api_version, "name", "Unknown version") resource = getattr(self.resource_type, "name", "Unknown resource") diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 41f883ebe..91c1a8c06 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -22,19 +22,9 @@ def publish(self, path: str, files: Mapping[str, Any] | None) -> int: return self._extract_id_from_upload(parsed_response) def delete(self, resource_id: int) -> bool: - if self.resource_type == ResourceType.DATASET: - resource_type = "data" - else: - resource_type = self.resource_type.name - - legal_resources = { - "data", - "flow", - "task", - "run", - "study", - "user", - } + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "flow", "task", "run", "study", "user"} if resource_type not in legal_resources: raise ValueError(f"Can't delete a {resource_type}") @@ -47,6 +37,47 @@ def delete(self, resource_id: int) -> bool: self._handle_delete_exception(resource_type, e) raise + def tag(self, resource_id: int, tag: str) -> list[str]: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "task", "flow", "setup", "run"} + if resource_type not in legal_resources: + raise ValueError(f"Can't tag a {resource_type}") + + path = f"{resource_type}/tag" + data = {f"{resource_type}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + main_tag = f"oml:{resource_type}_tag" + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[main_tag] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def untag(self, resource_id: int, tag: str) -> list[str]: + resource_type = self._get_endpoint_name() + + legal_resources = {"data", "task", "flow", "setup", "run"} + if resource_type not in legal_resources: + raise ValueError(f"Can't tag a {resource_type}") + + path = f"{resource_type}/untag" + data = {f"{resource_type}_id": resource_id, "tag": tag} + response = self._http.post(path, data=data) + + main_tag = f"oml:{resource_type}_untag" + parsed_response = xmltodict.parse(response.content, force_list={"oml:tag"}) + result = parsed_response[main_tag] + tags: list[str] = result.get("oml:tag", []) + + return tags + + def _get_endpoint_name(self) -> str: + if self.resource_type == ResourceType.DATASET: + return "data" + return self.resource_type.name + def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException ) -> None: @@ -114,3 +145,9 @@ def publish(self, path: str, files: Mapping[str, Any] | None) -> int: def delete(self, resource_id: int) -> bool: raise NotImplementedError(self._get_not_implemented_message("delete")) + + def tag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("untag")) + + def 
untag(self, resource_id: int, tag: str) -> list[str]: + raise NotImplementedError(self._get_not_implemented_message("untag")) diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index a7ca39208..295e7a73d 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -26,7 +26,7 @@ def get( return_response: bool = False, ) -> OpenMLTask | tuple[OpenMLTask, Response]: path = f"task/{task_id}" - response = self._http.get(path) + response = self._http.get(path, use_cache=True) xml_content = response.text task = self._create_task_from_xml(xml_content) @@ -125,4 +125,4 @@ def get( *, return_response: bool = False, ) -> OpenMLTask | tuple[OpenMLTask, Response]: - raise NotImplementedError + raise NotImplementedError(self._get_not_implemented_message("get")) From 2b6fe6507b349703060f060f0184169abf5e20de Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 27 Jan 2026 18:31:39 +0500 Subject: [PATCH 17/86] implement fallback --- openml/_api/resources/__init__.py | 3 +- openml/_api/resources/base/__init__.py | 2 + openml/_api/resources/base/fallback.py | 56 ++++++++++++++++++++++++++ openml/_api/runtime/core.py | 8 +++- openml/_api/runtime/fallback.py | 12 ------ 5 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 openml/_api/resources/base/fallback.py delete mode 100644 openml/_api/runtime/fallback.py diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index b1af3c1a8..6c0807e0f 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,4 +1,5 @@ +from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.datasets import DatasetsV1, DatasetsV2 from openml._api.resources.tasks import TasksV1, TasksV2 -__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] +__all__ = ["DatasetsV1", "DatasetsV2", "FallbackProxy", "TasksV1", "TasksV2"] diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index 851cfe942..bddc09b21 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,10 +1,12 @@ from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.base.resources import DatasetsAPI, TasksAPI from openml._api.resources.base.versions import ResourceV1, ResourceV2 __all__ = [ "APIVersion", "DatasetsAPI", + "FallbackProxy", "ResourceAPI", "ResourceType", "ResourceV1", diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py new file mode 100644 index 000000000..253ee3865 --- /dev/null +++ b/openml/_api/resources/base/fallback.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from collections.abc import Callable +from typing import Any + + +class FallbackProxy: + def __init__(self, *api_versions: Any): + if not api_versions: + raise ValueError("At least one API version must be provided") + self._apis = api_versions + + def __getattr__(self, name: str) -> Any: + api, attr = self._find_attr(name) + if callable(attr): + return self._wrap_callable(name, api, attr) + return attr + + def _find_attr(self, name: str) -> tuple[Any, Any]: + for api in self._apis: + attr = getattr(api, name, None) + if attr is not None: + return api, attr + raise AttributeError(f"{self.__class__.__name__} has no attribute {name}") + + def _wrap_callable( + self, + name: str, + primary_api: Any, + primary_attr: Callable[..., Any], + ) -> 
Callable[..., Any]: + def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return primary_attr(*args, **kwargs) + except NotImplementedError: + return self._call_fallbacks(name, primary_api, *args, **kwargs) + + return wrapper + + def _call_fallbacks( + self, + name: str, + skip_api: Any, + *args: Any, + **kwargs: Any, + ) -> Any: + for api in self._apis: + if api is skip_api: + continue + attr = getattr(api, name, None) + if callable(attr): + try: + return attr(*args, **kwargs) + except NotImplementedError: + continue + raise NotImplementedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 25f2649ee..4914179f8 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -8,6 +8,7 @@ from openml._api.resources import ( DatasetsV1, DatasetsV2, + FallbackProxy, TasksV1, TasksV2, ) @@ -17,7 +18,7 @@ class APIBackend: - def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): + def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): self.datasets = datasets self.tasks = tasks @@ -62,7 +63,10 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if strict: return v2 - return v1 + return APIBackend( + datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), + tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), + ) class APIContext: diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py deleted file mode 100644 index 1bc99d270..000000000 --- a/openml/_api/runtime/fallback.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI - - -class FallbackProxy: - def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): - self._primary = primary - self._fallback = fallback From fa53f8d3e10dabde3634c05a97d67560459bcaa6 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 28 Jan 2026 13:50:42 +0500 Subject: [PATCH 18/86] add test_http.py --- openml/testing.py | 88 +++++++++++++++++++++++ tests/test_api/test_http.py | 134 ++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 tests/test_api/test_http.py diff --git a/openml/testing.py b/openml/testing.py index 8d3bbbd5b..b0aaac9be 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -11,10 +11,13 @@ import unittest from pathlib import Path from typing import ClassVar +from urllib.parse import urljoin import requests import openml +from openml._api.clients import HTTPCache, HTTPClient +from openml._api.config import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -276,6 +279,91 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation <= max_val +class TestAPIBase(unittest.TestCase): + server: str + base_url: str + api_key: str + timeout: int + retries: int + retry_policy: RetryPolicy + dir: str + ttl: int + cache: HTTPCache + http_client: HTTPClient + + def setUp(self) -> None: + self.server = "https://test.openml.org/" + self.base_url = "api/v1/xml" + self.api_key = "normaluser" + self.timeout = 10 + self.retries = 3 + self.retry_policy = RetryPolicy.HUMAN + self.dir = "test_cache" + self.ttl = 60 * 60 * 24 * 7 + + self.cache = self._get_http_cache( + path=Path(self.dir), + ttl=self.ttl, + ) + self.http_client = self._get_http_client( + server=self.server, + base_url=self.base_url, + 
api_key=self.api_key, + timeout=self.timeout, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def tearDown(self) -> None: + if self.cache.path.exists(): + shutil.rmtree(self.cache.path) + + def _get_http_cache( + self, + path: Path, + ttl: int, + ) -> HTTPCache: + return HTTPCache( + path=path, + ttl=ttl, + ) + + def _get_http_client( # noqa: PLR0913 + self, + server: str, + base_url: str, + api_key: str, + timeout: int, + retries: int, + retry_policy: RetryPolicy, + cache: HTTPCache | None = None, + ) -> HTTPClient: + return HTTPClient( + server=server, + base_url=base_url, + api_key=api_key, + timeout=timeout, + retries=retries, + retry_policy=retry_policy, + cache=cache, + ) + + def _get_url( + self, + server: str | None = None, + base_url: str | None = None, + path: str | None = None, + ) -> str: + server = server if server else self.server + base_url = base_url if base_url else self.base_url + path = path if path else "" + return urljoin(self.server, urljoin(self.base_url, path)) + + def check_task_existence( task_type: TaskType, dataset_id: int, diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py new file mode 100644 index 000000000..98b6fda5a --- /dev/null +++ b/tests/test_api/test_http.py @@ -0,0 +1,134 @@ +from requests import Response, Request +import time +import xmltodict +from openml.testing import TestAPIBase + + +class TestHTTPClient(TestAPIBase): + def test_cache(self): + url = self._get_url(path="task/31") + params = {"param1": "value1", "param2": "value2"} + + key = self.cache.get_key(url, params) + + # validate key + self.assertEqual( + key, + "org/openml/test/api/v1/task/31/param1=value1¶m2=value2", + ) + + # create fake response + req = Request("GET", url).prepare() + response = Response() + response.status_code = 200 + response.url = url + response.reason = "OK" + response._content = b"test" + response.headers = {"Content-Type": "text/xml"} + response.encoding = "utf-8" + response.request = req + response.elapsed = type("Elapsed", (), {"total_seconds": lambda self: 0.1})() + + # save to cache + self.cache.save(key, response) + + # load from cache + cached_response = self.cache.load(key) + + # validate loaded response + self.assertEqual(cached_response.status_code, 200) + self.assertEqual(cached_response.url, url) + self.assertEqual(cached_response.content, b"test") + self.assertEqual( + cached_response.headers["Content-Type"], "text/xml" + ) + + def test_get(self): + response = self.http_client.get("task/1") + + self.assertEqual(response.status_code, 200) + self.assertIn(b" new request + self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response1.content, response2.content) + + def test_post_and_delete(self): + task_xml = """ + + 5 + 193 + 17 + + """ + + task_id = None + try: + # POST the task + post_response = self.http_client.post( + "task", + files={"description": task_xml}, + ) + self.assertEqual(post_response.status_code, 200) + xml_resp = xmltodict.parse(post_response.content) + task_id = int(xml_resp["oml:upload_task"]["oml:id"]) + + # GET the task to verify it exists + get_response = self.http_client.get(f"task/{task_id}") + self.assertEqual(get_response.status_code, 200) + + finally: + # DELETE the task if it was created + if task_id is not None: + try: + del_response = self.http_client.delete(f"task/{task_id}") + # optional: verify delete + if 
del_response.status_code != 200: + print(f"Warning: delete failed for task {task_id}") + except Exception as e: + print(f"Warning: failed to delete task {task_id}: {e}") From 2b2db962fc252a2b2b23f21bd1d055905ed74588 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 28 Jan 2026 13:52:43 +0500 Subject: [PATCH 19/86] add uses_test_server marker --- tests/test_api/test_http.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 98b6fda5a..94ce5ee93 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,6 +1,7 @@ from requests import Response, Request import time import xmltodict +import pytest from openml.testing import TestAPIBase @@ -43,12 +44,14 @@ def test_cache(self): cached_response.headers["Content-Type"], "text/xml" ) + @pytest.mark.uses_test_server() def test_get(self): response = self.http_client.get("task/1") self.assertEqual(response.status_code, 200) self.assertIn(b" From c9617f932fce853dbe6db9a445ef98cc6cfec7f4 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 14:40:09 +0500 Subject: [PATCH 20/86] implement reset_cache --- openml/_api/clients/http.py | 6 +++++- tests/test_api/test_http.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 65d7b2248..dfcdf5a8a 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -322,6 +322,7 @@ def request( path: str, *, use_cache: bool = False, + reset_cache: bool = False, use_api_key: bool = False, **request_kwargs: Any, ) -> Response: @@ -345,7 +346,7 @@ def request( timeout = request_kwargs.pop("timeout", self.timeout) files = request_kwargs.pop("files", None) - if use_cache and self.cache is not None: + if use_cache and not reset_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -379,6 +380,7 @@ def request( assert response is not None if use_cache and self.cache is not None: + cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) return response @@ -388,6 +390,7 @@ def get( path: str, *, use_cache: bool = False, + reset_cache: bool = False, use_api_key: bool = False, **request_kwargs: Any, ) -> Response: @@ -395,6 +398,7 @@ def get( method="GET", path=path, use_cache=use_cache, + reset_cache=reset_cache, use_api_key=use_api_key, **request_kwargs, ) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 94ce5ee93..808321862 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -103,6 +103,24 @@ def test_get_cache_expires(self): self.assertEqual(response2.status_code, 200) self.assertEqual(response1.content, response2.content) + @pytest.mark.uses_test_server() + def test_get_reset_cache(self): + path = "task/1" + + url = self._get_url(path=path) + key = self.cache.get_key(url, {}) + cache_path = self.cache._key_to_path(key) / "meta.json" + + response1 = self.http_client.get(path, use_cache=True) + response1_cache_time_stamp = cache_path.stat().st_ctime + + response2 = self.http_client.get(path, use_cache=True, reset_cache=True) + response2_cache_time_stamp = cache_path.stat().st_ctime + + self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) + self.assertEqual(response2.status_code, 200) + self.assertEqual(response1.content, response2.content) + @pytest.mark.uses_test_server() def test_post_and_delete(self): task_xml = """ From 
5bc37b80abc86e89644e431f48ca2d4d4ad7814c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 22:02:38 +0500 Subject: [PATCH 21/86] fixes with publish/delete --- openml/_api/resources/base/versions.py | 22 ++++++------- tests/test_api/test_http.py | 9 ++---- tests/test_api/test_versions.py | 44 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 18 deletions(-) create mode 100644 tests/test_api/test_versions.py diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 91c1a8c06..6ca2dd345 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Mapping -from typing import Any +from typing import Any, cast import xmltodict @@ -76,7 +76,7 @@ def untag(self, resource_id: int, tag: str) -> list[str]: def _get_endpoint_name(self) -> str: if self.resource_type == ResourceType.DATASET: return "data" - return self.resource_type.name + return cast("str", self.resource_type.value) def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException @@ -114,8 +114,8 @@ def _handle_delete_exception( raise exception def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: - # reads id from - # sample parsed dict: {"oml:openml": {"oml:upload_flow": {"oml:id": "42"}}} + # reads id from upload response + # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}} # xmltodict always gives exactly one root key ((_, root_value),) = parsed.items() @@ -123,14 +123,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: if not isinstance(root_value, Mapping): raise ValueError("Unexpected XML structure") - # upload node (e.g. oml:upload_task, oml:study_upload, ...) 
- ((_, upload_value),) = root_value.items() + # Look for oml:id directly in the root value + if "oml:id" in root_value: + id_value = root_value["oml:id"] + if isinstance(id_value, (str, int)): + return int(id_value) - if not isinstance(upload_value, Mapping): - raise ValueError("Unexpected upload node structure") - - # ID is the only leaf value - for v in upload_value.values(): + # Fallback: check all values for numeric/string IDs + for v in root_value.values(): if isinstance(v, (str, int)): return int(v) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 808321862..c16759558 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -149,10 +149,5 @@ def test_post_and_delete(self): finally: # DELETE the task if it was created if task_id is not None: - try: - del_response = self.http_client.delete(f"task/{task_id}") - # optional: verify delete - if del_response.status_code != 200: - print(f"Warning: delete failed for task {task_id}") - except Exception as e: - print(f"Warning: failed to delete task {task_id}: {e}") + del_response = self.http_client.delete(f"task/{task_id}") + self.assertEqual(del_response.status_code, 200) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py new file mode 100644 index 000000000..d3b1cd45d --- /dev/null +++ b/tests/test_api/test_versions.py @@ -0,0 +1,44 @@ +import pytest +from openml.testing import TestAPIBase +from openml._api.resources.base.versions import ResourceV1 +from openml._api.resources.base.resources import ResourceType + + +class TestResourceV1(TestAPIBase): + def setUp(self): + super().setUp() + self.resource = ResourceV1(self.http_client) + self.resource.resource_type = ResourceType.TASK + + @pytest.mark.uses_test_server() + def test_publish_and_delete(self): + task_xml = """ + + 5 + 193 + 17 + + """ + + task_id = None + try: + # Publish the task + task_id = self.resource.publish( + "task", + files={"description": task_xml}, + ) + + # Get the task to verify it exists + get_response = self.http_client.get(f"task/{task_id}") + self.assertEqual(get_response.status_code, 200) + + finally: + # delete the task if it was created + if task_id is not None: + success = self.resource.delete(task_id) + self.assertTrue(success) + + + @pytest.mark.uses_test_server() + def test_tag_and_untag(self): + pass From 08d991686843fc2ff5d8182e96a162bc2e706f52 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 29 Jan 2026 22:05:24 +0500 Subject: [PATCH 22/86] fix cache_key in tests --- tests/test_api/test_http.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index c16759558..efaeaeeef 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -3,6 +3,7 @@ import xmltodict import pytest from openml.testing import TestAPIBase +import os class TestHTTPClient(TestAPIBase): @@ -11,12 +12,19 @@ def test_cache(self): params = {"param1": "value1", "param2": "value2"} key = self.cache.get_key(url, params) + expected_key = os.path.join( + "org", + "openml", + "test", + "api", + "v1", + "task", + "31", + "param1=value1¶m2=value2", + ) # validate key - self.assertEqual( - key, - "org/openml/test/api/v1/task/31/param1=value1¶m2=value2", - ) + self.assertEqual(key, expected_key) # create fake response req = Request("GET", url).prepare() From 8caba11111d93fd438915e3f697a634d362eba1f Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 30 Jan 2026 11:47:41 +0500 Subject: [PATCH 23/86] update 
_not_supported --- openml/_api/resources/base/base.py | 19 +++++++++++-------- openml/_api/resources/base/fallback.py | 8 +++++--- openml/_api/resources/base/versions.py | 16 ++++++++-------- openml/_api/resources/tasks.py | 6 +++--- openml/exceptions.py | 4 ++++ 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 63d4c40eb..38ceccbac 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -2,7 +2,9 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, NoReturn + +from openml.exceptions import OpenMLNotSupportedError if TYPE_CHECKING: from collections.abc import Mapping @@ -49,11 +51,12 @@ def tag(self, resource_id: int, tag: str) -> list[str]: ... @abstractmethod def untag(self, resource_id: int, tag: str) -> list[str]: ... - def _get_not_implemented_message(self, method_name: str | None = None) -> str: - version = getattr(self.api_version, "name", "Unknown version") - resource = getattr(self.resource_type, "name", "Unknown resource") - method_info = f" Method: {method_name}" if method_name else "" - return ( - f"{self.__class__.__name__}: {version} API does not support this " - f"functionality for resource: {resource}.{method_info}" + def _not_supported(self, *, method: str) -> NoReturn: + version = getattr(self.api_version, "value", "unknown") + resource = getattr(self.resource_type, "value", "unknown") + + raise OpenMLNotSupportedError( + f"{self.__class__.__name__}: " + f"{version} API does not support `{method}` " + f"for resource `{resource}`" ) diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 253ee3865..3919c36a9 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -3,6 +3,8 @@ from collections.abc import Callable from typing import Any +from openml.exceptions import OpenMLNotSupportedError + class FallbackProxy: def __init__(self, *api_versions: Any): @@ -32,7 +34,7 @@ def _wrap_callable( def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) - except NotImplementedError: + except OpenMLNotSupportedError: return self._call_fallbacks(name, primary_api, *args, **kwargs) return wrapper @@ -51,6 +53,6 @@ def _call_fallbacks( if callable(attr): try: return attr(*args, **kwargs) - except NotImplementedError: + except OpenMLNotSupportedError: continue - raise NotImplementedError(f"Could not fallback to any API for method: {name}") + raise OpenMLNotSupportedError(f"Could not fallback to any API for method: {name}") diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 6ca2dd345..04b7617b1 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -140,14 +140,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: class ResourceV2(ResourceAPI): api_version: APIVersion = APIVersion.V2 - def publish(self, path: str, files: Mapping[str, Any] | None) -> int: - raise NotImplementedError(self._get_not_implemented_message("publish")) + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 + self._not_supported(method="publish") - def delete(self, resource_id: int) -> bool: - raise NotImplementedError(self._get_not_implemented_message("delete")) + def delete(self, resource_id: int) -> bool: # noqa: ARG002 + 
self._not_supported(method="delete") - def tag(self, resource_id: int, tag: str) -> list[str]: - raise NotImplementedError(self._get_not_implemented_message("untag")) + def tag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="tag") - def untag(self, resource_id: int, tag: str) -> list[str]: - raise NotImplementedError(self._get_not_implemented_message("untag")) + def untag(self, resource_id: int, tag: str) -> list[str]: # noqa: ARG002 + self._not_supported(method="untag") diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index 295e7a73d..8420f8e57 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -121,8 +121,8 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask: class TasksV2(ResourceV2, TasksAPI): def get( self, - task_id: int, + task_id: int, # noqa: ARG002 *, - return_response: bool = False, + return_response: bool = False, # noqa: ARG002 ) -> OpenMLTask | tuple[OpenMLTask, Response]: - raise NotImplementedError(self._get_not_implemented_message("get")) + self._not_supported(method="get") diff --git a/openml/exceptions.py b/openml/exceptions.py index fe63b8a58..26c2d2591 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -65,3 +65,7 @@ class OpenMLNotAuthorizedError(OpenMLServerError): class ObjectNotPublishedError(PyOpenMLError): """Indicates an object has not been published yet.""" + + +class OpenMLNotSupportedError(PyOpenMLError): + """Raised when an API operation is not supported for a resource/version.""" From 1913c10416b74421709601d5177c1e67db93a401 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:27:36 +0100 Subject: [PATCH 24/86] add 'get_api_config' skeleton method --- openml/_api/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index 6cce06403..2201420d9 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -41,6 +41,9 @@ class Settings: connection: ConnectionConfig cache: CacheConfig + def get_api_config(self, version: str) -> APIConfig: + pass + settings = Settings( api=APISettings( From 7681949675f3c72e09d09d810aaa11acd78c6811 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:29:13 +0100 Subject: [PATCH 25/86] remove 'APISettings' --- openml/_api/config.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 2201420d9..893b950c6 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -17,12 +17,6 @@ class APIConfig: timeout: int = 10 # seconds -@dataclass -class APISettings: - v1: APIConfig - v2: APIConfig - - @dataclass class ConnectionConfig: retries: int = 3 From 01840a5a09442228f708daf45c32acbd05ce0e8b Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:34:11 +0100 Subject: [PATCH 26/86] impl. 
'get_api_config' --- openml/_api/config.py | 54 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 893b950c6..8600156f7 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -29,14 +29,58 @@ class CacheConfig: ttl: int = 60 * 60 * 24 * 7 # one week -@dataclass class Settings: - api: APISettings - connection: ConnectionConfig - cache: CacheConfig + def __init__(self) -> None: + self.api_configs: dict[str, APIConfig] = {} + self.connection = ConnectionConfig() + self.cache = CacheConfig() + self._initialized = False def get_api_config(self, version: str) -> APIConfig: - pass + """Get API config for a version, with lazy initialization from openml.config.""" + if not self._initialized: + self._init_from_legacy_config() + if version not in self.api_configs: + raise NotImplementedError( + f"API {version} is not yet available. " + f"Supported versions: {list(self.api_configs.keys())}" + ) + return self.api_configs[version] + + def _init_from_legacy_config(self) -> None: + """Lazy init from openml.config to avoid circular imports.""" + if self._initialized: + return + + # Import here to avoid circular import at module load time + import openml.config as legacy + + # Parse server URL to extract base components + # e.g., "https://www.openml.org/api/v1/xml" -> server="https://www.openml.org/" + server_url = legacy.server + if "/api" in server_url: + server_base = server_url.rsplit("/api", 1)[0] + "/" + else: + server_base = server_url + + self.api_configs["v1"] = APIConfig( + server=server_base, + base_url="api/v1/xml/", + api_key=legacy.apikey, + ) + + # Sync connection settings from legacy config + self.connection = ConnectionConfig( + retries=legacy.connection_n_retries, + retry_policy=RetryPolicy(legacy.retry_policy), + ) + + # Sync cache settings from legacy config + self.cache = CacheConfig( + dir=str(legacy._root_cache_directory), + ) + + self._initialized = True settings = Settings( From 26ed4c1ee0ab9571f74726795e050b7d47110227 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:39:43 +0100 Subject: [PATCH 27/86] add singleton pattern for settings --- openml/_api/config.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index 8600156f7..ee3240556 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -99,3 +99,18 @@ def _init_from_legacy_config(self) -> None: connection=ConnectionConfig(), cache=CacheConfig(), ) + + +_settings = None + + +def get_settings() -> Settings: + """Get settings singleton, creating on first access. + + Settings are lazily initialized from openml.config when first accessed, + avoiding circular imports at module load time. + """ + global _settings + if _settings is None: + _settings = Settings() + return _settings From c588d0cd456233894fa67a56e7a814c36ca25761 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:40:19 +0100 Subject: [PATCH 28/86] add 'reset_settings' --- openml/_api/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openml/_api/config.py b/openml/_api/config.py index ee3240556..5670698c8 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -114,3 +114,9 @@ def get_settings() -> Settings: if _settings is None: _settings = Settings() return _settings + + +def reset_settings() -> None: + """Reset the settings singleton. 
Could be useful for testing.""" + global _settings + _settings = None From b6ff7207c5d8428c885f498986d2a5abf0d66ac3 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:40:32 +0100 Subject: [PATCH 29/86] remove unused code --- openml/_api/config.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 5670698c8..4dc408428 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -83,24 +83,6 @@ def _init_from_legacy_config(self) -> None: self._initialized = True -settings = Settings( - api=APISettings( - v1=APIConfig( - server="https://www.openml.org/", - base_url="api/v1/xml/", - api_key="...", - ), - v2=APIConfig( - server="http://127.0.0.1:8001/", - base_url="", - api_key="...", - ), - ), - connection=ConnectionConfig(), - cache=CacheConfig(), -) - - _settings = None From 80d5afc1e0784abe264b10abaabe40fec7984792 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:44:44 +0100 Subject: [PATCH 30/86] reimplement usage of v1 settings config --- openml/_api/runtime/core.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 4914179f8..5e55d61cb 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import settings +from openml._api.config import get_settings from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -18,30 +18,29 @@ class APIBackend: - def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): + def __init__( + self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy + ): self.datasets = datasets self.tasks = tasks def build_backend(version: str, *, strict: bool) -> APIBackend: + settings = get_settings() + + # Get config for v1 (lazy init from openml.config) + v1_config = settings.get_api_config("v1") + http_cache = HTTPCache( - path=Path(settings.cache.dir), + path=Path(settings.cache.dir).expanduser(), ttl=settings.cache.ttl, ) + v1_http_client = HTTPClient( - server=settings.api.v1.server, - base_url=settings.api.v1.base_url, - api_key=settings.api.v1.api_key, - timeout=settings.api.v1.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v2_http_client = HTTPClient( - server=settings.api.v2.server, - base_url=settings.api.v2.base_url, - api_key=settings.api.v2.api_key, - timeout=settings.api.v2.timeout, + server=v1_config.server, + base_url=v1_config.base_url, + api_key=v1_config.api_key, + timeout=v1_config.timeout, retries=settings.connection.retries, retry_policy=settings.connection.retry_policy, cache=http_cache, From f47112c7b9eb1710ddf7b79ea97b3f8c0b0cbf49 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:47:25 +0100 Subject: [PATCH 31/86] first try v2, fallback to v1 if not available --- openml/_api/runtime/core.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 5e55d61cb..24fd2c248 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -54,6 +54,25 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if version == "v1": return v1 + # V2 support - will raise NotImplementedError if v2 config not available + try: + v2_config = 
settings.get_api_config("v2") + except NotImplementedError: + if strict: + raise + # Non-strict mode: fall back to v1 only + return v1 + + v2_http_client = HTTPClient( + server=v2_config.server, + base_url=v2_config.base_url, + api_key=v2_config.api_key, + timeout=v2_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v2 = APIBackend( datasets=DatasetsV2(v2_http_client), tasks=TasksV2(v2_http_client), From d44cf3eb5e36587ad033e24b1e54863e98df2d91 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 19:58:42 +0100 Subject: [PATCH 32/86] reimplement singelton without the use of 'global' --- openml/_api/config.py | 46 +++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 4dc408428..c375542b8 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -30,12 +30,28 @@ class CacheConfig: class Settings: + """Settings container that reads from openml.config on access.""" + + _instance: Settings | None = None + def __init__(self) -> None: self.api_configs: dict[str, APIConfig] = {} self.connection = ConnectionConfig() self.cache = CacheConfig() self._initialized = False + @classmethod + def get(cls) -> Settings: + """Get settings singleton, creating on first access.""" + if cls._instance is None: + cls._instance = cls() + return cls._instance + + @classmethod + def reset(cls) -> None: + """Reset the settings singleton. Useful for testing.""" + cls._instance = None + def get_api_config(self, version: str) -> APIConfig: """Get API config for a version, with lazy initialization from openml.config.""" if not self._initialized: @@ -52,11 +68,8 @@ def _init_from_legacy_config(self) -> None: if self._initialized: return - # Import here to avoid circular import at module load time - import openml.config as legacy + import openml.config as legacy # Import here to avoid circular - # Parse server URL to extract base components - # e.g., "https://www.openml.org/api/v1/xml" -> server="https://www.openml.org/" server_url = legacy.server if "/api" in server_url: server_base = server_url.rsplit("/api", 1)[0] + "/" @@ -69,36 +82,13 @@ def _init_from_legacy_config(self) -> None: api_key=legacy.apikey, ) - # Sync connection settings from legacy config + # Sync connection- and cache- settings from legacy config self.connection = ConnectionConfig( retries=legacy.connection_n_retries, retry_policy=RetryPolicy(legacy.retry_policy), ) - - # Sync cache settings from legacy config self.cache = CacheConfig( dir=str(legacy._root_cache_directory), ) self._initialized = True - - -_settings = None - - -def get_settings() -> Settings: - """Get settings singleton, creating on first access. - - Settings are lazily initialized from openml.config when first accessed, - avoiding circular imports at module load time. - """ - global _settings - if _settings is None: - _settings = Settings() - return _settings - - -def reset_settings() -> None: - """Reset the settings singleton. 
Could be useful for testing.""" - global _settings - _settings = None From ea7dda17087bc25d07ea7610da25b8ec04b17ca2 Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:00:25 +0100 Subject: [PATCH 33/86] add explanations --- openml/_api/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index c375542b8..32dd8ecf5 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -68,7 +68,11 @@ def _init_from_legacy_config(self) -> None: if self._initialized: return - import openml.config as legacy # Import here to avoid circular + # Import here (not at module level) to avoid circular imports. + # We read from openml.config to integrate with the existing config system + # where users set their API key, server, cache directory, etc. + # This avoids duplicating those settings with hardcoded values. + import openml.config as legacy server_url = legacy.server if "/api" in server_url: From f0e594784b446006e401ab4aa1d7113344b6dd0e Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:01:16 +0100 Subject: [PATCH 34/86] change usage of settings to new impl. --- openml/_api/runtime/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 24fd2c248..9207fc31d 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import get_settings +from openml._api.config import Settings from openml._api.resources import ( DatasetsV1, DatasetsV2, @@ -26,7 +26,7 @@ def __init__( def build_backend(version: str, *, strict: bool) -> APIBackend: - settings = get_settings() + settings = Settings.get() # Get config for v1 (lazy init from openml.config) v1_config = settings.get_api_config("v1") From edcd006b574a91e367d96e5c3718daf0edbc352e Mon Sep 17 00:00:00 2001 From: Simon Blanke Date: Fri, 30 Jan 2026 20:06:45 +0100 Subject: [PATCH 35/86] add explanations --- openml/_api/runtime/core.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 9207fc31d..a73105e91 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -28,7 +28,11 @@ def __init__( def build_backend(version: str, *, strict: bool) -> APIBackend: settings = Settings.get() - # Get config for v1 (lazy init from openml.config) + # Get config for v1. On first access, this triggers lazy initialization + # from openml.config, reading the user's actual API key, server URL, + # cache directory, and retry settings. This avoids circular imports + # (openml.config is imported inside the method, not at module load time) + # and ensures we use the user's configured values rather than hardcoded defaults. v1_config = settings.get_api_config("v1") http_cache = HTTPCache( @@ -54,7 +58,11 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: if version == "v1": return v1 - # V2 support - will raise NotImplementedError if v2 config not available + # V2 support. Currently v2 is not yet available, + # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, + # its config will be added to Settings._init_from_legacy_config(). + # In strict mode: propagate the error. + # In non-strict mode: silently fall back to v1 only. 
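For illustration only — a minimal sketch, not part of this patch series, of how the strict flag is expected to behave given the build_backend() signature shown above; the quoted error text follows the message format introduced with 'get_api_config':

    from openml._api.runtime.core import build_backend

    backend = build_backend("v2", strict=False)   # quietly falls back to a v1-only backend
    try:
        build_backend("v2", strict=True)          # propagates the NotImplementedError
    except NotImplementedError as err:
        print(err)  # e.g. "API v2 is not yet available. Supported versions: ['v1']"
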
try: v2_config = settings.get_api_config("v2") except NotImplementedError: From cde0aaeb7657a03fe6547a9b252a2f13457fc7f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 19:10:42 +0000 Subject: [PATCH 36/86] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/_api/config.py | 5 +---- openml/_api/runtime/core.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 32dd8ecf5..76d30f113 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -75,10 +75,7 @@ def _init_from_legacy_config(self) -> None: import openml.config as legacy server_url = legacy.server - if "/api" in server_url: - server_base = server_url.rsplit("/api", 1)[0] + "/" - else: - server_base = server_url + server_base = server_url.rsplit("/api", 1)[0] + "/" if "/api" in server_url else server_url self.api_configs["v1"] = APIConfig( server=server_base, diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index a73105e91..22b3004a4 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -18,9 +18,7 @@ class APIBackend: - def __init__( - self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy - ): + def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): self.datasets = datasets self.tasks = tasks From aa1e5602b87caf59680434a17fe6cc6532f58419 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 11:29:33 +0500 Subject: [PATCH 37/86] move to config: APIVersion, ResourceType --- openml/_api/config.py | 19 +++++++++++++++++++ openml/_api/resources/base/__init__.py | 4 +--- openml/_api/resources/base/base.py | 21 +-------------------- openml/_api/resources/base/resources.py | 3 ++- openml/_api/resources/base/versions.py | 3 ++- tests/test_api/test_versions.py | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 76d30f113..3afbf224f 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -4,6 +4,25 @@ from enum import Enum +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + class RetryPolicy(str, Enum): HUMAN = "human" ROBOT = "robot" diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index bddc09b21..089729d09 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,14 +1,12 @@ -from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType +from openml._api.resources.base.base import ResourceAPI from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.base.resources import DatasetsAPI, TasksAPI from openml._api.resources.base.versions import ResourceV1, ResourceV2 __all__ = [ - "APIVersion", "DatasetsAPI", "FallbackProxy", "ResourceAPI", - "ResourceType", "ResourceV1", "ResourceV2", "TasksAPI", diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 38ceccbac..dbe3e95ea 100644 --- a/openml/_api/resources/base/base.py +++ 
b/openml/_api/resources/base/base.py @@ -1,7 +1,6 @@ from __future__ import annotations from abc import ABC, abstractmethod -from enum import Enum from typing import TYPE_CHECKING, NoReturn from openml.exceptions import OpenMLNotSupportedError @@ -11,25 +10,7 @@ from typing import Any from openml._api.clients import HTTPClient - - -class APIVersion(str, Enum): - V1 = "v1" - V2 = "v2" - - -class ResourceType(str, Enum): - DATASET = "dataset" - TASK = "task" - TASK_TYPE = "task_type" - EVALUATION_MEASURE = "evaluation_measure" - ESTIMATION_PROCEDURE = "estimation_procedure" - EVALUATION = "evaluation" - FLOW = "flow" - STUDY = "study" - RUN = "run" - SETUP = "setup" - USER = "user" + from openml._api.config import APIVersion, ResourceType class ResourceAPI(ABC): diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 55cb95c0d..406bdfa50 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -3,7 +3,8 @@ from abc import abstractmethod from typing import TYPE_CHECKING -from openml._api.resources.base import ResourceAPI, ResourceType +from openml._api.config import ResourceType +from openml._api.resources.base import ResourceAPI if TYPE_CHECKING: from requests import Response diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 04b7617b1..990c3f791 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,7 +5,8 @@ import xmltodict -from openml._api.resources.base import APIVersion, ResourceAPI, ResourceType +from openml._api.config import APIVersion, ResourceType +from openml._api.resources.base import ResourceAPI from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index d3b1cd45d..9eb4c7a91 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1 -from openml._api.resources.base.resources import ResourceType +from openml._api.config import ResourceType class TestResourceV1(TestAPIBase): From 06b8497eb552e2c880e93f19224a534bef37986b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 11:48:04 +0500 Subject: [PATCH 38/86] remove api_context entirely --- openml/__init__.py | 2 ++ openml/_api/__init__.py | 8 -------- openml/_api/runtime/core.py | 12 ------------ openml/_api/runtime/instance.py | 5 +++++ 4 files changed, 7 insertions(+), 20 deletions(-) create mode 100644 openml/_api/runtime/instance.py diff --git a/openml/__init__.py b/openml/__init__.py index ae5db261f..a7c95dc2e 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,6 +33,7 @@ utils, ) from .__version__ import __version__ +from ._api.runtime.instance import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -109,6 +110,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", + "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 881f40671..e69de29bb 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -1,8 +0,0 @@ -from openml._api.runtime.core import APIContext - - -def set_api_version(version: str, *, strict: bool = False) -> None: - api_context.set_version(version=version, strict=strict) - - 
-api_context = APIContext() diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index 22b3004a4..d4ae9b688 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -91,15 +91,3 @@ def build_backend(version: str, *, strict: bool) -> APIBackend: datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), ) - - -class APIContext: - def __init__(self) -> None: - self._backend = build_backend("v1", strict=False) - - def set_version(self, version: str, *, strict: bool = False) -> None: - self._backend = build_backend(version=version, strict=strict) - - @property - def backend(self) -> APIBackend: - return self._backend diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py new file mode 100644 index 000000000..0d945b084 --- /dev/null +++ b/openml/_api/runtime/instance.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from openml._api.runtime.core import APIBackend, build_backend + +_backend: APIBackend = build_backend("v1", strict=False) From 384da91b80d91526826df3afda4ac2624562f6f7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sun, 1 Feb 2026 14:40:13 +0500 Subject: [PATCH 39/86] major refactor --- openml/_api/clients/__init__.py | 2 + openml/_api/clients/minio.py | 11 + openml/_api/resources/__init__.py | 36 ++- openml/_api/resources/base/__init__.py | 29 +- openml/_api/resources/base/resources.py | 49 ++-- openml/_api/resources/base/versions.py | 4 +- openml/_api/resources/dataset.py | 11 + openml/_api/resources/datasets.py | 20 -- openml/_api/resources/estimation_procedure.py | 11 + openml/_api/resources/evaluation.py | 11 + openml/_api/resources/evaluation_measure.py | 11 + openml/_api/resources/flow.py | 11 + openml/_api/resources/run.py | 11 + openml/_api/resources/setup.py | 11 + openml/_api/resources/study.py | 11 + openml/_api/resources/task.py | 11 + openml/_api/resources/tasks.py | 128 --------- openml/_api/runtime/core.py | 251 ++++++++++++------ openml/_api/runtime/instance.py | 4 +- tests/test_api/test_versions.py | 6 +- 20 files changed, 382 insertions(+), 257 deletions(-) create mode 100644 openml/_api/resources/dataset.py delete mode 100644 openml/_api/resources/datasets.py create mode 100644 openml/_api/resources/estimation_procedure.py create mode 100644 openml/_api/resources/evaluation.py create mode 100644 openml/_api/resources/evaluation_measure.py create mode 100644 openml/_api/resources/flow.py create mode 100644 openml/_api/resources/run.py create mode 100644 openml/_api/resources/setup.py create mode 100644 openml/_api/resources/study.py create mode 100644 openml/_api/resources/task.py delete mode 100644 openml/_api/resources/tasks.py diff --git a/openml/_api/clients/__init__.py b/openml/_api/clients/__init__.py index 8a5ff94e4..42f11fbcf 100644 --- a/openml/_api/clients/__init__.py +++ b/openml/_api/clients/__init__.py @@ -1,6 +1,8 @@ from .http import HTTPCache, HTTPClient +from .minio import MinIOClient __all__ = [ "HTTPCache", "HTTPClient", + "MinIOClient", ] diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index e69de29bb..2edc8269b 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from pathlib import Path + +from openml.__version__ import __version__ + + +class MinIOClient: + def __init__(self, path: Path | None = None) -> None: + self.path = path + self.headers: dict[str, str] = {"user-agent": 
f"openml-python/{__version__}"} diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 6c0807e0f..b666c018b 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,5 +1,35 @@ from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.datasets import DatasetsV1, DatasetsV2 -from openml._api.resources.tasks import TasksV1, TasksV2 +from openml._api.resources.dataset import DatasetV1API, DatasetV2API +from openml._api.resources.estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API +from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from openml._api.resources.flow import FlowV1API, FlowV2API +from openml._api.resources.run import RunV1API, RunV2API +from openml._api.resources.setup import SetupV1API, SetupV2API +from openml._api.resources.study import StudyV1API, StudyV2API +from openml._api.resources.task import TaskV1API, TaskV2API -__all__ = ["DatasetsV1", "DatasetsV2", "FallbackProxy", "TasksV1", "TasksV2"] +__all__ = [ + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowV1API", + "FlowV2API", + "RunV1API", + "RunV2API", + "SetupV1API", + "SetupV2API", + "StudyV1API", + "StudyV2API", + "TaskV1API", + "TaskV2API", +] diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index 089729d09..f222a0b87 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,13 +1,30 @@ from openml._api.resources.base.base import ResourceAPI from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.base.resources import DatasetsAPI, TasksAPI -from openml._api.resources.base.versions import ResourceV1, ResourceV2 +from openml._api.resources.base.resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) +from openml._api.resources.base.versions import ResourceV1API, ResourceV2API __all__ = [ - "DatasetsAPI", + "DatasetAPI", + "EstimationProcedureAPI", + "EvaluationAPI", + "EvaluationMeasureAPI", "FallbackProxy", + "FlowAPI", "ResourceAPI", - "ResourceV1", - "ResourceV2", - "TasksAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", + "SetupAPI", + "StudyAPI", + "TaskAPI", ] diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 406bdfa50..200278fc2 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,32 +1,49 @@ from __future__ import annotations -from abc import abstractmethod from typing import TYPE_CHECKING from openml._api.config import ResourceType from openml._api.resources.base import ResourceAPI if TYPE_CHECKING: - from requests import Response + from openml._api.clients import HTTPClient, MinIOClient - from openml.datasets.dataset import OpenMLDataset - from openml.tasks.task import OpenMLTask - -class DatasetsAPI(ResourceAPI): +class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET - @abstractmethod - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... 
+ def __init__(self, http: HTTPClient, minio: MinIOClient): + self._minio = minio + super().__init__(http) -class TasksAPI(ResourceAPI): +class TaskAPI(ResourceAPI): resource_type: ResourceType = ResourceType.TASK - @abstractmethod - def get( - self, - task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... + +class EvaluationMeasureAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.EVALUATION_MEASURE + + +class EstimationProcedureAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE + + +class EvaluationAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.EVALUATION + + +class FlowAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.FLOW + + +class StudyAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.STUDY + + +class RunAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.RUN + + +class SetupAPI(ResourceAPI): + resource_type: ResourceType = ResourceType.SETUP diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 990c3f791..88ae87a1c 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -14,7 +14,7 @@ ) -class ResourceV1(ResourceAPI): +class ResourceV1API(ResourceAPI): api_version: APIVersion = APIVersion.V1 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: @@ -138,7 +138,7 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: raise ValueError("No ID found in upload response") -class ResourceV2(ResourceAPI): +class ResourceV2API(ResourceAPI): api_version: APIVersion = APIVersion.V2 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py new file mode 100644 index 000000000..3ecad35da --- /dev/null +++ b/openml/_api/resources/dataset.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import DatasetAPI, ResourceV1API, ResourceV2API + + +class DatasetV1API(ResourceV1API, DatasetAPI): + pass + + +class DatasetV2API(ResourceV2API, DatasetAPI): + pass diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py deleted file mode 100644 index f3a49a84f..000000000 --- a/openml/_api/resources/datasets.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from openml._api.resources.base import DatasetsAPI, ResourceV1, ResourceV2 - -if TYPE_CHECKING: - from responses import Response - - from openml.datasets.dataset import OpenMLDataset - - -class DatasetsV1(ResourceV1, DatasetsAPI): - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: - raise NotImplementedError - - -class DatasetsV2(ResourceV2, DatasetsAPI): - def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: - raise NotImplementedError diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py new file mode 100644 index 000000000..d2e73cfa6 --- /dev/null +++ b/openml/_api/resources/estimation_procedure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EstimationProcedureAPI, ResourceV1API, ResourceV2API + + +class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): + pass + + +class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI): + pass diff --git 
a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py new file mode 100644 index 000000000..a0149e1e5 --- /dev/null +++ b/openml/_api/resources/evaluation.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EvaluationAPI, ResourceV1API, ResourceV2API + + +class EvaluationV1API(ResourceV1API, EvaluationAPI): + pass + + +class EvaluationV2API(ResourceV2API, EvaluationAPI): + pass diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py new file mode 100644 index 000000000..bd4318417 --- /dev/null +++ b/openml/_api/resources/evaluation_measure.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API + + +class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): + pass + + +class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI): + pass diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py new file mode 100644 index 000000000..3b62abd3f --- /dev/null +++ b/openml/_api/resources/flow.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import FlowAPI, ResourceV1API, ResourceV2API + + +class FlowV1API(ResourceV1API, FlowAPI): + pass + + +class FlowV2API(ResourceV2API, FlowAPI): + pass diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py new file mode 100644 index 000000000..9698c59dd --- /dev/null +++ b/openml/_api/resources/run.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, RunAPI + + +class RunV1API(ResourceV1API, RunAPI): + pass + + +class RunV2API(ResourceV2API, RunAPI): + pass diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py new file mode 100644 index 000000000..e948e1b38 --- /dev/null +++ b/openml/_api/resources/setup.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, SetupAPI + + +class SetupV1API(ResourceV1API, SetupAPI): + pass + + +class SetupV2API(ResourceV2API, SetupAPI): + pass diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py new file mode 100644 index 000000000..8de5868d1 --- /dev/null +++ b/openml/_api/resources/study.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI + + +class StudyV1API(ResourceV1API, StudyAPI): + pass + + +class StudyV2API(ResourceV2API, StudyAPI): + pass diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py new file mode 100644 index 000000000..a97d5f726 --- /dev/null +++ b/openml/_api/resources/task.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from openml._api.resources.base import ResourceV1API, ResourceV2API, TaskAPI + + +class TaskV1API(ResourceV1API, TaskAPI): + pass + + +class TaskV2API(ResourceV2API, TaskAPI): + pass diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py deleted file mode 100644 index 8420f8e57..000000000 --- a/openml/_api/resources/tasks.py +++ /dev/null @@ -1,128 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import xmltodict - -from openml._api.resources.base import ResourceV1, ResourceV2, TasksAPI -from openml.tasks.task import ( - OpenMLClassificationTask, - OpenMLClusteringTask, - OpenMLLearningCurveTask, - OpenMLRegressionTask, - OpenMLTask, - 
TaskType, -) - -if TYPE_CHECKING: - from requests import Response - - -class TasksV1(ResourceV1, TasksAPI): - def get( - self, - task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - path = f"task/{task_id}" - response = self._http.get(path, use_cache=True) - xml_content = response.text - task = self._create_task_from_xml(xml_content) - - if return_response: - return task, response - - return task - - def _create_task_from_xml(self, xml: str) -> OpenMLTask: - """Create a task given a xml string. - - Parameters - ---------- - xml : string - Task xml representation. - - Returns - ------- - OpenMLTask - """ - dic = xmltodict.parse(xml)["oml:task"] - estimation_parameters = {} - inputs = {} - # Due to the unordered structure we obtain, we first have to extract - # the possible keys of oml:input; dic["oml:input"] is a list of - # OrderedDicts - - # Check if there is a list of inputs - if isinstance(dic["oml:input"], list): - for input_ in dic["oml:input"]: - name = input_["@name"] - inputs[name] = input_ - # Single input case - elif isinstance(dic["oml:input"], dict): - name = dic["oml:input"]["@name"] - inputs[name] = dic["oml:input"] - - evaluation_measures = None - if "evaluation_measures" in inputs: - evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ - "oml:evaluation_measure" - ] - - task_type = TaskType(int(dic["oml:task_type_id"])) - common_kwargs = { - "task_id": dic["oml:task_id"], - "task_type": dic["oml:task_type"], - "task_type_id": task_type, - "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], - "evaluation_measure": evaluation_measures, - } - # TODO: add OpenMLClusteringTask? - if task_type in ( - TaskType.SUPERVISED_CLASSIFICATION, - TaskType.SUPERVISED_REGRESSION, - TaskType.LEARNING_CURVE, - ): - # Convert some more parameters - for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ - "oml:parameter" - ]: - name = parameter["@name"] - text = parameter.get("#text", "") - estimation_parameters[name] = text - - common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:type"] - common_kwargs["estimation_procedure_id"] = int( - inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] - ) - - common_kwargs["estimation_parameters"] = estimation_parameters - common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ - "oml:target_feature" - ] - common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ - "oml:estimation_procedure" - ]["oml:data_splits_url"] - - cls = { - TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskType.CLUSTERING: OpenMLClusteringTask, - TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type) - if cls is None: - raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") - return cls(**common_kwargs) # type: ignore - - -class TasksV2(ResourceV2, TasksAPI): - def get( - self, - task_id: int, # noqa: ARG002 - *, - return_response: bool = False, # noqa: ARG002 - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - self._not_supported(method="get") diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index d4ae9b688..9c3ff70a5 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -3,91 +3,188 @@ from pathlib import Path from typing import TYPE_CHECKING -from openml._api.clients import HTTPCache, HTTPClient +from 
openml._api.clients import HTTPCache, HTTPClient, MinIOClient from openml._api.config import Settings from openml._api.resources import ( - DatasetsV1, - DatasetsV2, + DatasetV1API, + DatasetV2API, + EstimationProcedureV1API, + EstimationProcedureV2API, + EvaluationMeasureV1API, + EvaluationMeasureV2API, + EvaluationV1API, + EvaluationV2API, FallbackProxy, - TasksV1, - TasksV2, + FlowV1API, + FlowV2API, + RunV1API, + RunV2API, + SetupV1API, + SetupV2API, + StudyV1API, + StudyV2API, + TaskV1API, + TaskV2API, ) if TYPE_CHECKING: - from openml._api.resources.base import DatasetsAPI, TasksAPI - - -class APIBackend: - def __init__(self, *, datasets: DatasetsAPI | FallbackProxy, tasks: TasksAPI | FallbackProxy): - self.datasets = datasets - self.tasks = tasks - - -def build_backend(version: str, *, strict: bool) -> APIBackend: - settings = Settings.get() - - # Get config for v1. On first access, this triggers lazy initialization - # from openml.config, reading the user's actual API key, server URL, - # cache directory, and retry settings. This avoids circular imports - # (openml.config is imported inside the method, not at module load time) - # and ensures we use the user's configured values rather than hardcoded defaults. - v1_config = settings.get_api_config("v1") - - http_cache = HTTPCache( - path=Path(settings.cache.dir).expanduser(), - ttl=settings.cache.ttl, + from openml._api.resources.base import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, ) - v1_http_client = HTTPClient( - server=v1_config.server, - base_url=v1_config.base_url, - api_key=v1_config.api_key, - timeout=v1_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v1 = APIBackend( - datasets=DatasetsV1(v1_http_client), - tasks=TasksV1(v1_http_client), - ) +class APIBackend: + def __init__( # noqa: PLR0913 + self, + *, + dataset: DatasetAPI | FallbackProxy, + task: TaskAPI | FallbackProxy, + evaluation_measure: EvaluationMeasureAPI | FallbackProxy, + estimation_procedure: EstimationProcedureAPI | FallbackProxy, + evaluation: EvaluationAPI | FallbackProxy, + flow: FlowAPI | FallbackProxy, + study: StudyAPI | FallbackProxy, + run: RunAPI | FallbackProxy, + setup: SetupAPI | FallbackProxy, + ): + self.dataset = dataset + self.task = task + self.evaluation_measure = evaluation_measure + self.estimation_procedure = estimation_procedure + self.evaluation = evaluation + self.flow = flow + self.study = study + self.run = run + self.setup = setup + + @classmethod + def build(cls, version: str, *, strict: bool) -> APIBackend: + settings = Settings.get() + + # Get config for v1. On first access, this triggers lazy initialization + # from openml.config, reading the user's actual API key, server URL, + # cache directory, and retry settings. This avoids circular imports + # (openml.config is imported inside the method, not at module load time) + # and ensures we use the user's configured values rather than hardcoded defaults. 
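As a rough usage sketch (an illustration, not code from this patch), the lazy singleton resolves to whatever the user already set through openml.config; the import path reflects the module layout at this point in the series:

    import openml.config
    from openml._api.config import Settings

    openml.config.server = "https://www.openml.org/api/v1/xml"
    cfg = Settings.get().get_api_config("v1")
    print(cfg.server)    # "https://www.openml.org/"  (derived by splitting on "/api")
    print(cfg.base_url)  # "api/v1/xml/"
    print(cfg.api_key == openml.config.apikey)  # True
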
+ v1_config = settings.get_api_config("v1") + + http_cache = HTTPCache( + path=Path(settings.cache.dir).expanduser(), + ttl=settings.cache.ttl, + ) + minio_client = MinIOClient( + path=Path(settings.cache.dir).expanduser(), + ) + + v1_http_client = HTTPClient( + server=v1_config.server, + base_url=v1_config.base_url, + api_key=v1_config.api_key, + timeout=v1_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v1_dataset = DatasetV1API(v1_http_client, minio_client) + v1_task = TaskV1API(v1_http_client) + v1_evaluation_measure = EvaluationMeasureV1API(v1_http_client) + v1_estimation_procedure = EstimationProcedureV1API(v1_http_client) + v1_evaluation = EvaluationV1API(v1_http_client) + v1_flow = FlowV1API(v1_http_client) + v1_study = StudyV1API(v1_http_client) + v1_run = RunV1API(v1_http_client) + v1_setup = SetupV1API(v1_http_client) + + v1 = cls( + dataset=v1_dataset, + task=v1_task, + evaluation_measure=v1_evaluation_measure, + estimation_procedure=v1_estimation_procedure, + evaluation=v1_evaluation, + flow=v1_flow, + study=v1_study, + run=v1_run, + setup=v1_setup, + ) + + if version == "v1": + return v1 + + # V2 support. Currently v2 is not yet available, + # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, + # its config will be added to Settings._init_from_legacy_config(). + # In strict mode: propagate the error. + # In non-strict mode: silently fall back to v1 only. + try: + v2_config = settings.get_api_config("v2") + except NotImplementedError: + if strict: + raise + # Non-strict mode: fall back to v1 only + return v1 + + v2_http_client = HTTPClient( + server=v2_config.server, + base_url=v2_config.base_url, + api_key=v2_config.api_key, + timeout=v2_config.timeout, + retries=settings.connection.retries, + retry_policy=settings.connection.retry_policy, + cache=http_cache, + ) + v2_dataset = DatasetV2API(v2_http_client, minio_client) + v2_task = TaskV2API(v2_http_client) + v2_evaluation_measure = EvaluationMeasureV2API(v2_http_client) + v2_estimation_procedure = EstimationProcedureV2API(v2_http_client) + v2_evaluation = EvaluationV2API(v2_http_client) + v2_flow = FlowV2API(v2_http_client) + v2_study = StudyV2API(v2_http_client) + v2_run = RunV2API(v2_http_client) + v2_setup = SetupV2API(v2_http_client) + + v2 = cls( + dataset=v2_dataset, + task=v2_task, + evaluation_measure=v2_evaluation_measure, + estimation_procedure=v2_estimation_procedure, + evaluation=v2_evaluation, + flow=v2_flow, + study=v2_study, + run=v2_run, + setup=v2_setup, + ) - if version == "v1": - return v1 - - # V2 support. Currently v2 is not yet available, - # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, - # its config will be added to Settings._init_from_legacy_config(). - # In strict mode: propagate the error. - # In non-strict mode: silently fall back to v1 only. 
- try: - v2_config = settings.get_api_config("v2") - except NotImplementedError: if strict: - raise - # Non-strict mode: fall back to v1 only - return v1 - - v2_http_client = HTTPClient( - server=v2_config.server, - base_url=v2_config.base_url, - api_key=v2_config.api_key, - timeout=v2_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - - v2 = APIBackend( - datasets=DatasetsV2(v2_http_client), - tasks=TasksV2(v2_http_client), - ) - - if strict: - return v2 - - return APIBackend( - datasets=FallbackProxy(DatasetsV2(v2_http_client), DatasetsV1(v1_http_client)), - tasks=FallbackProxy(TasksV2(v2_http_client), TasksV1(v1_http_client)), - ) + return v2 + + fallback_dataset = FallbackProxy(v1_dataset, v2_dataset) + fallback_task = FallbackProxy(v1_task, v2_task) + fallback_evaluation_measure = FallbackProxy(v1_evaluation_measure, v2_evaluation_measure) + fallback_estimation_procedure = FallbackProxy( + v1_estimation_procedure, v2_estimation_procedure + ) + fallback_evaluation = FallbackProxy(v1_evaluation, v2_evaluation) + fallback_flow = FallbackProxy(v1_flow, v2_flow) + fallback_study = FallbackProxy(v1_study, v2_study) + fallback_run = FallbackProxy(v1_run, v2_run) + fallback_setup = FallbackProxy(v1_setup, v2_setup) + + return cls( + dataset=fallback_dataset, + task=fallback_task, + evaluation_measure=fallback_evaluation_measure, + estimation_procedure=fallback_estimation_procedure, + evaluation=fallback_evaluation, + flow=fallback_flow, + study=fallback_study, + run=fallback_run, + setup=fallback_setup, + ) diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py index 0d945b084..633d3f372 100644 --- a/openml/_api/runtime/instance.py +++ b/openml/_api/runtime/instance.py @@ -1,5 +1,5 @@ from __future__ import annotations -from openml._api.runtime.core import APIBackend, build_backend +from openml._api.runtime.core import APIBackend -_backend: APIBackend = build_backend("v1", strict=False) +_backend: APIBackend = APIBackend.build(version="v1", strict=False) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 9eb4c7a91..2203ab6da 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,13 +1,13 @@ import pytest from openml.testing import TestAPIBase -from openml._api.resources.base.versions import ResourceV1 +from openml._api.resources.base.versions import ResourceV1API from openml._api.config import ResourceType -class TestResourceV1(TestAPIBase): +class TestResourceV1API(TestAPIBase): def setUp(self): super().setUp() - self.resource = ResourceV1(self.http_client) + self.resource = ResourceV1API(self.http_client) self.resource.resource_type = ResourceType.TASK @pytest.mark.uses_test_server() From 187813839c57ddb0d12b702f371fe7d08220c963 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 10:37:59 +0500 Subject: [PATCH 40/86] more refactoring with setup/ --- openml/__init__.py | 2 - openml/_api/clients/http.py | 2 +- openml/_api/config.py | 114 ------------- openml/_api/resources/__init__.py | 2 + openml/_api/resources/_registry.py | 48 ++++++ openml/_api/resources/base/base.py | 7 +- openml/_api/resources/base/enums.py | 27 +++ openml/_api/resources/base/resources.py | 11 +- openml/_api/resources/base/versions.py | 2 +- openml/_api/runtime/core.py | 190 --------------------- openml/_api/runtime/instance.py | 5 - openml/_api/{runtime => setup}/__init__.py | 0 openml/_api/setup/builder.py | 71 ++++++++ 
openml/_api/setup/config.py | 62 +++++++ openml/_api/setup/utils.py | 49 ++++++ openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 17 files changed, 268 insertions(+), 328 deletions(-) delete mode 100644 openml/_api/config.py create mode 100644 openml/_api/resources/_registry.py create mode 100644 openml/_api/resources/base/enums.py delete mode 100644 openml/_api/runtime/core.py delete mode 100644 openml/_api/runtime/instance.py rename openml/_api/{runtime => setup}/__init__.py (100%) create mode 100644 openml/_api/setup/builder.py create mode 100644 openml/_api/setup/config.py create mode 100644 openml/_api/setup/utils.py diff --git a/openml/__init__.py b/openml/__init__.py index a7c95dc2e..ae5db261f 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,7 +33,6 @@ utils, ) from .__version__ import __version__ -from ._api.runtime.instance import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -110,7 +109,6 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", - "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dfcdf5a8a..f700c108a 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -16,7 +16,7 @@ from requests import Response from openml.__version__ import __version__ -from openml._api.config import RetryPolicy +from openml._api.resources.base.enums import RetryPolicy from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/config.py b/openml/_api/config.py deleted file mode 100644 index 3afbf224f..000000000 --- a/openml/_api/config.py +++ /dev/null @@ -1,114 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum - - -class APIVersion(str, Enum): - V1 = "v1" - V2 = "v2" - - -class ResourceType(str, Enum): - DATASET = "dataset" - TASK = "task" - TASK_TYPE = "task_type" - EVALUATION_MEASURE = "evaluation_measure" - ESTIMATION_PROCEDURE = "estimation_procedure" - EVALUATION = "evaluation" - FLOW = "flow" - STUDY = "study" - RUN = "run" - SETUP = "setup" - USER = "user" - - -class RetryPolicy(str, Enum): - HUMAN = "human" - ROBOT = "robot" - - -@dataclass -class APIConfig: - server: str - base_url: str - api_key: str - timeout: int = 10 # seconds - - -@dataclass -class ConnectionConfig: - retries: int = 3 - retry_policy: RetryPolicy = RetryPolicy.HUMAN - - -@dataclass -class CacheConfig: - dir: str = "~/.openml/cache" - ttl: int = 60 * 60 * 24 * 7 # one week - - -class Settings: - """Settings container that reads from openml.config on access.""" - - _instance: Settings | None = None - - def __init__(self) -> None: - self.api_configs: dict[str, APIConfig] = {} - self.connection = ConnectionConfig() - self.cache = CacheConfig() - self._initialized = False - - @classmethod - def get(cls) -> Settings: - """Get settings singleton, creating on first access.""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - @classmethod - def reset(cls) -> None: - """Reset the settings singleton. Useful for testing.""" - cls._instance = None - - def get_api_config(self, version: str) -> APIConfig: - """Get API config for a version, with lazy initialization from openml.config.""" - if not self._initialized: - self._init_from_legacy_config() - if version not in self.api_configs: - raise NotImplementedError( - f"API {version} is not yet available. 
" - f"Supported versions: {list(self.api_configs.keys())}" - ) - return self.api_configs[version] - - def _init_from_legacy_config(self) -> None: - """Lazy init from openml.config to avoid circular imports.""" - if self._initialized: - return - - # Import here (not at module level) to avoid circular imports. - # We read from openml.config to integrate with the existing config system - # where users set their API key, server, cache directory, etc. - # This avoids duplicating those settings with hardcoded values. - import openml.config as legacy - - server_url = legacy.server - server_base = server_url.rsplit("/api", 1)[0] + "/" if "/api" in server_url else server_url - - self.api_configs["v1"] = APIConfig( - server=server_base, - base_url="api/v1/xml/", - api_key=legacy.apikey, - ) - - # Sync connection- and cache- settings from legacy config - self.connection = ConnectionConfig( - retries=legacy.connection_n_retries, - retry_policy=RetryPolicy(legacy.retry_policy), - ) - self.cache = CacheConfig( - dir=str(legacy._root_cache_directory), - ) - - self._initialized = True diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index b666c018b..a3dc63798 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,3 +1,4 @@ +from openml._api.resources._registry import API_REGISTRY from openml._api.resources.base.fallback import FallbackProxy from openml._api.resources.dataset import DatasetV1API, DatasetV2API from openml._api.resources.estimation_procedure import ( @@ -13,6 +14,7 @@ from openml._api.resources.task import TaskV1API, TaskV2API __all__ = [ + "API_REGISTRY", "DatasetV1API", "DatasetV2API", "EstimationProcedureV1API", diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py new file mode 100644 index 000000000..e8746f481 --- /dev/null +++ b/openml/_api/resources/_registry.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.resources.base.enums import APIVersion, ResourceType +from openml._api.resources.dataset import DatasetV1API, DatasetV2API +from openml._api.resources.estimation_procedure import ( + EstimationProcedureV1API, + EstimationProcedureV2API, +) +from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API +from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from openml._api.resources.flow import FlowV1API, FlowV2API +from openml._api.resources.run import RunV1API, RunV2API +from openml._api.resources.setup import SetupV1API, SetupV2API +from openml._api.resources.study import StudyV1API, StudyV2API +from openml._api.resources.task import TaskV1API, TaskV2API + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + +API_REGISTRY: dict[ + APIVersion, + dict[ResourceType, type[ResourceAPI]], +] = { + APIVersion.V1: { + ResourceType.DATASET: DatasetV1API, + ResourceType.TASK: TaskV1API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV1API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV1API, + ResourceType.EVALUATION: EvaluationV1API, + ResourceType.FLOW: FlowV1API, + ResourceType.STUDY: StudyV1API, + ResourceType.RUN: RunV1API, + ResourceType.SETUP: SetupV1API, + }, + APIVersion.V2: { + ResourceType.DATASET: DatasetV2API, + ResourceType.TASK: TaskV2API, + ResourceType.EVALUATION_MEASURE: EvaluationMeasureV2API, + ResourceType.ESTIMATION_PROCEDURE: EstimationProcedureV2API, + ResourceType.EVALUATION: EvaluationV2API, + 
ResourceType.FLOW: FlowV2API, + ResourceType.STUDY: StudyV2API, + ResourceType.RUN: RunV2API, + ResourceType.SETUP: SetupV2API, + }, +} diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index dbe3e95ea..6a47f83f4 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -9,16 +9,17 @@ from collections.abc import Mapping from typing import Any - from openml._api.clients import HTTPClient - from openml._api.config import APIVersion, ResourceType + from openml._api.clients import HTTPClient, MinIOClient + from openml._api.resources.base.enums import APIVersion, ResourceType class ResourceAPI(ABC): api_version: APIVersion resource_type: ResourceType - def __init__(self, http: HTTPClient): + def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): self._http = http + self._minio = minio @abstractmethod def delete(self, resource_id: int) -> bool: ... diff --git a/openml/_api/resources/base/enums.py b/openml/_api/resources/base/enums.py new file mode 100644 index 000000000..13201b3ec --- /dev/null +++ b/openml/_api/resources/base/enums.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from enum import Enum + + +class APIVersion(str, Enum): + V1 = "v1" + V2 = "v2" + + +class ResourceType(str, Enum): + DATASET = "dataset" + TASK = "task" + TASK_TYPE = "task_type" + EVALUATION_MEASURE = "evaluation_measure" + ESTIMATION_PROCEDURE = "estimation_procedure" + EVALUATION = "evaluation" + FLOW = "flow" + STUDY = "study" + RUN = "run" + SETUP = "setup" + USER = "user" + + +class RetryPolicy(str, Enum): + HUMAN = "human" + ROBOT = "robot" diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 200278fc2..270472029 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,21 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from openml._api.config import ResourceType from openml._api.resources.base import ResourceAPI - -if TYPE_CHECKING: - from openml._api.clients import HTTPClient, MinIOClient +from openml._api.resources.base.enums import ResourceType class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET - def __init__(self, http: HTTPClient, minio: MinIOClient): - self._minio = minio - super().__init__(http) - class TaskAPI(ResourceAPI): resource_type: ResourceType = ResourceType.TASK diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 88ae87a1c..f8b21a469 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,8 +5,8 @@ import xmltodict -from openml._api.config import APIVersion, ResourceType from openml._api.resources.base import ResourceAPI +from openml._api.resources.base.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py deleted file mode 100644 index 9c3ff70a5..000000000 --- a/openml/_api/runtime/core.py +++ /dev/null @@ -1,190 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import TYPE_CHECKING - -from openml._api.clients import HTTPCache, HTTPClient, MinIOClient -from openml._api.config import Settings -from openml._api.resources import ( - DatasetV1API, - DatasetV2API, - EstimationProcedureV1API, - EstimationProcedureV2API, - EvaluationMeasureV1API, - EvaluationMeasureV2API, - EvaluationV1API, - 
EvaluationV2API, - FallbackProxy, - FlowV1API, - FlowV2API, - RunV1API, - RunV2API, - SetupV1API, - SetupV2API, - StudyV1API, - StudyV2API, - TaskV1API, - TaskV2API, -) - -if TYPE_CHECKING: - from openml._api.resources.base import ( - DatasetAPI, - EstimationProcedureAPI, - EvaluationAPI, - EvaluationMeasureAPI, - FlowAPI, - RunAPI, - SetupAPI, - StudyAPI, - TaskAPI, - ) - - -class APIBackend: - def __init__( # noqa: PLR0913 - self, - *, - dataset: DatasetAPI | FallbackProxy, - task: TaskAPI | FallbackProxy, - evaluation_measure: EvaluationMeasureAPI | FallbackProxy, - estimation_procedure: EstimationProcedureAPI | FallbackProxy, - evaluation: EvaluationAPI | FallbackProxy, - flow: FlowAPI | FallbackProxy, - study: StudyAPI | FallbackProxy, - run: RunAPI | FallbackProxy, - setup: SetupAPI | FallbackProxy, - ): - self.dataset = dataset - self.task = task - self.evaluation_measure = evaluation_measure - self.estimation_procedure = estimation_procedure - self.evaluation = evaluation - self.flow = flow - self.study = study - self.run = run - self.setup = setup - - @classmethod - def build(cls, version: str, *, strict: bool) -> APIBackend: - settings = Settings.get() - - # Get config for v1. On first access, this triggers lazy initialization - # from openml.config, reading the user's actual API key, server URL, - # cache directory, and retry settings. This avoids circular imports - # (openml.config is imported inside the method, not at module load time) - # and ensures we use the user's configured values rather than hardcoded defaults. - v1_config = settings.get_api_config("v1") - - http_cache = HTTPCache( - path=Path(settings.cache.dir).expanduser(), - ttl=settings.cache.ttl, - ) - minio_client = MinIOClient( - path=Path(settings.cache.dir).expanduser(), - ) - - v1_http_client = HTTPClient( - server=v1_config.server, - base_url=v1_config.base_url, - api_key=v1_config.api_key, - timeout=v1_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v1_dataset = DatasetV1API(v1_http_client, minio_client) - v1_task = TaskV1API(v1_http_client) - v1_evaluation_measure = EvaluationMeasureV1API(v1_http_client) - v1_estimation_procedure = EstimationProcedureV1API(v1_http_client) - v1_evaluation = EvaluationV1API(v1_http_client) - v1_flow = FlowV1API(v1_http_client) - v1_study = StudyV1API(v1_http_client) - v1_run = RunV1API(v1_http_client) - v1_setup = SetupV1API(v1_http_client) - - v1 = cls( - dataset=v1_dataset, - task=v1_task, - evaluation_measure=v1_evaluation_measure, - estimation_procedure=v1_estimation_procedure, - evaluation=v1_evaluation, - flow=v1_flow, - study=v1_study, - run=v1_run, - setup=v1_setup, - ) - - if version == "v1": - return v1 - - # V2 support. Currently v2 is not yet available, - # so get_api_config("v2") raises NotImplementedError. When v2 becomes available, - # its config will be added to Settings._init_from_legacy_config(). - # In strict mode: propagate the error. - # In non-strict mode: silently fall back to v1 only. 
- try: - v2_config = settings.get_api_config("v2") - except NotImplementedError: - if strict: - raise - # Non-strict mode: fall back to v1 only - return v1 - - v2_http_client = HTTPClient( - server=v2_config.server, - base_url=v2_config.base_url, - api_key=v2_config.api_key, - timeout=v2_config.timeout, - retries=settings.connection.retries, - retry_policy=settings.connection.retry_policy, - cache=http_cache, - ) - v2_dataset = DatasetV2API(v2_http_client, minio_client) - v2_task = TaskV2API(v2_http_client) - v2_evaluation_measure = EvaluationMeasureV2API(v2_http_client) - v2_estimation_procedure = EstimationProcedureV2API(v2_http_client) - v2_evaluation = EvaluationV2API(v2_http_client) - v2_flow = FlowV2API(v2_http_client) - v2_study = StudyV2API(v2_http_client) - v2_run = RunV2API(v2_http_client) - v2_setup = SetupV2API(v2_http_client) - - v2 = cls( - dataset=v2_dataset, - task=v2_task, - evaluation_measure=v2_evaluation_measure, - estimation_procedure=v2_estimation_procedure, - evaluation=v2_evaluation, - flow=v2_flow, - study=v2_study, - run=v2_run, - setup=v2_setup, - ) - - if strict: - return v2 - - fallback_dataset = FallbackProxy(v1_dataset, v2_dataset) - fallback_task = FallbackProxy(v1_task, v2_task) - fallback_evaluation_measure = FallbackProxy(v1_evaluation_measure, v2_evaluation_measure) - fallback_estimation_procedure = FallbackProxy( - v1_estimation_procedure, v2_estimation_procedure - ) - fallback_evaluation = FallbackProxy(v1_evaluation, v2_evaluation) - fallback_flow = FallbackProxy(v1_flow, v2_flow) - fallback_study = FallbackProxy(v1_study, v2_study) - fallback_run = FallbackProxy(v1_run, v2_run) - fallback_setup = FallbackProxy(v1_setup, v2_setup) - - return cls( - dataset=fallback_dataset, - task=fallback_task, - evaluation_measure=fallback_evaluation_measure, - estimation_procedure=fallback_estimation_procedure, - evaluation=fallback_evaluation, - flow=fallback_flow, - study=fallback_study, - run=fallback_run, - setup=fallback_setup, - ) diff --git a/openml/_api/runtime/instance.py b/openml/_api/runtime/instance.py deleted file mode 100644 index 633d3f372..000000000 --- a/openml/_api/runtime/instance.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import annotations - -from openml._api.runtime.core import APIBackend - -_backend: APIBackend = APIBackend.build(version="v1", strict=False) diff --git a/openml/_api/runtime/__init__.py b/openml/_api/setup/__init__.py similarity index 100% rename from openml/_api/runtime/__init__.py rename to openml/_api/setup/__init__.py diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py new file mode 100644 index 000000000..4f4b843d7 --- /dev/null +++ b/openml/_api/setup/builder.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from collections.abc import Mapping +from pathlib import Path +from typing import TYPE_CHECKING + +from openml._api.clients import HTTPCache, HTTPClient, MinIOClient +from openml._api.resources import API_REGISTRY, FallbackProxy + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + from openml._api.resources.base.enums import ResourceType + from openml._api.setup.config import Config + + +class APIBackendBuilder: + def __init__( + self, + resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], + ): + for resource_type, resource_api in resource_apis.items(): + setattr(self, resource_type.value, resource_api) + + @classmethod + def build(cls, config: Config) -> APIBackendBuilder: + cache_dir = Path(config.cache.dir).expanduser() + + http_cache = 
HTTPCache(path=cache_dir, ttl=config.cache.ttl) + minio_client = MinIOClient(path=cache_dir) + + primary_api_config = config.api_configs[config.api_version] + primary_http_client = HTTPClient( + server=primary_api_config.server, + base_url=primary_api_config.base_url, + api_key=primary_api_config.api_key, + timeout=config.connection.timeout, + retries=config.connection.retries, + retry_policy=config.connection.retry_policy, + cache=http_cache, + ) + + resource_apis: dict[ResourceType, ResourceAPI] = {} + for resource_type, resource_api_cls in API_REGISTRY[config.api_version].items(): + resource_apis[resource_type] = resource_api_cls(primary_http_client, minio_client) + + if config.fallback_api_version is None: + return cls(resource_apis) + + fallback_api_config = config.api_configs[config.fallback_api_version] + fallback_http_client = HTTPClient( + server=fallback_api_config.server, + base_url=fallback_api_config.base_url, + api_key=fallback_api_config.api_key, + timeout=config.connection.timeout, + retries=config.connection.retries, + retry_policy=config.connection.retry_policy, + cache=http_cache, + ) + + fallback_resource_apis: dict[ResourceType, ResourceAPI] = {} + for resource_type, resource_api_cls in API_REGISTRY[config.fallback_api_version].items(): + fallback_resource_apis[resource_type] = resource_api_cls( + fallback_http_client, minio_client + ) + + merged: dict[ResourceType, FallbackProxy] = { + name: FallbackProxy(resource_apis[name], fallback_resource_apis[name]) + for name in resource_apis + } + + return cls(merged) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py new file mode 100644 index 000000000..0f783a23e --- /dev/null +++ b/openml/_api/setup/config.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from dataclasses import dataclass, field + +from openml._api.resources.base.enums import APIVersion, RetryPolicy +from openml._api.setup.utils import _resolve_default_cache_dir + + +@dataclass +class APIConfig: + server: str + base_url: str + api_key: str + + +@dataclass +class ConnectionConfig: + retries: int + retry_policy: RetryPolicy + timeout: int + + +@dataclass +class CacheConfig: + dir: str + ttl: int + + +@dataclass +class Config: + api_version: APIVersion = APIVersion.V1 + fallback_api_version: APIVersion | None = None + + api_configs: dict[APIVersion, APIConfig] = field( + default_factory=lambda: { + APIVersion.V1: APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + api_key="", + ), + APIVersion.V2: APIConfig( + server="http://localhost:8002/", + base_url="", + api_key="", + ), + } + ) + + connection: ConnectionConfig = field( + default_factory=lambda: ConnectionConfig( + retries=5, + retry_policy=RetryPolicy.HUMAN, + timeout=10, + ) + ) + + cache: CacheConfig = field( + default_factory=lambda: CacheConfig( + dir=str(_resolve_default_cache_dir()), + ttl=60 * 60 * 24 * 7, + ) + ) diff --git a/openml/_api/setup/utils.py b/openml/_api/setup/utils.py new file mode 100644 index 000000000..ddcf5b41c --- /dev/null +++ b/openml/_api/setup/utils.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import logging +import os +import platform +from pathlib import Path + +openml_logger = logging.getLogger("openml") + +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") + if user_defined_cache_dir is not None: + return 
Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) diff --git a/openml/testing.py b/openml/testing.py index b0aaac9be..18e03fb86 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ import openml from openml._api.clients import HTTPCache, HTTPClient -from openml._api.config import RetryPolicy +from openml._api.resources.base.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2203ab6da..fd41feb2a 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1API -from openml._api.config import ResourceType +from openml._api.resources.base.enums import ResourceType class TestResourceV1API(TestAPIBase): From dc26e016e02b4ed23961f148234398582b152e6f Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 10:40:03 +0500 Subject: [PATCH 41/86] implement APIBackend as controller --- openml/__init__.py | 2 ++ openml/_api/setup/_instance.py | 5 +++ openml/_api/setup/backend.py | 62 ++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 openml/_api/setup/_instance.py create mode 100644 openml/_api/setup/backend.py diff --git a/openml/__init__.py b/openml/__init__.py index ae5db261f..fdf3b90e4 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,6 +33,7 @@ utils, ) from .__version__ import __version__ +from ._api.setup._instance import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow @@ -109,6 +110,7 @@ def populate_cache( "OpenMLTask", "__version__", "_api_calls", + "_backend", "config", "datasets", "evaluations", diff --git a/openml/_api/setup/_instance.py b/openml/_api/setup/_instance.py new file mode 100644 index 000000000..2d9818a0d --- /dev/null +++ b/openml/_api/setup/_instance.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from openml._api.setup.backend import APIBackend + +_backend = APIBackend.get_instance() diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py new file mode 100644 index 000000000..7c300e143 --- /dev/null +++ b/openml/_api/setup/backend.py @@ -0,0 +1,62 @@ +from 
__future__ import annotations + +from copy import deepcopy +from typing import Any + +from openml._api.setup.builder import APIBackendBuilder +from openml._api.setup.config import Config + + +class APIBackend: + _instance: APIBackend | None = None + + def __init__(self, config: Config | None = None): + self._config: Config = config or Config() + self._backend = APIBackendBuilder.build(self._config) + + def __getattr__(self, name: str) -> Any: + """ + Delegate attribute access to the underlying backend. + Called only if attribute is not found on RuntimeBackend. + """ + return getattr(self._backend, name) + + @classmethod + def get_instance(cls) -> APIBackend: + if cls._instance is None: + cls._instance = cls() + return cls._instance + + @classmethod + def get_config(cls) -> Config: + return deepcopy(cls.get_instance()._config) + + @classmethod + def set_config(cls, config: Config) -> None: + instance = cls.get_instance() + instance._config = config + instance._backend = APIBackendBuilder.build(config) + + @classmethod + def get_config_value(cls, key: str) -> Config: + keys = key.split(".") + config_value = cls.get_instance()._config + for k in keys: + if isinstance(config_value, dict): + config_value = config_value[k] + else: + config_value = getattr(config_value, k) + return deepcopy(config_value) + + @classmethod + def set_config_value(cls, key: str, value: Any) -> None: + keys = key.split(".") + config = cls.get_instance()._config + parent = config + for k in keys[:-1]: + parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) + if isinstance(parent, dict): + parent[keys[-1]] = value + else: + setattr(parent, keys[-1], value) + cls.set_config(config) From e2d059b110da6d6b1355773b5b1b35689e977dca Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:05:33 +0500 Subject: [PATCH 42/86] move enums --- openml/_api/clients/http.py | 2 +- openml/_api/resources/_registry.py | 2 +- openml/_api/resources/base/base.py | 2 +- openml/_api/resources/base/resources.py | 2 +- openml/_api/resources/base/versions.py | 2 +- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 2 +- openml/{_api/resources/base => }/enums.py | 6 ++++++ openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 10 files changed, 15 insertions(+), 9 deletions(-) rename openml/{_api/resources/base => }/enums.py (76%) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index f700c108a..353cd5e9e 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -16,7 +16,7 @@ from requests import Response from openml.__version__ import __version__ -from openml._api.resources.base.enums import RetryPolicy +from openml.enums import RetryPolicy from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py index e8746f481..b1a5f2b74 100644 --- a/openml/_api/resources/_registry.py +++ b/openml/_api/resources/_registry.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING -from openml._api.resources.base.enums import APIVersion, ResourceType from openml._api.resources.dataset import DatasetV1API, DatasetV2API from openml._api.resources.estimation_procedure import ( EstimationProcedureV1API, @@ -15,6 +14,7 @@ from openml._api.resources.setup import SetupV1API, SetupV2API from openml._api.resources.study import StudyV1API, StudyV2API from openml._api.resources.task import TaskV1API, TaskV2API +from openml.enums import APIVersion, ResourceType if TYPE_CHECKING: from 
openml._api.resources.base import ResourceAPI diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 6a47f83f4..5eadc4932 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -10,7 +10,7 @@ from typing import Any from openml._api.clients import HTTPClient, MinIOClient - from openml._api.resources.base.enums import APIVersion, ResourceType + from openml.enums import APIVersion, ResourceType class ResourceAPI(ABC): diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 270472029..5c4dde9de 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,7 +1,7 @@ from __future__ import annotations from openml._api.resources.base import ResourceAPI -from openml._api.resources.base.enums import ResourceType +from openml.enums import ResourceType class DatasetAPI(ResourceAPI): diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index f8b21a469..a98a0ad43 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -6,7 +6,7 @@ import xmltodict from openml._api.resources.base import ResourceAPI -from openml._api.resources.base.enums import APIVersion, ResourceType +from openml.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerError, diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 4f4b843d7..135b18da3 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -9,8 +9,8 @@ if TYPE_CHECKING: from openml._api.resources.base import ResourceAPI - from openml._api.resources.base.enums import ResourceType from openml._api.setup.config import Config + from openml.enums import ResourceType class APIBackendBuilder: diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 0f783a23e..64e790404 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -2,8 +2,8 @@ from dataclasses import dataclass, field -from openml._api.resources.base.enums import APIVersion, RetryPolicy from openml._api.setup.utils import _resolve_default_cache_dir +from openml.enums import APIVersion, RetryPolicy @dataclass diff --git a/openml/_api/resources/base/enums.py b/openml/enums.py similarity index 76% rename from openml/_api/resources/base/enums.py rename to openml/enums.py index 13201b3ec..f5a4381b7 100644 --- a/openml/_api/resources/base/enums.py +++ b/openml/enums.py @@ -4,11 +4,15 @@ class APIVersion(str, Enum): + """Supported OpenML API versions.""" + V1 = "v1" V2 = "v2" class ResourceType(str, Enum): + """Canonical resource types exposed by the OpenML API.""" + DATASET = "dataset" TASK = "task" TASK_TYPE = "task_type" @@ -23,5 +27,7 @@ class ResourceType(str, Enum): class RetryPolicy(str, Enum): + """Retry behavior for failed API requests.""" + HUMAN = "human" ROBOT = "robot" diff --git a/openml/testing.py b/openml/testing.py index 18e03fb86..3ca2d1b76 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ import openml from openml._api.clients import HTTPCache, HTTPClient -from openml._api.resources.base.enums import RetryPolicy +from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index fd41feb2a..a7451f3ae 100644 --- a/tests/test_api/test_versions.py +++ 
b/tests/test_api/test_versions.py @@ -1,7 +1,7 @@ import pytest from openml.testing import TestAPIBase from openml._api.resources.base.versions import ResourceV1API -from openml._api.resources.base.enums import ResourceType +from openml.enums import ResourceType class TestResourceV1API(TestAPIBase): From d156ad4e6f1c1d2488242419baf20f5e5fa0e219 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:21:17 +0500 Subject: [PATCH 43/86] module level imports --- openml/_api/__init__.py | 69 +++++++++++++++++++ openml/_api/resources/__init__.py | 23 ++++--- openml/_api/resources/_registry.py | 23 ++++--- openml/_api/resources/base/__init__.py | 8 +-- openml/_api/resources/base/resources.py | 3 +- openml/_api/resources/base/versions.py | 3 +- openml/_api/resources/dataset.py | 2 +- openml/_api/resources/estimation_procedure.py | 2 +- openml/_api/resources/evaluation.py | 2 +- openml/_api/resources/evaluation_measure.py | 2 +- openml/_api/resources/flow.py | 2 +- openml/_api/resources/run.py | 2 +- openml/_api/resources/setup.py | 2 +- openml/_api/resources/study.py | 2 +- openml/_api/resources/task.py | 2 +- openml/_api/setup/__init__.py | 12 ++++ openml/_api/setup/_instance.py | 2 +- openml/_api/setup/backend.py | 4 +- openml/_api/setup/builder.py | 6 +- openml/_api/setup/config.py | 3 +- openml/_api/setup/utils.py | 49 ------------- 21 files changed, 130 insertions(+), 93 deletions(-) delete mode 100644 openml/_api/setup/utils.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index e69de29bb..25bc2f262 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -0,0 +1,69 @@ +from .clients import ( + HTTPCache, + HTTPClient, + MinIOClient, +) +from .resources import ( + API_REGISTRY, + DatasetV1API, + DatasetV2API, + EstimationProcedureV1API, + EstimationProcedureV2API, + EvaluationMeasureV1API, + EvaluationMeasureV2API, + EvaluationV1API, + EvaluationV2API, + FallbackProxy, + FlowV1API, + FlowV2API, + ResourceAPI, + RunV1API, + RunV2API, + SetupV1API, + SetupV2API, + StudyV1API, + StudyV2API, + TaskV1API, + TaskV2API, +) +from .setup import ( + APIBackend, + APIBackendBuilder, + APIConfig, + CacheConfig, + Config, + ConnectionConfig, +) + +__all__ = [ + "API_REGISTRY", + "APIBackend", + "APIBackendBuilder", + "APIConfig", + "CacheConfig", + "Config", + "ConnectionConfig", + "DatasetV1API", + "DatasetV2API", + "EstimationProcedureV1API", + "EstimationProcedureV2API", + "EvaluationMeasureV1API", + "EvaluationMeasureV2API", + "EvaluationV1API", + "EvaluationV2API", + "FallbackProxy", + "FlowV1API", + "FlowV2API", + "HTTPCache", + "HTTPClient", + "MinIOClient", + "ResourceAPI", + "RunV1API", + "RunV2API", + "SetupV1API", + "SetupV2API", + "StudyV1API", + "StudyV2API", + "TaskV1API", + "TaskV2API", +] diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index a3dc63798..863ec0f72 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,17 +1,17 @@ -from openml._api.resources._registry import API_REGISTRY -from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.dataset import DatasetV1API, DatasetV2API -from openml._api.resources.estimation_procedure import ( +from ._registry import API_REGISTRY +from .base import FallbackProxy, ResourceAPI +from .dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( EstimationProcedureV1API, EstimationProcedureV2API, ) -from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API -from 
openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API -from openml._api.resources.flow import FlowV1API, FlowV2API -from openml._api.resources.run import RunV1API, RunV2API -from openml._api.resources.setup import SetupV1API, SetupV2API -from openml._api.resources.study import StudyV1API, StudyV2API -from openml._api.resources.task import TaskV1API, TaskV2API +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API __all__ = [ "API_REGISTRY", @@ -26,6 +26,7 @@ "FallbackProxy", "FlowV1API", "FlowV2API", + "ResourceAPI", "RunV1API", "RunV2API", "SetupV1API", diff --git a/openml/_api/resources/_registry.py b/openml/_api/resources/_registry.py index b1a5f2b74..66d7ec428 100644 --- a/openml/_api/resources/_registry.py +++ b/openml/_api/resources/_registry.py @@ -2,22 +2,23 @@ from typing import TYPE_CHECKING -from openml._api.resources.dataset import DatasetV1API, DatasetV2API -from openml._api.resources.estimation_procedure import ( +from openml.enums import APIVersion, ResourceType + +from .dataset import DatasetV1API, DatasetV2API +from .estimation_procedure import ( EstimationProcedureV1API, EstimationProcedureV2API, ) -from openml._api.resources.evaluation import EvaluationV1API, EvaluationV2API -from openml._api.resources.evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API -from openml._api.resources.flow import FlowV1API, FlowV2API -from openml._api.resources.run import RunV1API, RunV2API -from openml._api.resources.setup import SetupV1API, SetupV2API -from openml._api.resources.study import StudyV1API, StudyV2API -from openml._api.resources.task import TaskV1API, TaskV2API -from openml.enums import APIVersion, ResourceType +from .evaluation import EvaluationV1API, EvaluationV2API +from .evaluation_measure import EvaluationMeasureV1API, EvaluationMeasureV2API +from .flow import FlowV1API, FlowV2API +from .run import RunV1API, RunV2API +from .setup import SetupV1API, SetupV2API +from .study import StudyV1API, StudyV2API +from .task import TaskV1API, TaskV2API if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI + from .base import ResourceAPI API_REGISTRY: dict[ APIVersion, diff --git a/openml/_api/resources/base/__init__.py b/openml/_api/resources/base/__init__.py index f222a0b87..ed6dc26f7 100644 --- a/openml/_api/resources/base/__init__.py +++ b/openml/_api/resources/base/__init__.py @@ -1,6 +1,6 @@ -from openml._api.resources.base.base import ResourceAPI -from openml._api.resources.base.fallback import FallbackProxy -from openml._api.resources.base.resources import ( +from .base import ResourceAPI +from .fallback import FallbackProxy +from .resources import ( DatasetAPI, EstimationProcedureAPI, EvaluationAPI, @@ -11,7 +11,7 @@ StudyAPI, TaskAPI, ) -from openml._api.resources.base.versions import ResourceV1API, ResourceV2API +from .versions import ResourceV1API, ResourceV2API __all__ = [ "DatasetAPI", diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 5c4dde9de..8ccd5776e 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,8 +1,9 @@ from __future__ import annotations -from openml._api.resources.base import ResourceAPI from 
openml.enums import ResourceType +from .base import ResourceAPI + class DatasetAPI(ResourceAPI): resource_type: ResourceType = ResourceType.DATASET diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index a98a0ad43..b86272377 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -5,7 +5,6 @@ import xmltodict -from openml._api.resources.base import ResourceAPI from openml.enums import APIVersion, ResourceType from openml.exceptions import ( OpenMLNotAuthorizedError, @@ -13,6 +12,8 @@ OpenMLServerException, ) +from .base import ResourceAPI + class ResourceV1API(ResourceAPI): api_version: APIVersion = APIVersion.V1 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py index 3ecad35da..51688a2fd 100644 --- a/openml/_api/resources/dataset.py +++ b/openml/_api/resources/dataset.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import DatasetAPI, ResourceV1API, ResourceV2API +from .base import DatasetAPI, ResourceV1API, ResourceV2API class DatasetV1API(ResourceV1API, DatasetAPI): diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py index d2e73cfa6..b8ea7d2c3 100644 --- a/openml/_api/resources/estimation_procedure.py +++ b/openml/_api/resources/estimation_procedure.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EstimationProcedureAPI, ResourceV1API, ResourceV2API +from .base import EstimationProcedureAPI, ResourceV1API, ResourceV2API class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py index a0149e1e5..07877e14e 100644 --- a/openml/_api/resources/evaluation.py +++ b/openml/_api/resources/evaluation.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EvaluationAPI, ResourceV1API, ResourceV2API +from .base import EvaluationAPI, ResourceV1API, ResourceV2API class EvaluationV1API(ResourceV1API, EvaluationAPI): diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py index bd4318417..63cf16c77 100644 --- a/openml/_api/resources/evaluation_measure.py +++ b/openml/_api/resources/evaluation_measure.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API +from .base import EvaluationMeasureAPI, ResourceV1API, ResourceV2API class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py index 3b62abd3f..ad2e05bd9 100644 --- a/openml/_api/resources/flow.py +++ b/openml/_api/resources/flow.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import FlowAPI, ResourceV1API, ResourceV2API +from .base import FlowAPI, ResourceV1API, ResourceV2API class FlowV1API(ResourceV1API, FlowAPI): diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py index 9698c59dd..151c69e35 100644 --- a/openml/_api/resources/run.py +++ b/openml/_api/resources/run.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, RunAPI +from .base import ResourceV1API, ResourceV2API, RunAPI class RunV1API(ResourceV1API, RunAPI): diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py index 
e948e1b38..78a36cecc 100644 --- a/openml/_api/resources/setup.py +++ b/openml/_api/resources/setup.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, SetupAPI +from .base import ResourceV1API, ResourceV2API, SetupAPI class SetupV1API(ResourceV1API, SetupAPI): diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py index 8de5868d1..cefd55004 100644 --- a/openml/_api/resources/study.py +++ b/openml/_api/resources/study.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, StudyAPI +from .base import ResourceV1API, ResourceV2API, StudyAPI class StudyV1API(ResourceV1API, StudyAPI): diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py index a97d5f726..a367c9aa1 100644 --- a/openml/_api/resources/task.py +++ b/openml/_api/resources/task.py @@ -1,6 +1,6 @@ from __future__ import annotations -from openml._api.resources.base import ResourceV1API, ResourceV2API, TaskAPI +from .base import ResourceV1API, ResourceV2API, TaskAPI class TaskV1API(ResourceV1API, TaskAPI): diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index e69de29bb..7f8c65ba3 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -0,0 +1,12 @@ +from .backend import APIBackend +from .builder import APIBackendBuilder +from .config import APIConfig, CacheConfig, Config, ConnectionConfig + +__all__ = [ + "APIBackend", + "APIBackendBuilder", + "APIConfig", + "CacheConfig", + "Config", + "ConnectionConfig", +] diff --git a/openml/_api/setup/_instance.py b/openml/_api/setup/_instance.py index 2d9818a0d..c98ccaf57 100644 --- a/openml/_api/setup/_instance.py +++ b/openml/_api/setup/_instance.py @@ -1,5 +1,5 @@ from __future__ import annotations -from openml._api.setup.backend import APIBackend +from .backend import APIBackend _backend = APIBackend.get_instance() diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index 7c300e143..f0faf5165 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -3,8 +3,8 @@ from copy import deepcopy from typing import Any -from openml._api.setup.builder import APIBackendBuilder -from openml._api.setup.config import Config +from .builder import APIBackendBuilder +from .config import Config class APIBackend: diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 135b18da3..750db431a 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -5,13 +5,13 @@ from typing import TYPE_CHECKING from openml._api.clients import HTTPCache, HTTPClient, MinIOClient -from openml._api.resources import API_REGISTRY, FallbackProxy +from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI if TYPE_CHECKING: - from openml._api.resources.base import ResourceAPI - from openml._api.setup.config import Config from openml.enums import ResourceType + from .config import Config + class APIBackendBuilder: def __init__( diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 64e790404..ea868262a 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -2,9 +2,10 @@ from dataclasses import dataclass, field -from openml._api.setup.utils import _resolve_default_cache_dir from openml.enums import APIVersion, RetryPolicy +from ._utils import _resolve_default_cache_dir + @dataclass class APIConfig: diff --git a/openml/_api/setup/utils.py 
b/openml/_api/setup/utils.py deleted file mode 100644 index ddcf5b41c..000000000 --- a/openml/_api/setup/utils.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import logging -import os -import platform -from pathlib import Path - -openml_logger = logging.getLogger("openml") - -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") - - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. - - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir - - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) From d7a37884cc18fee1509cd43fcec696dd0efbf466 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:24:43 +0500 Subject: [PATCH 44/86] module level import for _backend --- openml/__init__.py | 2 +- openml/_api/__init__.py | 2 ++ openml/_api/setup/__init__.py | 2 ++ openml/_api/setup/_utils.py | 49 +++++++++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 openml/_api/setup/_utils.py diff --git a/openml/__init__.py b/openml/__init__.py index fdf3b90e4..21dda24ad 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -33,7 +33,7 @@ utils, ) from .__version__ import __version__ -from ._api.setup._instance import _backend +from ._api import _backend from .datasets import OpenMLDataFeature, OpenMLDataset from .evaluations import OpenMLEvaluation from .flows import OpenMLFlow diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 25bc2f262..2d4651431 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -33,6 +33,7 @@ CacheConfig, Config, ConnectionConfig, + _backend, ) __all__ = [ @@ -66,4 +67,5 @@ "StudyV2API", "TaskV1API", "TaskV2API", + "_backend", ] diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 7f8c65ba3..1c28cfa9e 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,3 +1,4 @@ +from ._instance import _backend from .backend import APIBackend from .builder import APIBackendBuilder from .config import APIConfig, CacheConfig, Config, ConnectionConfig @@ -9,4 +10,5 @@ "CacheConfig", "Config", "ConnectionConfig", + "_backend", ] diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py new file mode 100644 index 000000000..ddcf5b41c --- /dev/null +++ 
b/openml/_api/setup/_utils.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import logging +import os +import platform +from pathlib import Path + +openml_logger = logging.getLogger("openml") + +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) From b5b9ef60047cff083e30ab7eb6cb66f02baa1ff6 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 12:29:12 +0500 Subject: [PATCH 45/86] module level import for tests --- openml/_api/__init__.py | 24 ++++++++++++++++++++++++ openml/_api/resources/__init__.py | 29 ++++++++++++++++++++++++++++- openml/testing.py | 2 +- tests/test_api/test_versions.py | 2 +- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 2d4651431..926fee3d4 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -5,24 +5,35 @@ ) from .resources import ( API_REGISTRY, + DatasetAPI, DatasetV1API, DatasetV2API, + EstimationProcedureAPI, EstimationProcedureV1API, EstimationProcedureV2API, + EvaluationAPI, + EvaluationMeasureAPI, EvaluationMeasureV1API, EvaluationMeasureV2API, EvaluationV1API, EvaluationV2API, FallbackProxy, + FlowAPI, FlowV1API, FlowV2API, ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, RunV1API, RunV2API, + SetupAPI, SetupV1API, SetupV2API, + StudyAPI, StudyV1API, StudyV2API, + TaskAPI, TaskV1API, TaskV2API, ) @@ -44,27 +55,40 @@ "CacheConfig", "Config", "ConnectionConfig", + "DatasetAPI", "DatasetV1API", "DatasetV2API", + "EstimationProcedureAPI", "EstimationProcedureV1API", "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", "EvaluationMeasureV1API", "EvaluationMeasureV2API", "EvaluationV1API", "EvaluationV2API", "FallbackProxy", + "FallbackProxy", + "FlowAPI", "FlowV1API", "FlowV2API", "HTTPCache", "HTTPClient", "MinIOClient", "ResourceAPI", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", "RunV1API", "RunV2API", + "SetupAPI", "SetupV1API", "SetupV2API", + "StudyAPI", "StudyV1API", "StudyV2API", + "TaskAPI", "TaskV1API", "TaskV2API", "_backend", diff --git a/openml/_api/resources/__init__.py 
b/openml/_api/resources/__init__.py index 863ec0f72..1f0b2caa1 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,5 +1,19 @@ from ._registry import API_REGISTRY -from .base import FallbackProxy, ResourceAPI +from .base import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FallbackProxy, + FlowAPI, + ResourceAPI, + ResourceV1API, + ResourceV2API, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, +) from .dataset import DatasetV1API, DatasetV2API from .estimation_procedure import ( EstimationProcedureV1API, @@ -15,24 +29,37 @@ __all__ = [ "API_REGISTRY", + "DatasetAPI", "DatasetV1API", "DatasetV2API", + "EstimationProcedureAPI", "EstimationProcedureV1API", "EstimationProcedureV2API", + "EvaluationAPI", + "EvaluationMeasureAPI", "EvaluationMeasureV1API", "EvaluationMeasureV2API", "EvaluationV1API", "EvaluationV2API", "FallbackProxy", + "FallbackProxy", + "FlowAPI", "FlowV1API", "FlowV2API", "ResourceAPI", + "ResourceAPI", + "ResourceV1API", + "ResourceV2API", + "RunAPI", "RunV1API", "RunV2API", + "SetupAPI", "SetupV1API", "SetupV2API", + "StudyAPI", "StudyV1API", "StudyV2API", + "TaskAPI", "TaskV1API", "TaskV2API", ] diff --git a/openml/testing.py b/openml/testing.py index 3ca2d1b76..a971aa1c3 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -16,7 +16,7 @@ import requests import openml -from openml._api.clients import HTTPCache, HTTPClient +from openml._api import HTTPCache, HTTPClient from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index a7451f3ae..2507a3cd5 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,6 +1,6 @@ import pytest from openml.testing import TestAPIBase -from openml._api.resources.base.versions import ResourceV1API +from openml._api import ResourceV1API from openml.enums import ResourceType From 567eca4096d1332d1db07f8646a3733c241885f3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 2 Feb 2026 13:00:38 +0500 Subject: [PATCH 46/86] add test: test_tag_and_untag --- tests/test_api/test_versions.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2507a3cd5..6a4cad97d 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,3 +1,4 @@ +from time import time import pytest from openml.testing import TestAPIBase from openml._api import ResourceV1API @@ -41,4 +42,12 @@ def test_publish_and_delete(self): @pytest.mark.uses_test_server() def test_tag_and_untag(self): - pass + resource_id = 1 + unique_indicator = str(time()).replace(".", "") + tag = f"TestResourceV1API_test_tag_and_untag_{unique_indicator}" + + tags = self.resource.tag(resource_id, tag) + self.assertIn(tag, tags) + + tags = self.resource.untag(resource_id, tag) + self.assertNotIn(tag, tags) From b2287c32f5637a755f6b2e95c5472308969ef252 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 10:06:20 +0500 Subject: [PATCH 47/86] implement get/set_config_values --- openml/_api/setup/backend.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index f0faf5165..d8cf83f03 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -38,7 +38,7 @@ def set_config(cls, config: Config) -> None: instance._backend 
= APIBackendBuilder.build(config) @classmethod - def get_config_value(cls, key: str) -> Config: + def get_config_value(cls, key: str) -> Any: keys = key.split(".") config_value = cls.get_instance()._config for k in keys: @@ -60,3 +60,16 @@ def set_config_value(cls, key: str, value: Any) -> None: else: setattr(parent, keys[-1], value) cls.set_config(config) + + @classmethod + def get_config_values(cls, keys: list[str]) -> list[Any]: + values = [] + for key in keys: + value = cls.get_config_value(key) + values.append(value) + return values + + @classmethod + def set_config_values(cls, config_dict: dict[str, Any]) -> None: + for key, value in config_dict.items(): + cls.set_config_value(key, value) From b7e285eaafadabe88b7d4e0f42edc1f72459a2ee Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:22:36 +0500 Subject: [PATCH 48/86] improve APIBackend.set_config_values --- openml/_api/setup/backend.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index d8cf83f03..4dd0f4390 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -71,5 +71,16 @@ def get_config_values(cls, keys: list[str]) -> list[Any]: @classmethod def set_config_values(cls, config_dict: dict[str, Any]) -> None: + config = cls.get_instance()._config + for key, value in config_dict.items(): - cls.set_config_value(key, value) + keys = key.split(".") + parent = config + for k in keys[:-1]: + parent = parent[k] if isinstance(parent, dict) else getattr(parent, k) + if isinstance(parent, dict): + parent[keys[-1]] = value + else: + setattr(parent, keys[-1], value) + + cls.set_config(config) From fd43c489523c1a95e84bc2a95bf2caedd44262c2 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:24:24 +0500 Subject: [PATCH 49/86] use LegacyConfig --- openml/__init__.py | 7 +++- openml/_api_calls.py | 19 +++++----- openml/{config.py => _config.py} | 36 +++++++++++++++++++ openml/_legacy_config.py | 19 ++++++++++ openml/base.py | 2 +- openml/cli.py | 14 ++++---- openml/datasets/dataset.py | 6 ++-- openml/datasets/functions.py | 6 ++-- openml/evaluations/evaluation.py | 1 - openml/runs/functions.py | 18 +++++----- openml/setups/functions.py | 5 ++- openml/setups/setup.py | 1 - openml/study/functions.py | 2 +- openml/study/study.py | 4 +-- openml/tasks/task.py | 2 +- openml/utils.py | 6 ++-- .../test_evaluations_example.py | 5 ++- tests/test_openml/test_api_calls.py | 1 - tests/test_openml/test_config.py | 2 +- 19 files changed, 106 insertions(+), 50 deletions(-) rename openml/{config.py => _config.py} (95%) create mode 100644 openml/_legacy_config.py diff --git a/openml/__init__.py b/openml/__init__.py index 21dda24ad..30f38f5f0 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -20,7 +20,8 @@ from . import ( _api_calls, - config, + _config, + _legacy_config, datasets, evaluations, exceptions, @@ -50,6 +51,8 @@ OpenMLTask, ) +config = _legacy_config.LegacyConfig + def populate_cache( task_ids: list[int] | None = None, @@ -111,6 +114,8 @@ def populate_cache( "__version__", "_api_calls", "_backend", + "_config", + "_legacy_config", "config", "datasets", "evaluations", diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 9e53bd9fa..21d5c4391 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -19,7 +19,8 @@ import xmltodict from urllib3 import ProxyManager -from . 
import config +import openml + from .__version__ import __version__ from .exceptions import ( OpenMLHashException, @@ -70,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config.server + url: str = openml.config.server if not url.endswith("/"): url += "/" url += endpoint @@ -171,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if openml.config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -300,7 +301,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split("/api/") + openml_url: str = openml.config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -316,7 +317,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = openml.config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -336,8 +337,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config.apikey: - data["api_key"] = config.apikey + if openml.config.apikey: + data["api_key"] = openml.config.apikey return _send_request( request_method=request_method, url=url, @@ -362,10 +363,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, openml.config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if openml.config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. 
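As a side note on the retry handling shown here: both the number of attempts and the delay strategy are taken from the configuration module, so callers tune them globally rather than per request. A minimal sketch (the exact delay curves behind the human and robot policies are not shown in this patch, so the comments are assumptions about intent):

    import openml

    openml.config.set_retry_policy("human", n_retries=5)   # interactive use: fail fast
    openml.config.set_retry_policy("robot", n_retries=50)  # unattended jobs: keep retrying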
retry_raise_e: Exception | None = None diff --git a/openml/config.py b/openml/_config.py similarity index 95% rename from openml/config.py rename to openml/_config.py index e6104fd7f..c266ae9d9 100644 --- a/openml/config.py +++ b/openml/_config.py @@ -18,6 +18,8 @@ from typing_extensions import TypedDict from urllib.parse import urlparse +from openml.enums import RetryPolicy + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -206,6 +208,8 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _sync_api_config() + class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -244,6 +248,8 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) + _sync_api_config() + @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -262,6 +268,8 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False + _sync_api_config() + def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -374,6 +382,8 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _sync_api_config() + try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -408,6 +418,8 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") + _sync_api_config() + def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -495,6 +507,8 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) + _sync_api_config() + start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -514,6 +528,28 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) +def _sync_api_config() -> None: + """Sync the new API config with the legacy config in this file.""" + from ._api import APIBackend + + p = urlparse(server) + v1_server = f"{p.scheme}://{p.netloc}/" + v1_base_url = p.path.lstrip("/") + connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT + cache_dir = str(_root_cache_directory) + + APIBackend.set_config_values( + { + "api_configs.v1.server": v1_server, + "api_configs.v1.base_url": v1_base_url, + "api_configs.v1.api_key": apikey, + "cache.dir": cache_dir, + "connection.retry_policy": connection_retry_policy, + "connection.retries": connection_n_retries, + } + ) + + __all__ = [ "get_cache_directory", "get_config_as_dict", diff --git a/openml/_legacy_config.py b/openml/_legacy_config.py new file mode 100644 index 000000000..b26b13c01 --- /dev/null +++ b/openml/_legacy_config.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import Any + + +class LegacyConfigMeta(type): + def __getattr__(cls, name: str) -> Any: + import openml + + return getattr(openml._config, name) + + def __setattr__(cls, name: str, value: Any) -> None: + import openml + + setattr(openml._config, name, value) + + 
+class LegacyConfig(metaclass=LegacyConfigMeta): + pass diff --git a/openml/base.py b/openml/base.py index a282be8eb..f79bc2931 100644 --- a/openml/base.py +++ b/openml/base.py @@ -8,8 +8,8 @@ import xmltodict +import openml import openml._api_calls -import openml.config from .utils import _get_rest_api_type_alias, _tag_openml_base diff --git a/openml/cli.py b/openml/cli.py index 0afb089c2..2120449e8 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import urlparse -from openml import config +import openml from openml.__version__ import __version__ @@ -59,17 +59,17 @@ def wait_until_valid_input( def print_configuration() -> None: - file = config.determine_config_file_path() + file = openml.config.determine_config_file_path() header = f"File '{file}' contains (or defaults to):" print(header) - max_key_length = max(map(len, config.get_config_as_dict())) - for field, value in config.get_config_as_dict().items(): + max_key_length = max(map(len, openml.config.get_config_as_dict())) + for field, value in openml.config.get_config_as_dict().items(): print(f"{field.ljust(max_key_length)}: {value}") def verbose_set(field: str, value: str) -> None: - config.set_field_in_config_file(field, value) + openml.config.set_field_in_config_file(field, value) print(f"{field} set to '{value}'.") @@ -82,7 +82,7 @@ def check_apikey(apikey: str) -> str: return "" instructions = ( - f"Your current API key is set to: '{config.apikey}'. " + f"Your current API key is set to: '{openml.config.apikey}'. " "You can get an API key at https://new.openml.org. " "You must create an account if you don't have one yet:\n" " 1. Log in with the account.\n" @@ -347,7 +347,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in openml.config._defaults if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index d9eee278d..59d6205ba 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -17,8 +17,8 @@ import scipy.sparse import xmltodict +import openml from openml.base import OpenMLBase -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from .data_feature import OpenMLDataFeature @@ -375,7 +375,9 @@ def _download_data(self) -> None: # import required here to avoid circular import. 
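A related usage note for the download logic in this hunk: whether the parquet file is fetched at all is controlled by an environment variable whose name is held in openml.config.OPENML_SKIP_PARQUET_ENV_VAR (the literal variable name is defined in the config module and not repeated here). A minimal sketch of forcing the non-parquet (ARFF) code path:

    import os
    import openml

    # must be set before the dataset download is triggered
    os.environ[openml.config.OPENML_SKIP_PARQUET_ENV_VAR] = "true"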
from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3ac657ea0..432938520 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -19,9 +19,9 @@ import xmltodict from scipy.sparse import coo_matrix +import openml import openml._api_calls import openml.utils -from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -492,7 +492,9 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + skip_parquet = ( + os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" + ) download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 5db087024..87df8454a 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -3,7 +3,6 @@ from dataclasses import asdict, dataclass -import openml.config import openml.datasets import openml.flows import openml.runs diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 503788dbd..914a3b46b 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,7 +18,6 @@ import openml import openml._api_calls import openml.utils -from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -45,7 +44,6 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: - from openml.config import _Config from openml.extensions.extension_interface import Extension # get_dict is in run.py to avoid circular imports @@ -107,7 +105,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not config.apikey: + if avoid_duplicate_runs and not openml.config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. " "Please set your API key in the OpenML configuration file, see" @@ -336,7 +334,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." - config.logger.info(message) + openml.config.logger.info(message) return run @@ -528,7 +526,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. # Current configuration setup needs to be copied and passed to the child processes. 
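For context on why the configuration is serialised at this point: joblib workers may be forked or spawned without the parent's OpenML settings, so the pattern used here is to export the parent configuration as a plain dict, pass it along with each job, and re-apply it inside the worker. A minimal sketch of that round trip (the worker body is illustrative):

    import openml

    parent_cfg = openml.config.get_config_as_dict()  # picklable snapshot of the current settings

    def _worker(configuration):
        openml.config._setup(configuration)  # child process re-applies the parent's settings
        ...  # run one repeat/fold/sample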
- _config = config.get_config_as_dict() + _config = openml.config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -551,7 +549,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 rep_no=rep_no, sample_no=sample_no, task=task, - configuration=_config, + configuration=openml.config._Config, ) for _n_fit, rep_no, fold_no, sample_no in jobs ) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs` @@ -694,7 +692,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 rep_no: int, sample_no: int, task: OpenMLTask, - configuration: _Config | None = None, + configuration: openml.config._Config | None = None, # type: ignore[name-defined] ) -> tuple[ np.ndarray, pd.DataFrame | None, @@ -719,7 +717,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 Sample number to be run. task : OpenMLTask The task object from OpenML. - configuration : _Config + configuration : openml.config._Config Hyperparameters to configure the model. Returns @@ -733,7 +731,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - config._setup(configuration) + openml.config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -762,7 +760,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 f"task_class={task.__class__.__name__}" ) - config.logger.info( + openml.config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 4bf279ed1..a24d3a456 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,7 +14,6 @@ import openml import openml.exceptions import openml.utils -from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -84,7 +83,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. 
""" - cache_dir = Path(config.get_cache_directory()) + cache_dir = Path(openml.config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -112,7 +111,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 0960ad4c1..6c63b88ef 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -3,7 +3,6 @@ from typing import Any -import openml.config import openml.flows diff --git a/openml/study/functions.py b/openml/study/functions.py index bb24ddcff..367537773 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -8,8 +8,8 @@ import pandas as pd import xmltodict +import openml import openml._api_calls -import openml.config import openml.utils from openml.study.study import OpenMLBenchmarkSuite, OpenMLStudy diff --git a/openml/study/study.py b/openml/study/study.py index 7a9c80bbe..803c6455b 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -5,8 +5,8 @@ from collections.abc import Sequence from typing import Any +import openml from openml.base import OpenMLBase -from openml.config import get_server_base_url class BaseStudy(OpenMLBase): @@ -111,7 +111,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = f"{get_server_base_url()}/u/{self.creator}" + fields["Creator"] = f"{openml.config.get_server_base_url()}/u/{self.creator}" if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not None: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index b297a105c..202abac32 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -11,8 +11,8 @@ from typing import TYPE_CHECKING, Any from typing_extensions import TypedDict +import openml import openml._api_calls -import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/utils.py b/openml/utils.py index 3680bc0ff..daa86ab50 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -19,8 +19,6 @@ import openml._api_calls import openml.exceptions -from . import config - # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -329,7 +327,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(config.get_cache_directory()) / key + return Path(openml.config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -429,7 +427,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(config.get_cache_directory()) / "locks" + path = Path(openml.config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? 
with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index a9ad7e8c1..7ea25e55c 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -2,15 +2,14 @@ from __future__ import annotations import unittest - -from openml.config import overwrite_config_context +import openml class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - with overwrite_config_context( + with openml.config.overwrite_config_context( { "server": "https://www.openml.org/api/v1/xml", "apikey": None, diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index a295259ef..6b1cc64b1 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -9,7 +9,6 @@ import pytest import openml -from openml.config import ConfigurationForExamples import openml.testing from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..bcb37dcec 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -12,7 +12,7 @@ import pytest -import openml.config +import openml import openml.testing from openml.testing import TestBase From f4aab6bc2191a94ed37aed2dea0e837630baba11 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:24:43 +0500 Subject: [PATCH 50/86] Revert "use LegacyConfig" This reverts commit fd43c489523c1a95e84bc2a95bf2caedd44262c2. --- openml/__init__.py | 7 +--- openml/_api_calls.py | 19 +++++----- openml/_legacy_config.py | 19 ---------- openml/base.py | 2 +- openml/cli.py | 14 ++++---- openml/{_config.py => config.py} | 36 ------------------- openml/datasets/dataset.py | 6 ++-- openml/datasets/functions.py | 6 ++-- openml/evaluations/evaluation.py | 1 + openml/runs/functions.py | 18 +++++----- openml/setups/functions.py | 5 +-- openml/setups/setup.py | 1 + openml/study/functions.py | 2 +- openml/study/study.py | 4 +-- openml/tasks/task.py | 2 +- openml/utils.py | 6 ++-- .../test_evaluations_example.py | 5 +-- tests/test_openml/test_api_calls.py | 1 + tests/test_openml/test_config.py | 2 +- 19 files changed, 50 insertions(+), 106 deletions(-) delete mode 100644 openml/_legacy_config.py rename openml/{_config.py => config.py} (95%) diff --git a/openml/__init__.py b/openml/__init__.py index 30f38f5f0..21dda24ad 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -20,8 +20,7 @@ from . import ( _api_calls, - _config, - _legacy_config, + config, datasets, evaluations, exceptions, @@ -51,8 +50,6 @@ OpenMLTask, ) -config = _legacy_config.LegacyConfig - def populate_cache( task_ids: list[int] | None = None, @@ -114,8 +111,6 @@ def populate_cache( "__version__", "_api_calls", "_backend", - "_config", - "_legacy_config", "config", "datasets", "evaluations", diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 21d5c4391..9e53bd9fa 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -19,8 +19,7 @@ import xmltodict from urllib3 import ProxyManager -import openml - +from . 
import config from .__version__ import __version__ from .exceptions import ( OpenMLHashException, @@ -71,7 +70,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url: str = openml.config.server + url = config.server if not url.endswith("/"): url += "/" url += endpoint @@ -172,7 +171,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if openml.config.show_progress else None, + progress=ProgressBar() if config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -301,7 +300,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url: str = openml.config.server.split("/api/") + openml_url = config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename @@ -317,7 +316,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = openml.config.apikey + data["api_key"] = config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +336,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if openml.config.apikey: - data["api_key"] = openml.config.apikey + if config.apikey: + data["api_key"] = config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +362,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, openml.config.connection_n_retries) + n_retries = max(1, config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if openml.config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. 
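The _send_request hunk above only shows how the retry count and the delay function are chosen (max(1, config.connection_n_retries), and _human_delay versus _robot_delay); the concrete wait times are defined elsewhere in _api_calls.py and are not part of this diff. A rough sketch of the loop shape with placeholder delay functions, purely to illustrate the policy switch:

    import time

    import requests

    def _human_delay(attempt: int) -> float:
        return 1.0  # placeholder: short fixed wait for interactive sessions

    def _robot_delay(attempt: int) -> float:
        return min(2.0 ** attempt, 60.0)  # placeholder: growing back-off for unattended jobs

    def send_with_retries(url: str, n_retries: int, policy: str) -> requests.Response:
        delay = _human_delay if policy == "human" else _robot_delay
        attempts = max(1, n_retries)
        last_exc = None
        for attempt in range(1, attempts + 1):
            try:
                return requests.get(url, timeout=30)
            except requests.RequestException as exc:  # retry only transport-level failures
                last_exc = exc
                if attempt < attempts:
                    time.sleep(delay(attempt))
        assert last_exc is not None
        raise last_exc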
retry_raise_e: Exception | None = None diff --git a/openml/_legacy_config.py b/openml/_legacy_config.py deleted file mode 100644 index b26b13c01..000000000 --- a/openml/_legacy_config.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from typing import Any - - -class LegacyConfigMeta(type): - def __getattr__(cls, name: str) -> Any: - import openml - - return getattr(openml._config, name) - - def __setattr__(cls, name: str, value: Any) -> None: - import openml - - setattr(openml._config, name, value) - - -class LegacyConfig(metaclass=LegacyConfigMeta): - pass diff --git a/openml/base.py b/openml/base.py index f79bc2931..a282be8eb 100644 --- a/openml/base.py +++ b/openml/base.py @@ -8,8 +8,8 @@ import xmltodict -import openml import openml._api_calls +import openml.config from .utils import _get_rest_api_type_alias, _tag_openml_base diff --git a/openml/cli.py b/openml/cli.py index 2120449e8..0afb089c2 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,7 +9,7 @@ from pathlib import Path from urllib.parse import urlparse -import openml +from openml import config from openml.__version__ import __version__ @@ -59,17 +59,17 @@ def wait_until_valid_input( def print_configuration() -> None: - file = openml.config.determine_config_file_path() + file = config.determine_config_file_path() header = f"File '{file}' contains (or defaults to):" print(header) - max_key_length = max(map(len, openml.config.get_config_as_dict())) - for field, value in openml.config.get_config_as_dict().items(): + max_key_length = max(map(len, config.get_config_as_dict())) + for field, value in config.get_config_as_dict().items(): print(f"{field.ljust(max_key_length)}: {value}") def verbose_set(field: str, value: str) -> None: - openml.config.set_field_in_config_file(field, value) + config.set_field_in_config_file(field, value) print(f"{field} set to '{value}'.") @@ -82,7 +82,7 @@ def check_apikey(apikey: str) -> str: return "" instructions = ( - f"Your current API key is set to: '{openml.config.apikey}'. " + f"Your current API key is set to: '{config.apikey}'. " "You can get an API key at https://new.openml.org. " "You must create an account if you don't have one yet:\n" " 1. 
Log in with the account.\n" @@ -347,7 +347,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in openml.config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/_config.py b/openml/config.py similarity index 95% rename from openml/_config.py rename to openml/config.py index c266ae9d9..e6104fd7f 100644 --- a/openml/_config.py +++ b/openml/config.py @@ -18,8 +18,6 @@ from typing_extensions import TypedDict from urllib.parse import urlparse -from openml.enums import RetryPolicy - logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -208,8 +206,6 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries - _sync_api_config() - class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -248,8 +244,6 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) - _sync_api_config() - @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -268,8 +262,6 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False - _sync_api_config() - def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -382,8 +374,6 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() - _sync_api_config() - try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -418,8 +408,6 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") - _sync_api_config() - def _parse_config(config_file: str | Path) -> _Config: """Parse the config file, set up defaults.""" @@ -507,8 +495,6 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) - _sync_api_config() - start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -528,28 +514,6 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) -def _sync_api_config() -> None: - """Sync the new API config with the legacy config in this file.""" - from ._api import APIBackend - - p = urlparse(server) - v1_server = f"{p.scheme}://{p.netloc}/" - v1_base_url = p.path.lstrip("/") - connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT - cache_dir = str(_root_cache_directory) - - APIBackend.set_config_values( - { - "api_configs.v1.server": v1_server, - "api_configs.v1.base_url": v1_base_url, - "api_configs.v1.api_key": apikey, - "cache.dir": cache_dir, - "connection.retry_policy": connection_retry_policy, - "connection.retries": connection_n_retries, - } - ) - - __all__ = [ "get_cache_directory", "get_config_as_dict", diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 59d6205ba..d9eee278d 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ 
-17,8 +17,8 @@ import scipy.sparse import xmltodict -import openml from openml.base import OpenMLBase +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from .data_feature import OpenMLDataFeature @@ -375,9 +375,7 @@ def _download_data(self) -> None: # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet - skip_parquet = ( - os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" - ) + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" if self._parquet_url is not None and not skip_parquet: parquet_file = _get_dataset_parquet(self) self.parquet_file = None if parquet_file is None else str(parquet_file) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 432938520..3ac657ea0 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -19,9 +19,9 @@ import xmltodict from scipy.sparse import coo_matrix -import openml import openml._api_calls import openml.utils +from openml.config import OPENML_SKIP_PARQUET_ENV_VAR from openml.exceptions import ( OpenMLHashException, OpenMLPrivateDatasetError, @@ -492,9 +492,7 @@ def get_dataset( # noqa: C901, PLR0912 qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) parquet_file = None - skip_parquet = ( - os.environ.get(openml.config.OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" - ) + skip_parquet = os.environ.get(OPENML_SKIP_PARQUET_ENV_VAR, "false").casefold() == "true" download_parquet = "oml:parquet_url" in description and not skip_parquet if download_parquet and (download_data or download_all_files): try: diff --git a/openml/evaluations/evaluation.py b/openml/evaluations/evaluation.py index 87df8454a..5db087024 100644 --- a/openml/evaluations/evaluation.py +++ b/openml/evaluations/evaluation.py @@ -3,6 +3,7 @@ from dataclasses import asdict, dataclass +import openml.config import openml.datasets import openml.flows import openml.runs diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 914a3b46b..503788dbd 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -18,6 +18,7 @@ import openml import openml._api_calls import openml.utils +from openml import config from openml.exceptions import ( OpenMLCacheException, OpenMLRunsExistError, @@ -44,6 +45,7 @@ # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: + from openml.config import _Config from openml.extensions.extension_interface import Extension # get_dict is in run.py to avoid circular imports @@ -105,7 +107,7 @@ def run_model_on_task( # noqa: PLR0913 """ if avoid_duplicate_runs is None: avoid_duplicate_runs = openml.config.avoid_duplicate_runs - if avoid_duplicate_runs and not openml.config.apikey: + if avoid_duplicate_runs and not config.apikey: warnings.warn( "avoid_duplicate_runs is set to True, but no API key is set. " "Please set your API key in the OpenML configuration file, see" @@ -334,7 +336,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}" else: message = f"Executed Task {task.task_id} on local Flow with name {flow.name}." - openml.config.logger.info(message) + config.logger.info(message) return run @@ -526,7 +528,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 # The forked child process may not copy the configuration state of OpenML from the parent. 
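The comment that closes this hunk is the reason the run code snapshots its configuration at all: a joblib worker process does not inherit the parent's module-level openml.config state, so the hunk continues below by capturing it with get_config_as_dict() in the parent and restoring it with _setup() in the child before any API call. A compressed sketch of that hand-off; the worker body and the joblib wiring are illustrative, the two config calls are the ones visible in the surrounding hunks:

    from joblib import Parallel, delayed

    import openml

    def _fit_one_fold(fold: int, configuration: dict) -> int:
        # First statement in the child process: restore server, API key, cache dir, ...
        openml.config._setup(configuration)
        return fold  # the real helper fits the model on this fold's train split here

    snapshot = openml.config.get_config_as_dict()
    results = Parallel(n_jobs=2)(
        delayed(_fit_one_fold)(fold, snapshot) for fold in range(4)
    )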
# Current configuration setup needs to be copied and passed to the child processes. - _config = openml.config.get_config_as_dict() + _config = config.get_config_as_dict() # Execute runs in parallel # assuming the same number of tasks as workers (n_jobs), the total compute time for this # statement will be similar to the slowest run @@ -549,7 +551,7 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901 rep_no=rep_no, sample_no=sample_no, task=task, - configuration=openml.config._Config, + configuration=_config, ) for _n_fit, rep_no, fold_no, sample_no in jobs ) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs` @@ -692,7 +694,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 rep_no: int, sample_no: int, task: OpenMLTask, - configuration: openml.config._Config | None = None, # type: ignore[name-defined] + configuration: _Config | None = None, ) -> tuple[ np.ndarray, pd.DataFrame | None, @@ -717,7 +719,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 Sample number to be run. task : OpenMLTask The task object from OpenML. - configuration : openml.config._Config + configuration : _Config Hyperparameters to configure the model. Returns @@ -731,7 +733,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 """ # Sets up the OpenML instantiated in the child process to match that of the parent's # if configuration=None, loads the default - openml.config._setup(configuration) + config._setup(configuration) train_indices, test_indices = task.get_train_test_split_indices( repeat=rep_no, @@ -760,7 +762,7 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 f"task_class={task.__class__.__name__}" ) - openml.config.logger.info( + config.logger.info( f"Going to run model {model!s} on " f"dataset {openml.datasets.get_dataset(task.dataset_id).name} " f"for repeat {rep_no} fold {fold_no} sample {sample_no}" diff --git a/openml/setups/functions.py b/openml/setups/functions.py index a24d3a456..4bf279ed1 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -14,6 +14,7 @@ import openml import openml.exceptions import openml.utils +from openml import config from openml.flows import OpenMLFlow, flow_exists from .setup import OpenMLParameter, OpenMLSetup @@ -83,7 +84,7 @@ def _get_cached_setup(setup_id: int) -> OpenMLSetup: OpenMLCacheException If the setup file for the given setup ID is not cached. 
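Stepping back to the runs/functions.py part of this revert: the visible hunk only emits a warning when duplicate-run checking is requested without an API key, it does not show what happens next. A small illustration of that guard; falling back to False when unauthenticated is an assumption made for the sketch, not something the diff confirms:

    from __future__ import annotations

    import warnings

    import openml

    def resolve_duplicate_check(avoid_duplicate_runs: bool | None) -> bool:
        if avoid_duplicate_runs is None:
            avoid_duplicate_runs = openml.config.avoid_duplicate_runs
        if avoid_duplicate_runs and not openml.config.apikey:
            warnings.warn(
                "avoid_duplicate_runs is True but no API key is set; "
                "checking the server for duplicates needs authentication.",
                stacklevel=2,
            )
            return False  # assumption: skip the server-side check when unauthenticated
        return avoid_duplicate_runs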
""" - cache_dir = Path(openml.config.get_cache_directory()) + cache_dir = Path(config.get_cache_directory()) setup_cache_dir = cache_dir / "setups" / str(setup_id) try: setup_file = setup_cache_dir / "description.xml" @@ -111,7 +112,7 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) + setup_dir = Path(config.get_cache_directory()) / "setups" / str(setup_id) setup_dir.mkdir(exist_ok=True, parents=True) setup_file = setup_dir / "description.xml" diff --git a/openml/setups/setup.py b/openml/setups/setup.py index 6c63b88ef..0960ad4c1 100644 --- a/openml/setups/setup.py +++ b/openml/setups/setup.py @@ -3,6 +3,7 @@ from typing import Any +import openml.config import openml.flows diff --git a/openml/study/functions.py b/openml/study/functions.py index 367537773..bb24ddcff 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -8,8 +8,8 @@ import pandas as pd import xmltodict -import openml import openml._api_calls +import openml.config import openml.utils from openml.study.study import OpenMLBenchmarkSuite, OpenMLStudy diff --git a/openml/study/study.py b/openml/study/study.py index 803c6455b..7a9c80bbe 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -5,8 +5,8 @@ from collections.abc import Sequence from typing import Any -import openml from openml.base import OpenMLBase +from openml.config import get_server_base_url class BaseStudy(OpenMLBase): @@ -111,7 +111,7 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: fields["ID"] = self.study_id fields["Study URL"] = self.openml_url if self.creator is not None: - fields["Creator"] = f"{openml.config.get_server_base_url()}/u/{self.creator}" + fields["Creator"] = f"{get_server_base_url()}/u/{self.creator}" if self.creation_date is not None: fields["Upload Time"] = self.creation_date.replace("T", " ") if self.data is not None: diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 202abac32..b297a105c 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -11,8 +11,8 @@ from typing import TYPE_CHECKING, Any from typing_extensions import TypedDict -import openml import openml._api_calls +import openml.config from openml import datasets from openml.base import OpenMLBase from openml.utils import _create_cache_directory_for_id diff --git a/openml/utils.py b/openml/utils.py index daa86ab50..3680bc0ff 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -19,6 +19,8 @@ import openml._api_calls import openml.exceptions +from . import config + # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from openml.base import OpenMLBase @@ -327,7 +329,7 @@ def _list_all( # noqa: C901 def _get_cache_dir_for_key(key: str) -> Path: - return Path(openml.config.get_cache_directory()) / key + return Path(config.get_cache_directory()) / key def _create_cache_directory(key: str) -> Path: @@ -427,7 +429,7 @@ def safe_func(*args: P.args, **kwargs: P.kwargs) -> R: def _create_lockfiles_dir() -> Path: - path = Path(openml.config.get_cache_directory()) / "locks" + path = Path(config.get_cache_directory()) / "locks" # TODO(eddiebergman): Not sure why this is allowed to error and ignore??? 
with contextlib.suppress(OSError): path.mkdir(exist_ok=True, parents=True) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 7ea25e55c..a9ad7e8c1 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -2,14 +2,15 @@ from __future__ import annotations import unittest -import openml + +from openml.config import overwrite_config_context class TestEvaluationsExample(unittest.TestCase): def test_example_python_paper(self): # Example script which will appear in the upcoming OpenML-Python paper # This test ensures that the example will keep running! - with openml.config.overwrite_config_context( + with overwrite_config_context( { "server": "https://www.openml.org/api/v1/xml", "apikey": None, diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 6b1cc64b1..a295259ef 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -9,6 +9,7 @@ import pytest import openml +from openml.config import ConfigurationForExamples import openml.testing from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index bcb37dcec..7ef223504 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -12,7 +12,7 @@ import pytest -import openml +import openml.config import openml.testing from openml.testing import TestBase From d43cf86f3869392976d70fdbeba0d140ac1e04f3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:35:57 +0500 Subject: [PATCH 51/86] implement _sync_api_config --- openml/config.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/openml/config.py b/openml/config.py index e6104fd7f..c266ae9d9 100644 --- a/openml/config.py +++ b/openml/config.py @@ -18,6 +18,8 @@ from typing_extensions import TypedDict from urllib.parse import urlparse +from openml.enums import RetryPolicy + logger = logging.getLogger(__name__) openml_logger = logging.getLogger("openml") console_handler: logging.StreamHandler | None = None @@ -206,6 +208,8 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N retry_policy = value connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _sync_api_config() + class ConfigurationForExamples: """Allows easy switching to and from a test configuration, used for examples.""" @@ -244,6 +248,8 @@ def start_using_configuration_for_example(cls) -> None: stacklevel=2, ) + _sync_api_config() + @classmethod def stop_using_configuration_for_example(cls) -> None: """Return to configuration as it was before `start_use_example_configuration`.""" @@ -262,6 +268,8 @@ def stop_using_configuration_for_example(cls) -> None: apikey = cast("str", cls._last_used_key) cls._start_last_called = False + _sync_api_config() + def _handle_xdg_config_home_backwards_compatibility( xdg_home: str, @@ -374,6 +382,8 @@ def _setup(config: _Config | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _sync_api_config() + try: cache_exists = _root_cache_directory.exists() # create the cache subdirectory @@ -408,6 +418,8 @@ def set_field_in_config_file(field: str, value: Any) -> None: if value is not None: fh.write(f"{f} = {value}\n") + _sync_api_config() + def _parse_config(config_file: str | Path) -> 
_Config: """Parse the config file, set up defaults.""" @@ -495,6 +507,8 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: global _root_cache_directory # noqa: PLW0603 _root_cache_directory = Path(root_cache_directory) + _sync_api_config() + start_using_configuration_for_example = ( ConfigurationForExamples.start_using_configuration_for_example @@ -514,6 +528,28 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: _setup(existing_config) +def _sync_api_config() -> None: + """Sync the new API config with the legacy config in this file.""" + from ._api import APIBackend + + p = urlparse(server) + v1_server = f"{p.scheme}://{p.netloc}/" + v1_base_url = p.path.lstrip("/") + connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT + cache_dir = str(_root_cache_directory) + + APIBackend.set_config_values( + { + "api_configs.v1.server": v1_server, + "api_configs.v1.base_url": v1_base_url, + "api_configs.v1.api_key": apikey, + "cache.dir": cache_dir, + "connection.retry_policy": connection_retry_policy, + "connection.retries": connection_n_retries, + } + ) + + __all__ = [ "get_cache_directory", "get_config_as_dict", From 3e323edff1787e01f8f9aa74e419f3f27fc9400b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:36:18 +0500 Subject: [PATCH 52/86] update tests with _sync_api_config --- openml/testing.py | 3 +++ tests/conftest.py | 3 +++ tests/test_datasets/test_dataset_functions.py | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/openml/testing.py b/openml/testing.py index a971aa1c3..a3d137916 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -110,6 +110,7 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: self.retry_policy = openml.config.retry_policy self.connection_n_retries = openml.config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) + openml.config._sync_api_config() def use_production_server(self) -> None: """ @@ -119,6 +120,7 @@ def use_production_server(self) -> None: """ openml.config.server = self.production_server openml.config.apikey = "" + openml.config._sync_api_config() def tearDown(self) -> None: """Tear down the test""" @@ -132,6 +134,7 @@ def tearDown(self) -> None: openml.config.connection_n_retries = self.connection_n_retries openml.config.retry_policy = self.retry_policy + openml.config._sync_api_config() @classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..bcf93bd72 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -99,6 +99,7 @@ def delete_remote_files(tracker, flow_names) -> None: """ openml.config.server = TestBase.test_server openml.config.apikey = TestBase.user_key + openml.config._sync_api_config() # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -275,10 +276,12 @@ def with_server(request): if "production" in request.keywords: openml.config.server = "https://www.openml.org/api/v1/xml" openml.config.apikey = None + openml.config._sync_api_config() yield return openml.config.server = "https://test.openml.org/api/v1/xml" openml.config.apikey = TestBase.user_key + openml.config._sync_api_config() yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c41664ba7..39a6c9cae 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -158,6 
+158,7 @@ def test_check_datasets_active(self): [79], ) openml.config.server = self.test_server + openml.config._sync_api_config() @pytest.mark.uses_test_server() def test_illegal_character_tag(self): @@ -186,6 +187,7 @@ def test__name_to_id_with_deactivated(self): # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 openml.config.server = self.test_server + openml.config._sync_api_config() @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -438,6 +440,7 @@ def test__getarff_md5_issue(self): } n = openml.config.connection_n_retries openml.config.connection_n_retries = 1 + openml.config._sync_api_config() self.assertRaisesRegex( OpenMLHashException, @@ -448,6 +451,7 @@ def test__getarff_md5_issue(self): ) openml.config.connection_n_retries = n + openml.config._sync_api_config() @pytest.mark.uses_test_server() def test__get_dataset_features(self): @@ -617,6 +621,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. # all users can deactivate their own datasets) openml.config.apikey = TestBase.admin_key + openml.config._sync_api_config() openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1555,6 +1560,7 @@ def test_list_datasets_with_high_size_parameter(self): # Reverting to test server openml.config.server = self.test_server + openml.config._sync_api_config() assert len(datasets_a) == len(datasets_b) From 9195fa6ea6de253141fe68e922fd414c85b1d806 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:51:44 +0500 Subject: [PATCH 53/86] rename config: timeout -> timeout_seconds --- openml/_api/clients/http.py | 6 +++--- openml/_api/setup/builder.py | 4 ++-- openml/_api/setup/config.py | 4 ++-- openml/testing.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 353cd5e9e..2c1e52d19 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -116,7 +116,7 @@ def __init__( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout: int, + timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -124,7 +124,7 @@ def __init__( # noqa: PLR0913 self.server = server self.base_url = base_url self.api_key = api_key - self.timeout = timeout + self.timeout_seconds = timeout_seconds self.retries = retries self.retry_policy = retry_policy self.cache = cache @@ -343,7 +343,7 @@ def request( headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) - timeout = request_kwargs.pop("timeout", self.timeout) + timeout = request_kwargs.pop("timeout", self.timeout_seconds) files = request_kwargs.pop("files", None) if use_cache and not reset_cache and self.cache is not None: diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 750db431a..d411189ee 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -33,7 +33,7 @@ def build(cls, config: Config) -> APIBackendBuilder: server=primary_api_config.server, base_url=primary_api_config.base_url, api_key=primary_api_config.api_key, - timeout=config.connection.timeout, + timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, @@ -51,7 +51,7 @@ def build(cls, config: Config) -> APIBackendBuilder: server=fallback_api_config.server, base_url=fallback_api_config.base_url, 
api_key=fallback_api_config.api_key, - timeout=config.connection.timeout, + timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index ea868262a..8e8fc1f5d 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -18,7 +18,7 @@ class APIConfig: class ConnectionConfig: retries: int retry_policy: RetryPolicy - timeout: int + timeout_seconds: int @dataclass @@ -51,7 +51,7 @@ class Config: default_factory=lambda: ConnectionConfig( retries=5, retry_policy=RetryPolicy.HUMAN, - timeout=10, + timeout_seconds=10, ) ) diff --git a/openml/testing.py b/openml/testing.py index a3d137916..2087283d3 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -286,7 +286,7 @@ class TestAPIBase(unittest.TestCase): server: str base_url: str api_key: str - timeout: int + timeout_seconds: int retries: int retry_policy: RetryPolicy dir: str @@ -298,7 +298,7 @@ def setUp(self) -> None: self.server = "https://test.openml.org/" self.base_url = "api/v1/xml" self.api_key = "normaluser" - self.timeout = 10 + self.timeout_seconds = 10 self.retries = 3 self.retry_policy = RetryPolicy.HUMAN self.dir = "test_cache" @@ -312,7 +312,7 @@ def setUp(self) -> None: server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout=self.timeout, + timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -340,7 +340,7 @@ def _get_http_client( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout: int, + timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -349,7 +349,7 @@ def _get_http_client( # noqa: PLR0913 server=server, base_url=base_url, api_key=api_key, - timeout=timeout, + timeout_seconds=timeout_seconds, retries=retries, retry_policy=retry_policy, cache=cache, From 5342eec3716e1c50ee020156702bb658d7e37cba Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 12:57:07 +0500 Subject: [PATCH 54/86] use timedelta for default ttl value --- openml/_api/setup/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 8e8fc1f5d..9b87ffbaf 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import timedelta from openml.enums import APIVersion, RetryPolicy @@ -58,6 +59,6 @@ class Config: cache: CacheConfig = field( default_factory=lambda: CacheConfig( dir=str(_resolve_default_cache_dir()), - ttl=60 * 60 * 24 * 7, + ttl=int(timedelta(weeks=1).total_seconds()), ) ) From adc0e7498469154d32fa5a16f637b5792964dd49 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 13:35:04 +0500 Subject: [PATCH 55/86] update tests, adds v2/fallback --- tests/test_api/test_versions.py | 56 ++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 6a4cad97d..4906cf9f4 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,8 +1,9 @@ from time import time import pytest from openml.testing import TestAPIBase -from openml._api import ResourceV1API +from openml._api import ResourceV1API, ResourceV2API, FallbackProxy from openml.enums import ResourceType +from openml.exceptions import 
OpenMLNotSupportedError class TestResourceV1API(TestAPIBase): @@ -51,3 +52,56 @@ def test_tag_and_untag(self): tags = self.resource.untag(resource_id, tag) self.assertNotIn(tag, tags) + + +class TestResourceV2API(TestResourceV1API): + def setUp(self): + super().setUp() + + self.server = "" + self.base_url = "" + self.api_key = "" + self.http_client = self._get_http_client( + server=self.server, + base_url=self.base_url, + api_key=self.api_key, + timeout_seconds=self.timeout_seconds, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + self.resource = ResourceV2API(self.http_client) + self.resource.resource_type = ResourceType.TASK + + @pytest.mark.xfail(raises=OpenMLNotSupportedError) + def test_publish_and_delete(self): + super().test_tag_and_untag() + + + @pytest.mark.xfail(raises=OpenMLNotSupportedError) + def test_tag_and_untag(self): + super().test_tag_and_untag() + + +class TestResourceFallbackAPI(TestResourceV1API): + def setUp(self): + super().setUp() + + self.http_client_v2 = self._get_http_client( + server="", + base_url="", + api_key="", + timeout_seconds=self.timeout_seconds, + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ) + + resource_v1 = ResourceV1API(self.http_client) + resource_v1.resource_type = ResourceType.TASK + + resource_v2 = ResourceV2API(self.http_client_v2) + resource_v2.resource_type = ResourceType.TASK + + self.resource = FallbackProxy(resource_v2, resource_v1) From bfb2d3e18a83982391f6653ec12fd710bbb92412 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 13:39:42 +0500 Subject: [PATCH 56/86] add MinIOClient in TestBase --- openml/testing.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/openml/testing.py b/openml/testing.py index 2087283d3..5f0697f87 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -16,7 +16,7 @@ import requests import openml -from openml._api import HTTPCache, HTTPClient +from openml._api import HTTPCache, HTTPClient, MinIOClient from openml.enums import RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -317,6 +317,7 @@ def setUp(self) -> None: retry_policy=self.retry_policy, cache=self.cache, ) + self.minio_client = self._get_minio_client(path=Path(self.dir)) if self.cache.path.exists(): shutil.rmtree(self.cache.path) @@ -355,6 +356,12 @@ def _get_http_client( # noqa: PLR0913 cache=cache, ) + def _get_minio_client( + self, + path: Path | None = None, + ) -> MinIOClient: + return MinIOClient(path=path) + def _get_url( self, server: str | None = None, From cabaecf27704d0797bcb8d4c855c6e5280b03945 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 3 Feb 2026 18:43:37 +0500 Subject: [PATCH 57/86] fix linting for builder --- openml/_api/setup/backend.py | 56 +++++++++++++++++++++++++++++++----- openml/_api/setup/builder.py | 14 ++++++--- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index 4dd0f4390..c29d1dbad 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -1,11 +1,24 @@ from __future__ import annotations from copy import deepcopy -from typing import Any +from typing import TYPE_CHECKING, Any, cast from .builder import APIBackendBuilder from .config import Config +if TYPE_CHECKING: + from openml._api.resources import ( + DatasetAPI, + EstimationProcedureAPI, + EvaluationAPI, + EvaluationMeasureAPI, + FlowAPI, + RunAPI, + SetupAPI, + StudyAPI, + TaskAPI, + ) + class APIBackend: 
_instance: APIBackend | None = None @@ -14,12 +27,41 @@ def __init__(self, config: Config | None = None): self._config: Config = config or Config() self._backend = APIBackendBuilder.build(self._config) - def __getattr__(self, name: str) -> Any: - """ - Delegate attribute access to the underlying backend. - Called only if attribute is not found on RuntimeBackend. - """ - return getattr(self._backend, name) + @property + def dataset(self) -> DatasetAPI: + return cast("DatasetAPI", self._backend.dataset) + + @property + def task(self) -> TaskAPI: + return cast("TaskAPI", self._backend.task) + + @property + def evaluation_measure(self) -> EvaluationMeasureAPI: + return cast("EvaluationMeasureAPI", self._backend.evaluation_measure) + + @property + def estimation_procedure(self) -> EstimationProcedureAPI: + return cast("EstimationProcedureAPI", self._backend.estimation_procedure) + + @property + def evaluation(self) -> EvaluationAPI: + return cast("EvaluationAPI", self._backend.evaluation) + + @property + def flow(self) -> FlowAPI: + return cast("FlowAPI", self._backend.flow) + + @property + def study(self) -> StudyAPI: + return cast("StudyAPI", self._backend.study) + + @property + def run(self) -> RunAPI: + return cast("RunAPI", self._backend.run) + + @property + def setup(self) -> SetupAPI: + return cast("SetupAPI", self._backend.setup) @classmethod def get_instance(cls) -> APIBackend: diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index d411189ee..5518a2a13 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -6,10 +6,9 @@ from openml._api.clients import HTTPCache, HTTPClient, MinIOClient from openml._api.resources import API_REGISTRY, FallbackProxy, ResourceAPI +from openml.enums import ResourceType if TYPE_CHECKING: - from openml.enums import ResourceType - from .config import Config @@ -18,8 +17,15 @@ def __init__( self, resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], ): - for resource_type, resource_api in resource_apis.items(): - setattr(self, resource_type.value, resource_api) + self.dataset = resource_apis[ResourceType.DATASET] + self.task = resource_apis[ResourceType.TASK] + self.evaluation_measure = resource_apis[ResourceType.EVALUATION_MEASURE] + self.estimation_procedure = resource_apis[ResourceType.ESTIMATION_PROCEDURE] + self.evaluation = resource_apis[ResourceType.EVALUATION] + self.flow = resource_apis[ResourceType.FLOW] + self.study = resource_apis[ResourceType.STUDY] + self.run = resource_apis[ResourceType.RUN] + self.setup = resource_apis[ResourceType.SETUP] @classmethod def build(cls, config: Config) -> APIBackendBuilder: From 85c11139928fc3de67e2c8e1527a77db07d95887 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 13:57:00 +0500 Subject: [PATCH 58/86] fix unbound variables: "code", "message" source: https://github.com/openml/openml-python/pull/1606#issuecomment-3844025047 --- openml/_api/clients/http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 2c1e52d19..323da8793 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -238,6 +238,8 @@ def _validate_response( raise OpenMLServerError(f"URI too long! 
({url})") retry_raise_e: Exception | None = None + code: int | None = None + message: str = "" try: code, message = self._parse_exception_response(response) From 39bf86a3a62bff24ffc41f10feef93eb62687b8a Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 14:19:02 +0500 Subject: [PATCH 59/86] use requests.Session() --- openml/_api/clients/http.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 323da8793..98b19a937 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -278,6 +278,7 @@ def _validate_response( def _request( # noqa: PLR0913 self, + session: requests.Session, method: str, url: str, params: Mapping[str, Any], @@ -291,7 +292,7 @@ def _request( # noqa: PLR0913 response: Response | None = None try: - response = requests.request( + response = session.request( method=method, url=url, params=params, @@ -357,8 +358,10 @@ def request( except Exception: raise # propagate unexpected cache errors + session = requests.Session() for retry_counter in range(1, retries + 1): response, retry_raise_e = self._request( + session=session, method=method, url=url, params=params, @@ -379,6 +382,8 @@ def request( delay = self.retry_func(retry_counter) time.sleep(delay) + session.close() + assert response is not None if use_cache and self.cache is not None: From 7b66677988e73a5b67a599d8a64aac97f1dee2d8 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 14:20:44 +0500 Subject: [PATCH 60/86] remove "timeout_seconds" entirely - removing this since it was not part of the sdk previously - some tests fail because of the timeout in stacked PRs - this option can easily be added if needed in future --- openml/_api/clients/http.py | 6 ------ openml/_api/setup/builder.py | 2 -- openml/_api/setup/config.py | 2 -- openml/testing.py | 5 ----- tests/test_api/test_versions.py | 2 -- 5 files changed, 17 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 98b19a937..db782cca7 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -116,7 +116,6 @@ def __init__( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -124,7 +123,6 @@ def __init__( # noqa: PLR0913 self.server = server self.base_url = base_url self.api_key = api_key - self.timeout_seconds = timeout_seconds self.retries = retries self.retry_policy = retry_policy self.cache = cache @@ -284,7 +282,6 @@ def _request( # noqa: PLR0913 params: Mapping[str, Any], data: Mapping[str, Any], headers: Mapping[str, str], - timeout: float | int, files: Mapping[str, Any] | None, **request_kwargs: Any, ) -> tuple[Response | None, Exception | None]: @@ -298,7 +295,6 @@ def _request( # noqa: PLR0913 params=params, data=data, headers=headers, - timeout=timeout, files=files, **request_kwargs, ) @@ -346,7 +342,6 @@ def request( headers = request_kwargs.pop("headers", {}).copy() headers.update(self.headers) - timeout = request_kwargs.pop("timeout", self.timeout_seconds) files = request_kwargs.pop("files", None) if use_cache and not reset_cache and self.cache is not None: @@ -367,7 +362,6 @@ def request( params=params, data=data, headers=headers, - timeout=timeout, files=files, **request_kwargs, ) diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 5518a2a13..f801fe525 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -39,7 +39,6 @@ def 
build(cls, config: Config) -> APIBackendBuilder: server=primary_api_config.server, base_url=primary_api_config.base_url, api_key=primary_api_config.api_key, - timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, @@ -57,7 +56,6 @@ def build(cls, config: Config) -> APIBackendBuilder: server=fallback_api_config.server, base_url=fallback_api_config.base_url, api_key=fallback_api_config.api_key, - timeout_seconds=config.connection.timeout_seconds, retries=config.connection.retries, retry_policy=config.connection.retry_policy, cache=http_cache, diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 9b87ffbaf..4108227aa 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -19,7 +19,6 @@ class APIConfig: class ConnectionConfig: retries: int retry_policy: RetryPolicy - timeout_seconds: int @dataclass @@ -52,7 +51,6 @@ class Config: default_factory=lambda: ConnectionConfig( retries=5, retry_policy=RetryPolicy.HUMAN, - timeout_seconds=10, ) ) diff --git a/openml/testing.py b/openml/testing.py index 5f0697f87..d254b7bcb 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -286,7 +286,6 @@ class TestAPIBase(unittest.TestCase): server: str base_url: str api_key: str - timeout_seconds: int retries: int retry_policy: RetryPolicy dir: str @@ -298,7 +297,6 @@ def setUp(self) -> None: self.server = "https://test.openml.org/" self.base_url = "api/v1/xml" self.api_key = "normaluser" - self.timeout_seconds = 10 self.retries = 3 self.retry_policy = RetryPolicy.HUMAN self.dir = "test_cache" @@ -312,7 +310,6 @@ def setUp(self) -> None: server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -341,7 +338,6 @@ def _get_http_client( # noqa: PLR0913 server: str, base_url: str, api_key: str, - timeout_seconds: int, retries: int, retry_policy: RetryPolicy, cache: HTTPCache | None = None, @@ -350,7 +346,6 @@ def _get_http_client( # noqa: PLR0913 server=server, base_url=base_url, api_key=api_key, - timeout_seconds=timeout_seconds, retries=retries, retry_policy=retry_policy, cache=cache, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 4906cf9f4..9f9e61ba6 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -65,7 +65,6 @@ def setUp(self): server=self.server, base_url=self.base_url, api_key=self.api_key, - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, @@ -92,7 +91,6 @@ def setUp(self): server="", base_url="", api_key="", - timeout_seconds=self.timeout_seconds, retries=self.retries, retry_policy=self.retry_policy, cache=self.cache, From d2224c462b7bc46b129dfab5b7887f700c1fda69 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 4 Feb 2026 22:42:25 +0500 Subject: [PATCH 61/86] update/refactor tests --- openml/testing.py | 114 +++++++++++--------------------- tests/test_api/test_http.py | 20 ++++-- tests/test_api/test_versions.py | 103 ++++++++++++----------------- 3 files changed, 97 insertions(+), 140 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index d254b7bcb..d73e15a2d 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -11,13 +11,12 @@ import unittest from pathlib import Path from typing import ClassVar -from urllib.parse import urljoin import requests import openml from openml._api import 
HTTPCache, HTTPClient, MinIOClient -from openml.enums import RetryPolicy +from openml.enums import APIVersion, RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -283,90 +282,53 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 class TestAPIBase(unittest.TestCase): - server: str - base_url: str - api_key: str retries: int retry_policy: RetryPolicy - dir: str ttl: int + cache_dir: Path cache: HTTPCache - http_client: HTTPClient + http_clients: dict[APIVersion, HTTPClient] + minio_client: MinIOClient + current_api_version: APIVersion | None def setUp(self) -> None: - self.server = "https://test.openml.org/" - self.base_url = "api/v1/xml" - self.api_key = "normaluser" - self.retries = 3 - self.retry_policy = RetryPolicy.HUMAN - self.dir = "test_cache" - self.ttl = 60 * 60 * 24 * 7 - - self.cache = self._get_http_cache( - path=Path(self.dir), - ttl=self.ttl, - ) - self.http_client = self._get_http_client( - server=self.server, - base_url=self.base_url, - api_key=self.api_key, - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - self.minio_client = self._get_minio_client(path=Path(self.dir)) + config = openml._backend.get_config() - if self.cache.path.exists(): - shutil.rmtree(self.cache.path) - - def tearDown(self) -> None: - if self.cache.path.exists(): - shutil.rmtree(self.cache.path) + self.retries = config.connection.retries + self.retry_policy = config.connection.retry_policy + self.ttl = config.cache.ttl + self.current_api_version = None - def _get_http_cache( - self, - path: Path, - ttl: int, - ) -> HTTPCache: - return HTTPCache( - path=path, - ttl=ttl, - ) + abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() + self.cache_dir = abspath_this_file.parent.parent / "files" + if not self.cache_dir.is_dir(): + raise ValueError( + f"Cannot find test cache dir, expected it to be {self.cache_dir}!", + ) - def _get_http_client( # noqa: PLR0913 - self, - server: str, - base_url: str, - api_key: str, - retries: int, - retry_policy: RetryPolicy, - cache: HTTPCache | None = None, - ) -> HTTPClient: - return HTTPClient( - server=server, - base_url=base_url, - api_key=api_key, - retries=retries, - retry_policy=retry_policy, - cache=cache, + self.cache = HTTPCache( + path=self.cache_dir, + ttl=self.ttl, ) - - def _get_minio_client( - self, - path: Path | None = None, - ) -> MinIOClient: - return MinIOClient(path=path) - - def _get_url( - self, - server: str | None = None, - base_url: str | None = None, - path: str | None = None, - ) -> str: - server = server if server else self.server - base_url = base_url if base_url else self.base_url - path = path if path else "" - return urljoin(self.server, urljoin(self.base_url, path)) + self.http_clients = { + APIVersion.V1: HTTPClient( + server="https://test.openml.org/", + base_url="api/v1/xml/", + api_key="normaluser", + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ), + APIVersion.V2: HTTPClient( + server="http://localhost:8002/", + base_url="", + api_key="", + retries=self.retries, + retry_policy=self.retry_policy, + cache=self.cache, + ), + } + self.minio_client = MinIOClient(path=self.cache_dir) def check_task_existence( diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index efaeaeeef..3c35ea5e1 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -4,11 +4,22 @@ import pytest from openml.testing import TestAPIBase import os +from urllib.parse import urljoin +from openml.enums 
import APIVersion class TestHTTPClient(TestAPIBase): + def setUp(self): + super().setUp() + self.http_client = self.http_clients[APIVersion.V1] + + def _prepare_url(self, path: str | None = None) -> str: + server = self.http_client.server + base_url = self.http_client.base_url + return urljoin(server, urljoin(base_url, path)) + def test_cache(self): - url = self._get_url(path="task/31") + url = self._prepare_url(path="task/31") params = {"param1": "value1", "param2": "value2"} key = self.cache.get_key(url, params) @@ -18,6 +29,7 @@ def test_cache(self): "test", "api", "v1", + "xml", "task", "31", "param1=value1¶m2=value2", @@ -68,7 +80,7 @@ def test_get_with_cache_creates_cache(self): # verify cache directory structure exists cache_key = self.cache.get_key( - self._get_url(path="task/1"), + self._prepare_url(path="task/1"), {}, ) cache_path = self.cache._key_to_path(cache_key) @@ -94,7 +106,7 @@ def test_get_cache_expires(self): self.cache.ttl = 1 path = "task/1" - url = self._get_url(path=path) + url = self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" @@ -115,7 +127,7 @@ def test_get_cache_expires(self): def test_get_reset_cache(self): path = "task/1" - url = self._get_url(path=path) + url = self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 9f9e61ba6..5fa9d624d 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -2,18 +2,13 @@ import pytest from openml.testing import TestAPIBase from openml._api import ResourceV1API, ResourceV2API, FallbackProxy -from openml.enums import ResourceType +from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError -class TestResourceV1API(TestAPIBase): - def setUp(self): - super().setUp() - self.resource = ResourceV1API(self.http_client) - self.resource.resource_type = ResourceType.TASK - - @pytest.mark.uses_test_server() - def test_publish_and_delete(self): +@pytest.mark.uses_test_server() +class TestResourceAPIBase(TestAPIBase): + def _publish_and_delete(self): task_xml = """ 5 @@ -22,30 +17,19 @@ def test_publish_and_delete(self): """ - task_id = None - try: - # Publish the task - task_id = self.resource.publish( - "task", - files={"description": task_xml}, - ) - - # Get the task to verify it exists - get_response = self.http_client.get(f"task/{task_id}") - self.assertEqual(get_response.status_code, 200) - - finally: - # delete the task if it was created - if task_id is not None: - success = self.resource.delete(task_id) - self.assertTrue(success) + task_id = self.resource.publish( + "task", + files={"description": task_xml}, + ) + self.assertIsNotNone(task_id) + success = self.resource.delete(task_id) + self.assertTrue(success) - @pytest.mark.uses_test_server() - def test_tag_and_untag(self): + def _tag_and_untag(self): resource_id = 1 unique_indicator = str(time()).replace(".", "") - tag = f"TestResourceV1API_test_tag_and_untag_{unique_indicator}" + tag = f"{self.__class__.__name__}_test_tag_and_untag_{unique_indicator}" tags = self.resource.tag(resource_id, tag) self.assertIn(tag, tags) @@ -54,52 +38,51 @@ def test_tag_and_untag(self): self.assertNotIn(tag, tags) -class TestResourceV2API(TestResourceV1API): +class TestResourceV1API(TestResourceAPIBase): def setUp(self): super().setUp() - - self.server = "" - self.base_url = "" - self.api_key = "" - 
self.http_client = self._get_http_client( - server=self.server, - base_url=self.base_url, - api_key=self.api_key, - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - - self.resource = ResourceV2API(self.http_client) + http_client = self.http_clients[APIVersion.V1] + self.resource = ResourceV1API(http_client) self.resource.resource_type = ResourceType.TASK - @pytest.mark.xfail(raises=OpenMLNotSupportedError) def test_publish_and_delete(self): - super().test_tag_and_untag() - + self._publish_and_delete() - @pytest.mark.xfail(raises=OpenMLNotSupportedError) def test_tag_and_untag(self): - super().test_tag_and_untag() + self._tag_and_untag() -class TestResourceFallbackAPI(TestResourceV1API): +class TestResourceV2API(TestResourceAPIBase): def setUp(self): super().setUp() + http_client = self.http_clients[APIVersion.V2] + self.resource = ResourceV2API(http_client) + self.resource.resource_type = ResourceType.TASK + + def test_publish_and_delete(self): + with pytest.raises(OpenMLNotSupportedError): + self._tag_and_untag() + + def test_tag_and_untag(self): + with pytest.raises(OpenMLNotSupportedError): + self._tag_and_untag() - self.http_client_v2 = self._get_http_client( - server="", - base_url="", - api_key="", - retries=self.retries, - retry_policy=self.retry_policy, - cache=self.cache, - ) - resource_v1 = ResourceV1API(self.http_client) +class TestResourceFallbackAPI(TestResourceAPIBase): + def setUp(self): + super().setUp() + http_client_v1 = self.http_clients[APIVersion.V1] + resource_v1 = ResourceV1API(http_client_v1) resource_v1.resource_type = ResourceType.TASK - resource_v2 = ResourceV2API(self.http_client_v2) + http_client_v2 = self.http_clients[APIVersion.V2] + resource_v2 = ResourceV2API(http_client_v2) resource_v2.resource_type = ResourceType.TASK self.resource = FallbackProxy(resource_v2, resource_v1) + + def test_publish_and_delete(self): + self._publish_and_delete() + + def test_tag_and_untag(self): + self._tag_and_untag() From 9608c3652cfc74642c8bb71253af8dc31765d0a8 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:27:51 +0500 Subject: [PATCH 62/86] remove unused current_api_version from TestAPIBase --- openml/testing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index d73e15a2d..63a93a0b8 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -289,7 +289,6 @@ class TestAPIBase(unittest.TestCase): cache: HTTPCache http_clients: dict[APIVersion, HTTPClient] minio_client: MinIOClient - current_api_version: APIVersion | None def setUp(self) -> None: config = openml._backend.get_config() @@ -297,7 +296,6 @@ def setUp(self) -> None: self.retries = config.connection.retries self.retry_policy = config.connection.retry_policy self.ttl = config.cache.ttl - self.current_api_version = None abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() self.cache_dir = abspath_this_file.parent.parent / "files" From f6bc7f70707e422f727e38b9da7aaba4d4b6c322 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:39:12 +0500 Subject: [PATCH 63/86] make TestAPIBase inherit TestBase --- openml/testing.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index 63a93a0b8..5a1a4d10f 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -281,52 +281,42 @@ def _check_fold_timing_evaluations( # noqa: PLR0913 assert evaluation <= max_val -class TestAPIBase(unittest.TestCase): - retries: int - 
retry_policy: RetryPolicy - ttl: int - cache_dir: Path +class TestAPIBase(TestBase): cache: HTTPCache http_clients: dict[APIVersion, HTTPClient] minio_client: MinIOClient - def setUp(self) -> None: - config = openml._backend.get_config() - - self.retries = config.connection.retries - self.retry_policy = config.connection.retry_policy - self.ttl = config.cache.ttl + def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: + super().setUp(n_levels=n_levels, tmpdir_suffix=tmpdir_suffix) - abspath_this_file = Path(inspect.getfile(self.__class__)).absolute() - self.cache_dir = abspath_this_file.parent.parent / "files" - if not self.cache_dir.is_dir(): - raise ValueError( - f"Cannot find test cache dir, expected it to be {self.cache_dir}!", - ) + retries = self.connection_n_retries + retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT + ttl = openml._backend.get_config_value("cache.ttl") + cache_dir = self.static_cache_dir self.cache = HTTPCache( - path=self.cache_dir, - ttl=self.ttl, + path=cache_dir, + ttl=ttl, ) self.http_clients = { APIVersion.V1: HTTPClient( server="https://test.openml.org/", base_url="api/v1/xml/", api_key="normaluser", - retries=self.retries, - retry_policy=self.retry_policy, + retries=retries, + retry_policy=retry_policy, cache=self.cache, ), APIVersion.V2: HTTPClient( server="http://localhost:8002/", base_url="", api_key="", - retries=self.retries, - retry_policy=self.retry_policy, + retries=retries, + retry_policy=retry_policy, cache=self.cache, ), } - self.minio_client = MinIOClient(path=self.cache_dir) + self.minio_client = MinIOClient(path=cache_dir) def check_task_existence( From baa3a38bedd4b888964a8e46d867ceb03e70942b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 5 Feb 2026 15:43:40 +0500 Subject: [PATCH 64/86] nits: test classes --- tests/test_api/test_http.py | 3 +++ tests/test_api/test_versions.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 3c35ea5e1..ab9bd7412 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -6,9 +6,12 @@ import os from urllib.parse import urljoin from openml.enums import APIVersion +from openml._api import HTTPClient class TestHTTPClient(TestAPIBase): + http_client: HTTPClient + def setUp(self): super().setUp() self.http_client = self.http_clients[APIVersion.V1] diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 5fa9d624d..1313889bc 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,13 +1,15 @@ from time import time import pytest from openml.testing import TestAPIBase -from openml._api import ResourceV1API, ResourceV2API, FallbackProxy +from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError @pytest.mark.uses_test_server() class TestResourceAPIBase(TestAPIBase): + resource: ResourceAPI | FallbackProxy + def _publish_and_delete(self): task_xml = """ From 52b93feab0512c182299337292a79e00a1f6317e Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sat, 7 Feb 2026 00:03:53 +0500 Subject: [PATCH 65/86] minor fix in _sync_api_config identified while debugging https://github.com/openml/openml-python/pull/1616#issuecomment-3858997021 --- openml/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/config.py b/openml/config.py index c266ae9d9..692543a00 100644 
--- a/openml/config.py +++ b/openml/config.py @@ -534,7 +534,7 @@ def _sync_api_config() -> None: p = urlparse(server) v1_server = f"{p.scheme}://{p.netloc}/" - v1_base_url = p.path.lstrip("/") + v1_base_url = p.path.rstrip("/") + "/" # requirement for urllib.parse.urljoin connection_retry_policy = RetryPolicy.HUMAN if retry_policy == "human" else RetryPolicy.ROBOT cache_dir = str(_root_cache_directory) From ec9477ffbe282c8177cb56e469fce71da7040126 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Sat, 7 Feb 2026 00:14:14 +0500 Subject: [PATCH 66/86] chore: rerun CI From 10d134ab5915cc6b777857659e1647e26b22f2d3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 10 Feb 2026 22:02:52 +0500 Subject: [PATCH 67/86] remove duplicates in _api/resources/__init__.py --- openml/_api/resources/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 1f0b2caa1..6d957966e 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -42,12 +42,10 @@ "EvaluationV1API", "EvaluationV2API", "FallbackProxy", - "FallbackProxy", "FlowAPI", "FlowV1API", "FlowV2API", "ResourceAPI", - "ResourceAPI", "ResourceV1API", "ResourceV2API", "RunAPI", From 935f0f431e8814a4b789d93ebdca04651dc030a3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 10 Feb 2026 22:21:11 +0500 Subject: [PATCH 68/86] implement HTTPClient.download and add tests --- openml/_api/clients/http.py | 56 +++++++++++++++++++++++++++++-- openml/exceptions.py | 4 +++ tests/test_api/test_http.py | 66 +++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index db782cca7..2c15515f3 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -1,12 +1,13 @@ from __future__ import annotations +import hashlib import json import logging import math import random import time import xml -from collections.abc import Mapping +from collections.abc import Callable, Mapping from pathlib import Path from typing import Any from urllib.parse import urlencode, urljoin, urlparse @@ -18,6 +19,8 @@ from openml.__version__ import __version__ from openml.enums import RetryPolicy from openml.exceptions import ( + OpenMLCacheRequiredError, + OpenMLHashException, OpenMLNotAuthorizedError, OpenMLServerError, OpenMLServerException, @@ -315,7 +318,7 @@ def _request( # noqa: PLR0913 return response, retry_raise_e - def request( + def request( # noqa: PLR0913, C901 self, method: str, path: str, @@ -323,6 +326,7 @@ def request( use_cache: bool = False, reset_cache: bool = False, use_api_key: bool = False, + md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: url = urljoin(self.server, urljoin(self.base_url, path)) @@ -384,8 +388,20 @@ def request( cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) + if md5_checksum is not None: + self._verify_checksum(response, md5_checksum) + return response + def _verify_checksum(self, response: Response, md5_checksum: str) -> None: + # ruff sees hashlib.md5 as insecure + actual = hashlib.md5(response.content).hexdigest() # noqa: S324 + if actual != md5_checksum: + raise OpenMLHashException( + f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} " + f"when downloading {response.url}.", + ) + def get( self, path: str, @@ -393,6 +409,7 @@ def get( use_cache: bool = False, reset_cache: bool = False, use_api_key: bool = False, + md5_checksum: str | None 
= None, **request_kwargs: Any, ) -> Response: return self.request( @@ -401,19 +418,22 @@ def get( use_cache=use_cache, reset_cache=reset_cache, use_api_key=use_api_key, + md5_checksum=md5_checksum, **request_kwargs, ) def post( self, path: str, + *, + use_api_key: bool = True, **request_kwargs: Any, ) -> Response: return self.request( method="POST", path=path, use_cache=False, - use_api_key=True, + use_api_key=use_api_key, **request_kwargs, ) @@ -429,3 +449,33 @@ def delete( use_api_key=True, **request_kwargs, ) + + def download( + self, + url: str, + handler: Callable[[Response, Path, str], Path] | None = None, + encoding: str = "utf-8", + file_name: str = "response.txt", + md5_checksum: str | None = None, + ) -> Path: + if self.cache is None: + raise OpenMLCacheRequiredError( + "A cache object is required for download, but none was provided in the HTTPClient." + ) + base = self.cache.path + file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name + file_path = file_path.expanduser() + file_path.parent.mkdir(parents=True, exist_ok=True) + if file_path.exists(): + return file_path + + response = self.get(url, md5_checksum=md5_checksum) + if handler is not None: + return handler(response, file_path, encoding) + + return self._text_handler(response, file_path, encoding) + + def _text_handler(self, response: Response, path: Path, encoding: str) -> Path: + with path.open("w", encoding=encoding) as f: + f.write(response.text) + return path diff --git a/openml/exceptions.py b/openml/exceptions.py index 26c2d2591..10f693648 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -69,3 +69,7 @@ class ObjectNotPublishedError(PyOpenMLError): class OpenMLNotSupportedError(PyOpenMLError): """Raised when an API operation is not supported for a resource/version.""" + + +class OpenMLCacheRequiredError(PyOpenMLError): + """Raised when a cache object is required but not provided.""" diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index ab9bd7412..8dc6303d1 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -4,9 +4,11 @@ import pytest from openml.testing import TestAPIBase import os +from pathlib import Path from urllib.parse import urljoin from openml.enums import APIVersion from openml._api import HTTPClient +from openml.exceptions import OpenMLCacheRequiredError class TestHTTPClient(TestAPIBase): @@ -174,3 +176,67 @@ def test_post_and_delete(self): if task_id is not None: del_response = self.http_client.delete(f"task/{task_id}") self.assertEqual(del_response.status_code, 200) + + def test_download_requires_cache(self): + client = HTTPClient( + server=self.http_client.server, + base_url=self.http_client.base_url, + api_key=self.http_client.api_key, + retries=1, + retry_policy=self.http_client.retry_policy, + cache=None, + ) + + with pytest.raises(OpenMLCacheRequiredError): + client.download("https://www.openml.org") + + @pytest.mark.uses_test_server() + def test_download_creates_file(self): + # small stable resource + url = self.http_client.server + + path = self.http_client.download( + url, + file_name="index.html", + ) + + assert path.exists() + assert path.is_file() + assert path.read_text(encoding="utf-8") + + @pytest.mark.uses_test_server() + def test_download_is_cached_on_disk(self): + url = self.http_client.server + + path1 = self.http_client.download( + url, + file_name="cached.html", + ) + mtime1 = path1.stat().st_mtime + + # second call should NOT re-download + path2 = self.http_client.download( + url, + 
file_name="cached.html", + ) + mtime2 = path2.stat().st_mtime + + assert path1 == path2 + assert mtime1 == mtime2 + + @pytest.mark.uses_test_server() + def test_download_respects_custom_handler(self): + url = self.http_client.server + + def handler(response, path: Path, encoding: str): + path.write_text("HANDLED", encoding=encoding) + return path + + path = self.http_client.download( + url, + handler=handler, + file_name="handled.txt", + ) + + assert path.exists() + assert path.read_text() == "HANDLED" From 9514df8920119d6bfedda83cbd8f558ef1e10792 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 11 Feb 2026 11:54:29 +0500 Subject: [PATCH 69/86] add docstrings --- openml/_api/clients/http.py | 383 ++++++++++++++++++ openml/_api/clients/minio.py | 23 ++ openml/_api/resources/base/base.py | 124 +++++- openml/_api/resources/base/fallback.py | 108 +++++ openml/_api/resources/base/resources.py | 18 + openml/_api/resources/base/versions.py | 164 ++++++++ openml/_api/resources/dataset.py | 4 +- openml/_api/resources/estimation_procedure.py | 4 +- openml/_api/resources/evaluation.py | 4 +- openml/_api/resources/evaluation_measure.py | 4 +- openml/_api/resources/flow.py | 4 +- openml/_api/resources/run.py | 4 +- openml/_api/resources/setup.py | 4 +- openml/_api/resources/study.py | 4 +- openml/_api/resources/task.py | 4 +- openml/_api/setup/_utils.py | 24 ++ openml/_api/setup/backend.py | 107 +++++ openml/_api/setup/builder.py | 53 +++ openml/_api/setup/config.py | 54 +++ 19 files changed, 1072 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 2c15515f3..a1ccc5122 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -29,11 +29,52 @@ class HTTPCache: + """ + Filesystem-based cache for HTTP responses. + + This class stores HTTP responses on disk using a structured directory layout + derived from the request URL and parameters. Each cached response consists of + three files: metadata (``meta.json``), headers (``headers.json``), and the raw + body (``body.bin``). Entries are considered valid until their time-to-live + (TTL) expires. + + Parameters + ---------- + path : pathlib.Path + Base directory where cache entries are stored. + ttl : int + Time-to-live in seconds. Cached entries older than this value are treated + as expired. + + Notes + ----- + The cache key is derived from the URL (domain and path components) and query + parameters, excluding the ``api_key`` parameter. + """ + def __init__(self, *, path: Path, ttl: int) -> None: self.path = path self.ttl = ttl def get_key(self, url: str, params: dict[str, Any]) -> str: + """ + Generate a filesystem-safe cache key for a request. + + The key is constructed from the reversed domain components, URL path + segments, and URL-encoded query parameters (excluding ``api_key``). + + Parameters + ---------- + url : str + The full request URL. + params : dict of str to Any + Query parameters associated with the request. + + Returns + ------- + str + A relative path string representing the cache key. + """ parsed_url = urlparse(url) netloc_parts = parsed_url.netloc.split(".")[::-1] path_parts = parsed_url.path.strip("/").split("/") @@ -44,9 +85,44 @@ def get_key(self, url: str, params: dict[str, Any]) -> str: return str(Path(*netloc_parts, *path_parts, *params_part)) def _key_to_path(self, key: str) -> Path: + """ + Convert a cache key into an absolute filesystem path. + + Parameters + ---------- + key : str + Cache key as returned by :meth:`get_key`. 
+ + Returns + ------- + pathlib.Path + Absolute path corresponding to the cache entry. + """ return self.path.joinpath(key) def load(self, key: str) -> Response: + """ + Load a cached HTTP response from disk. + + Parameters + ---------- + key : str + Cache key identifying the stored response. + + Returns + ------- + requests.Response + Reconstructed response object with status code, headers, body, and metadata. + + Raises + ------ + FileNotFoundError + If the cache entry or required files are missing. + TimeoutError + If the cached entry has expired based on the configured TTL. + ValueError + If required metadata is missing or malformed. + """ path = self._key_to_path(key) if not path.exists(): @@ -85,6 +161,22 @@ def load(self, key: str) -> Response: return response def save(self, key: str, response: Response) -> None: + """ + Persist an HTTP response to disk. + + Parameters + ---------- + key : str + Cache key identifying where to store the response. + response : requests.Response + Response object to cache. + + Notes + ----- + The response body is stored as binary data. Headers and metadata + (status code, URL, reason, encoding, elapsed time, request info, and + creation timestamp) are stored as JSON. + """ path = self._key_to_path(key) path.mkdir(parents=True, exist_ok=True) @@ -113,6 +205,29 @@ def save(self, key: str, response: Response) -> None: class HTTPClient: + """ + HTTP client for interacting with the OpenML API. + + This client supports configurable retry policies, optional filesystem + caching, API key authentication, and response validation including + checksum verification. + + Parameters + ---------- + server : str + Base server URL (e.g., ``https://www.openml.org``). + base_url : str + Base API path appended to the server URL. + api_key : str + API key used for authenticated endpoints. + retries : int + Maximum number of retry attempts for failed requests. + retry_policy : RetryPolicy + Strategy controlling delay between retries. + cache : HTTPCache or None, optional + Cache instance for storing and retrieving responses. + """ + def __init__( # noqa: PLR0913 self, *, @@ -136,17 +251,62 @@ def __init__( # noqa: PLR0913 self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _robot_delay(self, n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + + Notes + ----- + Uses a sigmoid-based growth curve with Gaussian noise to gradually + increase waiting time. + """ wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 variation = random.gauss(0, wait / 10) return max(1.0, wait + variation) def _human_delay(self, n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ return max(1.0, n) def _parse_exception_response( self, response: Response, ) -> tuple[int | None, str]: + """ + Parse an error response returned by the server. + + Parameters + ---------- + response : requests.Response + HTTP response containing error details in JSON or XML format. + + Returns + ------- + tuple of (int or None, str) + Parsed error code and combined error message. The code may be + ``None`` if unavailable. 
+ """ content_type = response.headers.get("Content-Type", "").lower() if "json" in content_type: @@ -183,6 +343,29 @@ def _raise_code_specific_error( url: str, files: Mapping[str, Any] | None, ) -> None: + """ + Raise specialized exceptions based on OpenML error codes. + + Parameters + ---------- + code : int + Server-provided error code. + message : str + Parsed error message. + url : str + Request URL associated with the error. + files : Mapping of str to Any or None + Files sent with the request, if any. + + Raises + ------ + OpenMLServerNoResult + If the error indicates a missing resource. + OpenMLNotAuthorizedError + If authentication is required or invalid. + OpenMLServerException + For other server-side errors (except retryable database errors). + """ if code in [111, 372, 512, 500, 482, 542, 674]: # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups @@ -226,6 +409,31 @@ def _validate_response( files: Mapping[str, Any] | None, response: Response, ) -> Exception | None: + """ + Validate an HTTP response and determine whether to retry. + + Parameters + ---------- + method : str + HTTP method used for the request. + url : str + Full request URL. + files : Mapping of str to Any or None + Files sent with the request, if any. + response : requests.Response + Received HTTP response. + + Returns + ------- + Exception or None + ``None`` if the response is valid. Otherwise, an exception + indicating the error to raise or retry. + + Raises + ------ + OpenMLServerError + For unexpected server errors or malformed responses. + """ if ( "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip" @@ -288,6 +496,33 @@ def _request( # noqa: PLR0913 files: Mapping[str, Any] | None, **request_kwargs: Any, ) -> tuple[Response | None, Exception | None]: + """ + Execute a single HTTP request attempt. + + Parameters + ---------- + session : requests.Session + Active session used to send the request. + method : str + HTTP method (e.g., ``GET``, ``POST``). + url : str + Full request URL. + params : Mapping of str to Any + Query parameters. + data : Mapping of str to Any + Request body data. + headers : Mapping of str to str + HTTP headers. + files : Mapping of str to Any or None + Files to upload. + **request_kwargs : Any + Additional arguments forwarded to ``requests.Session.request``. + + Returns + ------- + tuple of (requests.Response or None, Exception or None) + Response and potential retry exception. + """ retry_raise_e: Exception | None = None response: Response | None = None @@ -329,6 +564,38 @@ def request( # noqa: PLR0913, C901 md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: + """ + Send an HTTP request with retry, caching, and validation support. + + Parameters + ---------- + method : str + HTTP method to use. + path : str + API path relative to the base URL. + use_cache : bool, optional + Whether to load/store responses from cache. + reset_cache : bool, optional + If True, bypass existing cache entries. + use_api_key : bool, optional + Whether to include the API key in query parameters. + md5_checksum : str or None, optional + Expected MD5 checksum of the response body. + **request_kwargs : Any + Additional arguments passed to the underlying request. + + Returns + ------- + requests.Response + Final validated response. + + Raises + ------ + Exception + Propagates network, validation, or server exceptions after retries. + OpenMLHashException + If checksum verification fails. 
+ """ url = urljoin(self.server, urljoin(self.base_url, path)) retries = max(1, self.retries) @@ -394,6 +661,21 @@ def request( # noqa: PLR0913, C901 return response def _verify_checksum(self, response: Response, md5_checksum: str) -> None: + """ + Verify MD5 checksum of a response body. + + Parameters + ---------- + response : requests.Response + HTTP response whose content should be verified. + md5_checksum : str + Expected hexadecimal MD5 checksum. + + Raises + ------ + OpenMLHashException + If the computed checksum does not match the expected value. + """ # ruff sees hashlib.md5 as insecure actual = hashlib.md5(response.content).hexdigest() # noqa: S324 if actual != md5_checksum: @@ -412,6 +694,29 @@ def get( md5_checksum: str | None = None, **request_kwargs: Any, ) -> Response: + """ + Send a GET request. + + Parameters + ---------- + path : str + API path relative to the base URL. + use_cache : bool, optional + Whether to use the response cache. + reset_cache : bool, optional + Whether to ignore existing cached entries. + use_api_key : bool, optional + Whether to include the API key. + md5_checksum : str or None, optional + Expected MD5 checksum for response validation. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="GET", path=path, @@ -429,6 +734,23 @@ def post( use_api_key: bool = True, **request_kwargs: Any, ) -> Response: + """ + Send a POST request. + + Parameters + ---------- + path : str + API path relative to the base URL. + use_api_key : bool, optional + Whether to include the API key. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="POST", path=path, @@ -442,6 +764,21 @@ def delete( path: str, **request_kwargs: Any, ) -> Response: + """ + Send a DELETE request. + + Parameters + ---------- + path : str + API path relative to the base URL. + **request_kwargs : Any + Additional request arguments. + + Returns + ------- + requests.Response + HTTP response. + """ return self.request( method="DELETE", path=path, @@ -458,6 +795,35 @@ def download( file_name: str = "response.txt", md5_checksum: str | None = None, ) -> Path: + """ + Download a resource and store it in the cache directory. + + Parameters + ---------- + url : str + Absolute URL of the resource to download. + handler : callable or None, optional + Custom handler function accepting ``(response, path, encoding)`` + and returning a ``pathlib.Path``. + encoding : str, optional + Text encoding used when writing the response body. + file_name : str, optional + Name of the saved file. + md5_checksum : str or None, optional + Expected MD5 checksum for integrity verification. + + Returns + ------- + pathlib.Path + Path to the downloaded file. + + Raises + ------ + OpenMLCacheRequiredError + If no cache instance is configured. + OpenMLHashException + If checksum verification fails. + """ if self.cache is None: raise OpenMLCacheRequiredError( "A cache object is required for download, but none was provided in the HTTPClient." @@ -476,6 +842,23 @@ def download( return self._text_handler(response, file_path, encoding) def _text_handler(self, response: Response, path: Path, encoding: str) -> Path: + """ + Write response text content to a file. + + Parameters + ---------- + response : requests.Response + HTTP response containing text data. + path : pathlib.Path + Destination file path. 
+ encoding : str + Text encoding for writing the file. + + Returns + ------- + pathlib.Path + Path to the written file. + """ with path.open("w", encoding=encoding) as f: f.write(response.text) return path diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index 2edc8269b..1e9b534fb 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -6,6 +6,29 @@ class MinIOClient: + """ + Lightweight client configuration for interacting with a MinIO-compatible + object storage service. + + This class stores basic configuration such as a base filesystem path and + default HTTP headers. It is intended to be extended with actual request + or storage logic elsewhere. + + Parameters + ---------- + path : pathlib.Path or None, optional + Base path used for local storage or downloads. If ``None``, no + default path is configured. + + Attributes + ---------- + path : pathlib.Path or None + Configured base path for storage operations. + headers : dict of str to str + Default HTTP headers, including a user-agent identifying the + OpenML Python client version. + """ + def __init__(self, path: Path | None = None) -> None: self.path = path self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 5eadc4932..5a2c1faa6 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -14,6 +14,33 @@ class ResourceAPI(ABC): + """ + Abstract base class for OpenML resource APIs. + + This class defines the common interface for interacting with OpenML + resources (e.g., datasets, flows, runs) across different API versions. + Concrete subclasses must implement the resource-specific operations + such as publishing, deleting, and tagging. + + Parameters + ---------- + http : HTTPClient + Configured HTTP client used for communication with the OpenML API. + minio : MinIOClient or None, optional + Optional MinIO client used for object storage operations. + + Attributes + ---------- + api_version : APIVersion + API version implemented by the resource. + resource_type : ResourceType + Type of OpenML resource handled by the implementation. + _http : HTTPClient + Internal HTTP client instance. + _minio : MinIOClient or None + Internal MinIO client instance, if provided. + """ + api_version: APIVersion resource_type: ResourceType @@ -22,18 +49,107 @@ def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): self._minio = minio @abstractmethod - def delete(self, resource_id: int) -> bool: ... + def delete(self, resource_id: int) -> bool: + """ + Delete a resource by its identifier. + + Parameters + ---------- + resource_id : int + Unique identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the deletion was successful. + + Notes + ----- + Concrete subclasses must implement this method. + """ @abstractmethod - def publish(self, path: str, files: Mapping[str, Any] | None) -> int: ... + def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource to the OpenML server. + + Parameters + ---------- + path : str + API endpoint path used for publishing the resource. + files : Mapping of str to Any or None + Files or payload data required for publishing. The structure + depends on the resource type. + + Returns + ------- + int + Identifier of the newly created resource. + + Notes + ----- + Concrete subclasses must implement this method. 
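+
+        Examples
+        --------
+        A sketch using a concrete V1 subclass; the HTTP client and the
+        identifier are illustrative:
+
+        >>> from openml._api.resources.task import TaskV1API
+        >>> api = TaskV1API(http_client)  # doctest: +SKIP
+        >>> api.delete(42)  # doctest: +SKIP
+        True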
+ """ @abstractmethod - def tag(self, resource_id: int, tag: str) -> list[str]: ... + def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ @abstractmethod - def untag(self, resource_id: int, tag: str) -> list[str]: ... + def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Notes + ----- + Concrete subclasses must implement this method. + """ def _not_supported(self, *, method: str) -> NoReturn: + """ + Raise an error indicating that a method is not supported. + + Parameters + ---------- + method : str + Name of the unsupported method. + + Raises + ------ + OpenMLNotSupportedError + If the current API version does not support the requested method + for the given resource type. + """ version = getattr(self.api_version, "value", "unknown") resource = getattr(self.resource_type, "value", "unknown") diff --git a/openml/_api/resources/base/fallback.py b/openml/_api/resources/base/fallback.py index 3919c36a9..9b8f64a17 100644 --- a/openml/_api/resources/base/fallback.py +++ b/openml/_api/resources/base/fallback.py @@ -7,18 +7,82 @@ class FallbackProxy: + """ + Proxy object that provides transparent fallback across multiple API versions. + + This class delegates attribute access to a sequence of API implementations. + When a callable attribute is invoked and raises ``OpenMLNotSupportedError``, + the proxy automatically attempts the same method on subsequent API instances + until one succeeds. + + Parameters + ---------- + *api_versions : Any + One or more API implementation instances ordered by priority. + The first API is treated as the primary implementation, and + subsequent APIs are used as fallbacks. + + Raises + ------ + ValueError + If no API implementations are provided. + + Notes + ----- + Attribute lookup is performed dynamically via ``__getattr__``. + Only methods that raise ``OpenMLNotSupportedError`` trigger fallback + behavior. Other exceptions are propagated immediately. + """ + def __init__(self, *api_versions: Any): if not api_versions: raise ValueError("At least one API version must be provided") self._apis = api_versions def __getattr__(self, name: str) -> Any: + """ + Dynamically resolve attribute access across API implementations. + + Parameters + ---------- + name : str + Name of the attribute being accessed. + + Returns + ------- + Any + The resolved attribute. If it is callable, a wrapped function + providing fallback behavior is returned. + + Raises + ------ + AttributeError + If none of the API implementations define the attribute. + """ api, attr = self._find_attr(name) if callable(attr): return self._wrap_callable(name, api, attr) return attr def _find_attr(self, name: str) -> tuple[Any, Any]: + """ + Find the first API implementation that defines a given attribute. + + Parameters + ---------- + name : str + Name of the attribute to search for. + + Returns + ------- + tuple of (Any, Any) + The API instance and the corresponding attribute. 
+ + Raises + ------ + AttributeError + If no API implementation defines the attribute. + """ for api in self._apis: attr = getattr(api, name, None) if attr is not None: @@ -31,6 +95,25 @@ def _wrap_callable( primary_api: Any, primary_attr: Callable[..., Any], ) -> Callable[..., Any]: + """ + Wrap a callable attribute to enable fallback behavior. + + Parameters + ---------- + name : str + Name of the method being wrapped. + primary_api : Any + Primary API instance providing the callable. + primary_attr : Callable[..., Any] + Callable attribute obtained from the primary API. + + Returns + ------- + Callable[..., Any] + Wrapped function that attempts the primary call first and + falls back to other APIs if ``OpenMLNotSupportedError`` is raised. + """ + def wrapper(*args: Any, **kwargs: Any) -> Any: try: return primary_attr(*args, **kwargs) @@ -46,6 +129,31 @@ def _call_fallbacks( *args: Any, **kwargs: Any, ) -> Any: + """ + Attempt to call a method on fallback API implementations. + + Parameters + ---------- + name : str + Name of the method to invoke. + skip_api : Any + API instance to skip (typically the primary API that already failed). + *args : Any + Positional arguments passed to the method. + **kwargs : Any + Keyword arguments passed to the method. + + Returns + ------- + Any + Result returned by the first successful fallback invocation. + + Raises + ------ + OpenMLNotSupportedError + If all API implementations either do not define the method + or raise ``OpenMLNotSupportedError``. + """ for api in self._apis: if api is skip_api: continue diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 8ccd5776e..ede0e1034 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -6,36 +6,54 @@ class DatasetAPI(ResourceAPI): + """Abstract API interface for dataset resources.""" + resource_type: ResourceType = ResourceType.DATASET class TaskAPI(ResourceAPI): + """Abstract API interface for task resources.""" + resource_type: ResourceType = ResourceType.TASK class EvaluationMeasureAPI(ResourceAPI): + """Abstract API interface for evaluation measure resources.""" + resource_type: ResourceType = ResourceType.EVALUATION_MEASURE class EstimationProcedureAPI(ResourceAPI): + """Abstract API interface for estimation procedure resources.""" + resource_type: ResourceType = ResourceType.ESTIMATION_PROCEDURE class EvaluationAPI(ResourceAPI): + """Abstract API interface for evaluation resources.""" + resource_type: ResourceType = ResourceType.EVALUATION class FlowAPI(ResourceAPI): + """Abstract API interface for flow resources.""" + resource_type: ResourceType = ResourceType.FLOW class StudyAPI(ResourceAPI): + """Abstract API interface for study resources.""" + resource_type: ResourceType = ResourceType.STUDY class RunAPI(ResourceAPI): + """Abstract API interface for run resources.""" + resource_type: ResourceType = ResourceType.RUN class SetupAPI(ResourceAPI): + """Abstract API interface for setup resources.""" + resource_type: ResourceType = ResourceType.SETUP diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index b86272377..51a958b90 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -16,14 +16,74 @@ class ResourceV1API(ResourceAPI): + """ + Version 1 implementation of the OpenML resource API. 
+ + This class provides XML-based implementations for publishing, + deleting, tagging, and untagging resources using the V1 API + endpoints. Responses are parsed using ``xmltodict``. + + Notes + ----- + V1 endpoints expect and return XML. Error handling follows the + legacy OpenML server behavior and maps specific error codes to + more descriptive exceptions where appropriate. + """ + api_version: APIVersion = APIVersion.V1 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: + """ + Publish a new resource using the V1 API. + + Parameters + ---------- + path : str + API endpoint path for the upload. + files : Mapping of str to Any or None + Files to upload as part of the request payload. + + Returns + ------- + int + Identifier of the newly created resource. + + Raises + ------ + ValueError + If the server response does not contain a valid resource ID. + OpenMLServerException + If the server returns an error during upload. + """ response = self._http.post(path, files=files) parsed_response = xmltodict.parse(response.content) return self._extract_id_from_upload(parsed_response) def delete(self, resource_id: int) -> bool: + """ + Delete a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to delete. + + Returns + ------- + bool + ``True`` if the server confirms successful deletion. + + Raises + ------ + ValueError + If the resource type is not supported for deletion. + OpenMLNotAuthorizedError + If the user is not permitted to delete the resource. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + For other server-side errors. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "flow", "task", "run", "study", "user"} @@ -40,6 +100,28 @@ def delete(self, resource_id: int) -> bool: raise def tag(self, resource_id: int, tag: str) -> list[str]: + """ + Add a tag to a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to tag. + tag : str + Tag to associate with the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "task", "flow", "setup", "run"} @@ -58,6 +140,28 @@ def tag(self, resource_id: int, tag: str) -> list[str]: return tags def untag(self, resource_id: int, tag: str) -> list[str]: + """ + Remove a tag from a resource using the V1 API. + + Parameters + ---------- + resource_id : int + Identifier of the resource to untag. + tag : str + Tag to remove from the resource. + + Returns + ------- + list of str + Updated list of tags assigned to the resource. + + Raises + ------ + ValueError + If the resource type does not support tagging. + OpenMLServerException + If the server returns an error. + """ resource_type = self._get_endpoint_name() legal_resources = {"data", "task", "flow", "setup", "run"} @@ -76,6 +180,19 @@ def untag(self, resource_id: int, tag: str) -> list[str]: return tags def _get_endpoint_name(self) -> str: + """ + Return the V1 endpoint name for the current resource type. + + Returns + ------- + str + Endpoint segment used in V1 API paths. + + Notes + ----- + Datasets use the special endpoint name ``"data"`` instead of + their enum value. 
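+
+        Examples
+        --------
+        A small sketch of the mapping; the surrounding instance setup is
+        elided:
+
+        >>> api.resource_type = ResourceType.DATASET  # doctest: +SKIP
+        >>> api._get_endpoint_name()  # doctest: +SKIP
+        'data'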
+ """ if self.resource_type == ResourceType.DATASET: return "data" return cast("str", self.resource_type.value) @@ -83,6 +200,26 @@ def _get_endpoint_name(self) -> str: def _handle_delete_exception( self, resource_type: str, exception: OpenMLServerException ) -> None: + """ + Map V1 deletion error codes to more specific exceptions. + + Parameters + ---------- + resource_type : str + Endpoint name of the resource type. + exception : OpenMLServerException + Original exception raised during deletion. + + Raises + ------ + OpenMLNotAuthorizedError + If the resource cannot be deleted due to ownership or + dependent entities. + OpenMLServerError + If deletion fails for an unknown reason. + OpenMLServerException + If the error code is not specially handled. + """ # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php # Most exceptions are descriptive enough to be raised as their standard # OpenMLServerException, however there are two cases where we add information: @@ -116,6 +253,25 @@ def _handle_delete_exception( raise exception def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: + """ + Extract the resource identifier from an XML upload response. + + Parameters + ---------- + parsed : Mapping of str to Any + Parsed XML response as returned by ``xmltodict.parse``. + + Returns + ------- + int + Extracted resource identifier. + + Raises + ------ + ValueError + If the response structure is unexpected or no identifier + can be found. + """ # reads id from upload response # actual parsed dict: {"oml:upload_flow": {"@xmlns:oml": "...", "oml:id": "42"}} @@ -140,6 +296,14 @@ def _extract_id_from_upload(self, parsed: Mapping[str, Any]) -> int: class ResourceV2API(ResourceAPI): + """ + Version 2 implementation of the OpenML resource API. + + This class represents the V2 API for resources. Operations such as + publishing, deleting, tagging, and untagging are currently not + supported and will raise ``OpenMLNotSupportedError``. 
+ """ + api_version: APIVersion = APIVersion.V2 def publish(self, path: str, files: Mapping[str, Any] | None) -> int: # noqa: ARG002 diff --git a/openml/_api/resources/dataset.py b/openml/_api/resources/dataset.py index 51688a2fd..520594df9 100644 --- a/openml/_api/resources/dataset.py +++ b/openml/_api/resources/dataset.py @@ -4,8 +4,8 @@ class DatasetV1API(ResourceV1API, DatasetAPI): - pass + """Version 1 API implementation for dataset resources.""" class DatasetV2API(ResourceV2API, DatasetAPI): - pass + """Version 2 API implementation for dataset resources.""" diff --git a/openml/_api/resources/estimation_procedure.py b/openml/_api/resources/estimation_procedure.py index b8ea7d2c3..a45f7af66 100644 --- a/openml/_api/resources/estimation_procedure.py +++ b/openml/_api/resources/estimation_procedure.py @@ -4,8 +4,8 @@ class EstimationProcedureV1API(ResourceV1API, EstimationProcedureAPI): - pass + """Version 1 API implementation for estimation procedure resources.""" class EstimationProcedureV2API(ResourceV2API, EstimationProcedureAPI): - pass + """Version 2 API implementation for estimation procedure resources.""" diff --git a/openml/_api/resources/evaluation.py b/openml/_api/resources/evaluation.py index 07877e14e..fe7e360a6 100644 --- a/openml/_api/resources/evaluation.py +++ b/openml/_api/resources/evaluation.py @@ -4,8 +4,8 @@ class EvaluationV1API(ResourceV1API, EvaluationAPI): - pass + """Version 1 API implementation for evaluation resources.""" class EvaluationV2API(ResourceV2API, EvaluationAPI): - pass + """Version 2 API implementation for evaluation resources.""" diff --git a/openml/_api/resources/evaluation_measure.py b/openml/_api/resources/evaluation_measure.py index 63cf16c77..4ed5097f7 100644 --- a/openml/_api/resources/evaluation_measure.py +++ b/openml/_api/resources/evaluation_measure.py @@ -4,8 +4,8 @@ class EvaluationMeasureV1API(ResourceV1API, EvaluationMeasureAPI): - pass + """Version 1 API implementation for evaluation measure resources.""" class EvaluationMeasureV2API(ResourceV2API, EvaluationMeasureAPI): - pass + """Version 2 API implementation for evaluation measure resources.""" diff --git a/openml/_api/resources/flow.py b/openml/_api/resources/flow.py index ad2e05bd9..1716d89d3 100644 --- a/openml/_api/resources/flow.py +++ b/openml/_api/resources/flow.py @@ -4,8 +4,8 @@ class FlowV1API(ResourceV1API, FlowAPI): - pass + """Version 1 API implementation for flow resources.""" class FlowV2API(ResourceV2API, FlowAPI): - pass + """Version 2 API implementation for flow resources.""" diff --git a/openml/_api/resources/run.py b/openml/_api/resources/run.py index 151c69e35..4caccb0b6 100644 --- a/openml/_api/resources/run.py +++ b/openml/_api/resources/run.py @@ -4,8 +4,8 @@ class RunV1API(ResourceV1API, RunAPI): - pass + """Version 1 API implementation for run resources.""" class RunV2API(ResourceV2API, RunAPI): - pass + """Version 2 API implementation for run resources.""" diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py index 78a36cecc..2896d3d9f 100644 --- a/openml/_api/resources/setup.py +++ b/openml/_api/resources/setup.py @@ -4,8 +4,8 @@ class SetupV1API(ResourceV1API, SetupAPI): - pass + """Version 1 API implementation for setup resources.""" class SetupV2API(ResourceV2API, SetupAPI): - pass + """Version 2 API implementation for setup resources.""" diff --git a/openml/_api/resources/study.py b/openml/_api/resources/study.py index cefd55004..fb073555c 100644 --- a/openml/_api/resources/study.py +++ b/openml/_api/resources/study.py @@ 
-4,8 +4,8 @@ class StudyV1API(ResourceV1API, StudyAPI): - pass + """Version 1 API implementation for study resources.""" class StudyV2API(ResourceV2API, StudyAPI): - pass + """Version 2 API implementation for study resources.""" diff --git a/openml/_api/resources/task.py b/openml/_api/resources/task.py index a367c9aa1..1f62aa3f3 100644 --- a/openml/_api/resources/task.py +++ b/openml/_api/resources/task.py @@ -4,8 +4,8 @@ class TaskV1API(ResourceV1API, TaskAPI): - pass + """Version 1 API implementation for task resources.""" class TaskV2API(ResourceV2API, TaskAPI): - pass + """Version 2 API implementation for task resources.""" diff --git a/openml/_api/setup/_utils.py b/openml/_api/setup/_utils.py index ddcf5b41c..6606140f9 100644 --- a/openml/_api/setup/_utils.py +++ b/openml/_api/setup/_utils.py @@ -12,6 +12,30 @@ def _resolve_default_cache_dir() -> Path: + """ + Determine the default cache directory for OpenML data. + + This function checks for user-defined environment variables and + platform-specific defaults to resolve where cached files should + be stored. It also provides backward-compatibility warnings if + legacy directories are detected. + + Returns + ------- + Path + Path to the cache directory that should be used. + + Notes + ----- + - If the environment variable ``OPENML_CACHE_DIR`` is set, its value + is used as the cache directory. + - On non-Linux systems, the default is ``~/.openml``. + - On Linux, the function follows the XDG Base Directory Specification: + - Uses ``$XDG_CACHE_HOME/openml`` if ``XDG_CACHE_HOME`` is set. + - Falls back to ``~/.cache/openml`` if ``XDG_CACHE_HOME`` is not set. + - If an old cache directory exists at ``$XDG_CACHE_HOME/org/openml``, + a warning is logged for backward compatibility. + """ user_defined_cache_dir = os.environ.get("OPENML_CACHE_DIR") if user_defined_cache_dir is not None: return Path(user_defined_cache_dir) diff --git a/openml/_api/setup/backend.py b/openml/_api/setup/backend.py index c29d1dbad..56f689c03 100644 --- a/openml/_api/setup/backend.py +++ b/openml/_api/setup/backend.py @@ -21,6 +21,42 @@ class APIBackend: + """ + Central backend for accessing all OpenML API resource interfaces. + + This class provides a singleton interface to dataset, task, flow, + evaluation, run, setup, study, and other resource APIs. It also + manages configuration through a nested ``Config`` object and + allows dynamic retrieval and updating of configuration values. + + Parameters + ---------- + config : Config, optional + Optional configuration object. If not provided, a default + ``Config`` instance is created. + + Attributes + ---------- + dataset : DatasetAPI + Interface for dataset-related API operations. + task : TaskAPI + Interface for task-related API operations. + evaluation_measure : EvaluationMeasureAPI + Interface for evaluation measure-related API operations. + estimation_procedure : EstimationProcedureAPI + Interface for estimation procedure-related API operations. + evaluation : EvaluationAPI + Interface for evaluation-related API operations. + flow : FlowAPI + Interface for flow-related API operations. + study : StudyAPI + Interface for study-related API operations. + run : RunAPI + Interface for run-related API operations. + setup : SetupAPI + Interface for setup-related API operations. + """ + _instance: APIBackend | None = None def __init__(self, config: Config | None = None): @@ -65,22 +101,62 @@ def setup(self) -> SetupAPI: @classmethod def get_instance(cls) -> APIBackend: + """ + Get the singleton instance of the APIBackend. 
+ + Returns + ------- + APIBackend + Singleton instance of the backend. + """ if cls._instance is None: cls._instance = cls() return cls._instance @classmethod def get_config(cls) -> Config: + """ + Get a deep copy of the current configuration. + + Returns + ------- + Config + Current configuration object. + """ return deepcopy(cls.get_instance()._config) @classmethod def set_config(cls, config: Config) -> None: + """ + Set a new configuration for the backend. + + This updates both the internal ``_config`` object and rebuilds + the internal API backend using ``APIBackendBuilder``. + + Parameters + ---------- + config : Config + Configuration object to set. + """ instance = cls.get_instance() instance._config = config instance._backend = APIBackendBuilder.build(config) @classmethod def get_config_value(cls, key: str) -> Any: + """ + Retrieve a specific configuration value by key. + + Parameters + ---------- + key : str + Dot-separated key specifying the configuration field. + + Returns + ------- + Any + Deep copy of the requested configuration value. + """ keys = key.split(".") config_value = cls.get_instance()._config for k in keys: @@ -92,6 +168,16 @@ def get_config_value(cls, key: str) -> Any: @classmethod def set_config_value(cls, key: str, value: Any) -> None: + """ + Set a specific configuration value by key. + + Parameters + ---------- + key : str + Dot-separated key specifying the configuration field. + value : Any + Value to assign to the configuration field. + """ keys = key.split(".") config = cls.get_instance()._config parent = config @@ -105,6 +191,19 @@ def set_config_value(cls, key: str, value: Any) -> None: @classmethod def get_config_values(cls, keys: list[str]) -> list[Any]: + """ + Retrieve multiple configuration values by a list of keys. + + Parameters + ---------- + keys : list of str + List of dot-separated keys specifying configuration fields. + + Returns + ------- + list of Any + List of deep copies of the requested configuration values. + """ values = [] for key in keys: value = cls.get_config_value(key) @@ -113,6 +212,14 @@ def get_config_values(cls, keys: list[str]) -> list[Any]: @classmethod def set_config_values(cls, config_dict: dict[str, Any]) -> None: + """ + Set multiple configuration values using a dictionary. + + Parameters + ---------- + config_dict : dict of str to Any + Mapping of dot-separated configuration keys to their values. + """ config = cls.get_instance()._config for key, value in config_dict.items(): diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index f801fe525..6263066b2 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -13,6 +13,41 @@ class APIBackendBuilder: + """ + Builder class for constructing API backend instances. + + This class organizes resource-specific API objects (datasets, tasks, + flows, evaluations, runs, setups, studies, etc.) and provides a + centralized access point for both primary and optional fallback APIs. + + Parameters + ---------- + resource_apis : Mapping[ResourceType, ResourceAPI | FallbackProxy] + Mapping of resource types to their corresponding API instances + or fallback proxies. + + Attributes + ---------- + dataset : ResourceAPI | FallbackProxy + API interface for dataset resources. + task : ResourceAPI | FallbackProxy + API interface for task resources. + evaluation_measure : ResourceAPI | FallbackProxy + API interface for evaluation measure resources. + estimation_procedure : ResourceAPI | FallbackProxy + API interface for estimation procedure resources. 
+ evaluation : ResourceAPI | FallbackProxy + API interface for evaluation resources. + flow : ResourceAPI | FallbackProxy + API interface for flow resources. + study : ResourceAPI | FallbackProxy + API interface for study resources. + run : ResourceAPI | FallbackProxy + API interface for run resources. + setup : ResourceAPI | FallbackProxy + API interface for setup resources. + """ + def __init__( self, resource_apis: Mapping[ResourceType, ResourceAPI | FallbackProxy], @@ -29,6 +64,24 @@ def __init__( @classmethod def build(cls, config: Config) -> APIBackendBuilder: + """ + Construct an APIBackendBuilder instance from a configuration. + + This method initializes HTTP and MinIO clients, creates resource-specific + API instances for the primary API version, and optionally wraps them + with fallback proxies if a fallback API version is configured. + + Parameters + ---------- + config : Config + Configuration object containing API versions, endpoints, cache + settings, and connection parameters. + + Returns + ------- + APIBackendBuilder + Builder instance with all resource API interfaces initialized. + """ cache_dir = Path(config.cache.dir).expanduser() http_cache = HTTPCache(path=cache_dir, ttl=config.cache.ttl) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 4108227aa..002beabe0 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -10,6 +10,19 @@ @dataclass class APIConfig: + """ + Configuration for a specific OpenML API version. + + Parameters + ---------- + server : str + Base server URL for the API. + base_url : str + API-specific base path appended to the server URL. + api_key : str + API key used for authentication. + """ + server: str base_url: str api_key: str @@ -17,18 +30,59 @@ class APIConfig: @dataclass class ConnectionConfig: + """ + Configuration for HTTP connection behavior. + + Parameters + ---------- + retries : int + Number of retry attempts for failed requests. + retry_policy : RetryPolicy + Policy for determining delays between retries (human-like or robot-like). + """ + retries: int retry_policy: RetryPolicy @dataclass class CacheConfig: + """ + Configuration for caching API responses locally. + + Parameters + ---------- + dir : str + Path to the directory where cached files will be stored. + ttl : int + Time-to-live for cached entries, in seconds. + """ + dir: str ttl: int @dataclass class Config: + """ + Global configuration for the OpenML Python client. + + Includes API versions, connection settings, and caching options. + + Attributes + ---------- + api_version : APIVersion + Primary API version to use (default is V1). + fallback_api_version : APIVersion or None + Optional fallback API version if the primary API does not support certain operations. + api_configs : dict of APIVersion to APIConfig + Mapping from API version to its server/base URL and API key configuration. + connection : ConnectionConfig + Settings for request retries and retry policy. + cache : CacheConfig + Settings for local caching of API responses. 
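+
+    Examples
+    --------
+    A minimal construction sketch; the URLs, key, retry count, cache
+    directory, and TTL below are illustrative values:
+
+    >>> config = Config(
+    ...     api_version=APIVersion.V1,
+    ...     fallback_api_version=None,
+    ...     api_configs={
+    ...         APIVersion.V1: APIConfig(
+    ...             server="https://www.openml.org/",
+    ...             base_url="api/v1/xml/",
+    ...             api_key="...",
+    ...         ),
+    ...     },
+    ...     connection=ConnectionConfig(retries=3, retry_policy=RetryPolicy.HUMAN),
+    ...     cache=CacheConfig(dir="~/.cache/openml", ttl=3600),
+    ... )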
+ """ + api_version: APIVersion = APIVersion.V1 fallback_api_version: APIVersion | None = None From 53bee943aba0d564170f824de5108e569e937cc7 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Thu, 12 Feb 2026 17:39:37 +0500 Subject: [PATCH 70/86] update minio --- openml/_api/clients/minio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/clients/minio.py b/openml/_api/clients/minio.py index 1e9b534fb..e6a94a6e4 100644 --- a/openml/_api/clients/minio.py +++ b/openml/_api/clients/minio.py @@ -29,6 +29,6 @@ class MinIOClient: OpenML Python client version. """ - def __init__(self, path: Path | None = None) -> None: + def __init__(self, path: Path) -> None: self.path = path self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} From 33b4ca0f103e0fa9d37368f6ee632d7e1f3217b9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:37:36 +0500 Subject: [PATCH 71/86] make delay functions static --- openml/_api/clients/http.py | 6 +++--- openml/_api/clients/utils.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 openml/_api/clients/utils.py diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index a1ccc5122..b90818921 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -27,6 +27,8 @@ OpenMLServerNoResult, ) +from .utils import human_delay, robot_delay + class HTTPCache: """ @@ -245,9 +247,7 @@ def __init__( # noqa: PLR0913 self.retry_policy = retry_policy self.cache = cache - self.retry_func = ( - self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay - ) + self.retry_func = human_delay if retry_policy == RetryPolicy.HUMAN else robot_delay self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} def _robot_delay(self, n: int) -> float: diff --git a/openml/_api/clients/utils.py b/openml/_api/clients/utils.py new file mode 100644 index 000000000..c21732504 --- /dev/null +++ b/openml/_api/clients/utils.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import math +import random + + +def robot_delay(n: int) -> float: + """ + Compute delay for automated retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ + wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60 + variation = random.gauss(0, wait / 10) + return max(1.0, wait + variation) + + +def human_delay(n: int) -> float: + """ + Compute delay for human-like retry policy. + + Parameters + ---------- + n : int + Current retry attempt number (1-based). + + Returns + ------- + float + Number of seconds to wait before the next retry. + """ + return max(1.0, n) From a6b9a45d6248dd9e24380d918b06d2b97edf0bbb Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:41:19 +0500 Subject: [PATCH 72/86] rename: retry_raise_e -> exception --- openml/_api/clients/http.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index b90818921..e344bcecb 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -446,7 +446,7 @@ def _validate_response( if response.status_code == requests.codes.URI_TOO_LONG: raise OpenMLServerError(f"URI too long! 
({url})") - retry_raise_e: Exception | None = None + exception: Exception | None = None code: int | None = None message: str = "" @@ -461,7 +461,7 @@ def _validate_response( f"developers!\n{extra}" ) from e - retry_raise_e = e + exception = e except Exception as e: # If we failed to parse it out, @@ -480,10 +480,10 @@ def _validate_response( files=files, ) - if retry_raise_e is None: - retry_raise_e = OpenMLServerException(code=code, message=message, url=url) + if exception is None: + exception = OpenMLServerException(code=code, message=message, url=url) - return retry_raise_e + return exception def _request( # noqa: PLR0913 self, @@ -523,7 +523,7 @@ def _request( # noqa: PLR0913 tuple of (requests.Response or None, Exception or None) Response and potential retry exception. """ - retry_raise_e: Exception | None = None + exception: Exception | None = None response: Response | None = None try: @@ -541,17 +541,17 @@ def _request( # noqa: PLR0913 requests.exceptions.ConnectionError, requests.exceptions.SSLError, ) as e: - retry_raise_e = e + exception = e if response is not None: - retry_raise_e = self._validate_response( + exception = self._validate_response( method=method, url=url, files=files, response=response, ) - return response, retry_raise_e + return response, exception def request( # noqa: PLR0913, C901 self, @@ -626,7 +626,7 @@ def request( # noqa: PLR0913, C901 session = requests.Session() for retry_counter in range(1, retries + 1): - response, retry_raise_e = self._request( + response, exception = self._request( session=session, method=method, url=url, @@ -638,11 +638,11 @@ def request( # noqa: PLR0913, C901 ) # executed successfully - if retry_raise_e is None: + if exception is None: break # tries completed if retry_counter >= retries: - raise retry_raise_e + raise exception delay = self.retry_func(retry_counter) time.sleep(delay) From f924b3207037b47622415bc3b8ae6a8096683232 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 21:42:57 +0500 Subject: [PATCH 73/86] use context-manager for requests.Session --- openml/_api/clients/http.py | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e344bcecb..e624b2f54 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -624,30 +624,28 @@ def request( # noqa: PLR0913, C901 except Exception: raise # propagate unexpected cache errors - session = requests.Session() - for retry_counter in range(1, retries + 1): - response, exception = self._request( - session=session, - method=method, - url=url, - params=params, - data=data, - headers=headers, - files=files, - **request_kwargs, - ) - - # executed successfully - if exception is None: - break - # tries completed - if retry_counter >= retries: - raise exception + with requests.Session() as session: + for retry_counter in range(1, retries + 1): + response, exception = self._request( + session=session, + method=method, + url=url, + params=params, + data=data, + headers=headers, + files=files, + **request_kwargs, + ) - delay = self.retry_func(retry_counter) - time.sleep(delay) + # executed successfully + if exception is None: + break + # tries completed + if retry_counter >= retries: + raise exception - session.close() + delay = self.retry_func(retry_counter) + time.sleep(delay) assert response is not None From 541b0f26ff4a9fc565ad529712f2b38d700a1252 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:01:52 +0500 Subject: [PATCH 74/86] 
remove "assert response is not None" --- openml/_api/clients/http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e624b2f54..926829c71 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -9,7 +9,7 @@ import xml from collections.abc import Callable, Mapping from pathlib import Path -from typing import Any +from typing import Any, cast from urllib.parse import urlencode, urljoin, urlparse import requests @@ -647,7 +647,9 @@ def request( # noqa: PLR0913, C901 delay = self.retry_func(retry_counter) time.sleep(delay) - assert response is not None + # response is guaranteed to be not `None` + # otherwise an exception would have been raised before + response = cast("Response", response) if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) From acb173fa0e5e36464769eb069004a6cd02782811 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:07:04 +0500 Subject: [PATCH 75/86] verify checksum before caching --- openml/_api/clients/http.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index 926829c71..d2c5b124f 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -651,13 +651,13 @@ def request( # noqa: PLR0913, C901 # otherwise an exception would have been raised before response = cast("Response", response) + if md5_checksum is not None: + self._verify_checksum(response, md5_checksum) + if use_cache and self.cache is not None: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) - if md5_checksum is not None: - self._verify_checksum(response, md5_checksum) - return response def _verify_checksum(self, response: Response, md5_checksum: str) -> None: From 3e8d1f0dc158d281a181000e5f35efe26b69d571 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:42:03 +0500 Subject: [PATCH 76/86] update tests --- tests/test_api/test_http.py | 37 ++++++++++++++------------------- tests/test_api/test_versions.py | 9 ++++++-- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 8dc6303d1..2a1f2dcd5 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -2,7 +2,7 @@ import time import xmltodict import pytest -from openml.testing import TestAPIBase +from openml.testing import TestBase, TestAPIBase import os from pathlib import Path from urllib.parse import urljoin @@ -155,27 +155,22 @@ def test_post_and_delete(self): 17 """ + # post + response = self.http_client.post( + "task", + files={"description": task_xml}, + ) + self.assertEqual(response.status_code, 200) + xml_resp = xmltodict.parse(response.content) + task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - task_id = None - try: - # POST the task - post_response = self.http_client.post( - "task", - files={"description": task_xml}, - ) - self.assertEqual(post_response.status_code, 200) - xml_resp = xmltodict.parse(post_response.content) - task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - - # GET the task to verify it exists - get_response = self.http_client.get(f"task/{task_id}") - self.assertEqual(get_response.status_code, 200) - - finally: - # DELETE the task if it was created - if task_id is not None: - del_response = self.http_client.delete(f"task/{task_id}") - self.assertEqual(del_response.status_code, 200) + # cleanup incase of failure + 
TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info(f"collected from {__file__}: {task_id}") + + # delete + response = self.http_client.delete(f"task/{task_id}") + self.assertEqual(response.status_code, 200) def test_download_requires_cache(self): client = HTTPClient( diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 1313889bc..cdb37a0d3 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,6 +1,6 @@ from time import time import pytest -from openml.testing import TestAPIBase +from openml.testing import TestBase, TestAPIBase from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError @@ -18,13 +18,18 @@ def _publish_and_delete(self): 17 """ - + # publish task_id = self.resource.publish( "task", files={"description": task_xml}, ) self.assertIsNotNone(task_id) + # cleanup incase of failure + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info(f"collected from {__file__}: {task_id}") + + # delete success = self.resource.delete(task_id) self.assertTrue(success) From f83bdb5c0d2fc09c38ce948ba2b49ed23207e547 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Fri, 13 Feb 2026 22:46:57 +0500 Subject: [PATCH 77/86] minor fix in ResourceV1API.untag --- openml/_api/resources/base/versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/resources/base/versions.py b/openml/_api/resources/base/versions.py index 51a958b90..dc41ba971 100644 --- a/openml/_api/resources/base/versions.py +++ b/openml/_api/resources/base/versions.py @@ -166,7 +166,7 @@ def untag(self, resource_id: int, tag: str) -> list[str]: legal_resources = {"data", "task", "flow", "setup", "run"} if resource_type not in legal_resources: - raise ValueError(f"Can't tag a {resource_type}") + raise ValueError(f"Can't untag a {resource_type}") path = f"{resource_type}/untag" data = {f"{resource_type}_id": resource_id, "tag": tag} From 2a42712d465c404a437b8f52ed49aa86a08f55e3 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 16 Feb 2026 18:54:25 +0500 Subject: [PATCH 78/86] remove cache.ttl --- openml/_api/clients/http.py | 9 +-------- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 5 ----- openml/testing.py | 2 -- tests/test_api/test_http.py | 23 ----------------------- 5 files changed, 2 insertions(+), 39 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index d2c5b124f..dba9cac6b 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -44,9 +44,6 @@ class HTTPCache: ---------- path : pathlib.Path Base directory where cache entries are stored. - ttl : int - Time-to-live in seconds. Cached entries older than this value are treated - as expired. Notes ----- @@ -54,9 +51,8 @@ class HTTPCache: parameters, excluding the ``api_key`` parameter. 
""" - def __init__(self, *, path: Path, ttl: int) -> None: + def __init__(self, *, path: Path) -> None: self.path = path - self.ttl = ttl def get_key(self, url: str, params: dict[str, Any]) -> str: """ @@ -144,9 +140,6 @@ def load(self, key: str) -> Response: if created_at is None: raise ValueError("Cache metadata missing 'created_at'") - if time.time() - created_at > self.ttl: - raise TimeoutError(f"Cache expired for {path}") - with headers_path.open("r", encoding="utf-8") as f: headers = json.load(f) diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 6263066b2..05c37807d 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -84,7 +84,7 @@ def build(cls, config: Config) -> APIBackendBuilder: """ cache_dir = Path(config.cache.dir).expanduser() - http_cache = HTTPCache(path=cache_dir, ttl=config.cache.ttl) + http_cache = HTTPCache(path=cache_dir) minio_client = MinIOClient(path=cache_dir) primary_api_config = config.api_configs[config.api_version] diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index 002beabe0..fb1fee3a9 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -1,7 +1,6 @@ from __future__ import annotations from dataclasses import dataclass, field -from datetime import timedelta from openml.enums import APIVersion, RetryPolicy @@ -54,12 +53,9 @@ class CacheConfig: ---------- dir : str Path to the directory where cached files will be stored. - ttl : int - Time-to-live for cached entries, in seconds. """ dir: str - ttl: int @dataclass @@ -111,6 +107,5 @@ class Config: cache: CacheConfig = field( default_factory=lambda: CacheConfig( dir=str(_resolve_default_cache_dir()), - ttl=int(timedelta(weeks=1).total_seconds()), ) ) diff --git a/openml/testing.py b/openml/testing.py index 5a1a4d10f..54b95d23d 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -291,12 +291,10 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: retries = self.connection_n_retries retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT - ttl = openml._backend.get_config_value("cache.ttl") cache_dir = self.static_cache_dir self.cache = HTTPCache( path=cache_dir, - ttl=ttl, ) self.http_clients = { APIVersion.V1: HTTPClient( diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 2a1f2dcd5..c83536119 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -105,29 +105,6 @@ def test_get_uses_cached_response(self): self.assertEqual(response1.content, response2.content) self.assertEqual(response1.status_code, response2.status_code) - @pytest.mark.uses_test_server() - def test_get_cache_expires(self): - # force short TTL - self.cache.ttl = 1 - path = "task/1" - - url = self._prepare_url(path=path) - key = self.cache.get_key(url, {}) - cache_path = self.cache._key_to_path(key) / "meta.json" - - response1 = self.http_client.get(path, use_cache=True) - response1_cache_time_stamp = cache_path.stat().st_ctime - - time.sleep(2) - - response2 = self.http_client.get(path, use_cache=True) - response2_cache_time_stamp = cache_path.stat().st_ctime - - # cache expired -> new request - self.assertNotEqual(response1_cache_time_stamp, response2_cache_time_stamp) - self.assertEqual(response2.status_code, 200) - self.assertEqual(response1.content, response2.content) - @pytest.mark.uses_test_server() def test_get_reset_cache(self): path = "task/1" From 001caad5669af089319af306a8c3d9d4bdb108b3 Mon Sep 17 00:00:00 2001 From: geetu040 
Date: Mon, 16 Feb 2026 19:14:57 +0500 Subject: [PATCH 79/86] replace config.cache.dir with config.cache_dir --- openml/_api/__init__.py | 2 -- openml/_api/setup/__init__.py | 3 +-- openml/_api/setup/builder.py | 2 +- openml/_api/setup/config.py | 25 +++---------------------- openml/config.py | 2 +- 5 files changed, 6 insertions(+), 28 deletions(-) diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 926fee3d4..b7846fd39 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -41,7 +41,6 @@ APIBackend, APIBackendBuilder, APIConfig, - CacheConfig, Config, ConnectionConfig, _backend, @@ -52,7 +51,6 @@ "APIBackend", "APIBackendBuilder", "APIConfig", - "CacheConfig", "Config", "ConnectionConfig", "DatasetAPI", diff --git a/openml/_api/setup/__init__.py b/openml/_api/setup/__init__.py index 1c28cfa9e..1f6e60ecb 100644 --- a/openml/_api/setup/__init__.py +++ b/openml/_api/setup/__init__.py @@ -1,13 +1,12 @@ from ._instance import _backend from .backend import APIBackend from .builder import APIBackendBuilder -from .config import APIConfig, CacheConfig, Config, ConnectionConfig +from .config import APIConfig, Config, ConnectionConfig __all__ = [ "APIBackend", "APIBackendBuilder", "APIConfig", - "CacheConfig", "Config", "ConnectionConfig", "_backend", diff --git a/openml/_api/setup/builder.py b/openml/_api/setup/builder.py index 05c37807d..aa6ed4bba 100644 --- a/openml/_api/setup/builder.py +++ b/openml/_api/setup/builder.py @@ -82,7 +82,7 @@ def build(cls, config: Config) -> APIBackendBuilder: APIBackendBuilder Builder instance with all resource API interfaces initialized. """ - cache_dir = Path(config.cache.dir).expanduser() + cache_dir = Path(config.cache_dir).expanduser() http_cache = HTTPCache(path=cache_dir) minio_client = MinIOClient(path=cache_dir) diff --git a/openml/_api/setup/config.py b/openml/_api/setup/config.py index fb1fee3a9..5f6cd7891 100644 --- a/openml/_api/setup/config.py +++ b/openml/_api/setup/config.py @@ -44,20 +44,6 @@ class ConnectionConfig: retry_policy: RetryPolicy -@dataclass -class CacheConfig: - """ - Configuration for caching API responses locally. - - Parameters - ---------- - dir : str - Path to the directory where cached files will be stored. - """ - - dir: str - - @dataclass class Config: """ @@ -71,16 +57,17 @@ class Config: Primary API version to use (default is V1). fallback_api_version : APIVersion or None Optional fallback API version if the primary API does not support certain operations. + cache_dir : str + Path to the directory where cached files will be stored. api_configs : dict of APIVersion to APIConfig Mapping from API version to its server/base URL and API key configuration. connection : ConnectionConfig Settings for request retries and retry policy. - cache : CacheConfig - Settings for local caching of API responses. 
""" api_version: APIVersion = APIVersion.V1 fallback_api_version: APIVersion | None = None + cache_dir: str = str(_resolve_default_cache_dir()) api_configs: dict[APIVersion, APIConfig] = field( default_factory=lambda: { @@ -103,9 +90,3 @@ class Config: retry_policy=RetryPolicy.HUMAN, ) ) - - cache: CacheConfig = field( - default_factory=lambda: CacheConfig( - dir=str(_resolve_default_cache_dir()), - ) - ) diff --git a/openml/config.py b/openml/config.py index 692543a00..1c34f6949 100644 --- a/openml/config.py +++ b/openml/config.py @@ -540,10 +540,10 @@ def _sync_api_config() -> None: APIBackend.set_config_values( { + "cache_dir": cache_dir, "api_configs.v1.server": v1_server, "api_configs.v1.base_url": v1_base_url, "api_configs.v1.api_key": apikey, - "cache.dir": cache_dir, "connection.retry_policy": connection_retry_policy, "connection.retries": connection_n_retries, } From fb38a2d3affdcac8ba9c15ab315371a8415b1e1d Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 10:46:24 +0500 Subject: [PATCH 80/86] make HTTPClient.cache compulsory --- openml/_api/clients/http.py | 13 +++---------- tests/test_api/test_http.py | 14 -------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index dba9cac6b..e9f881e2e 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -19,7 +19,6 @@ from openml.__version__ import __version__ from openml.enums import RetryPolicy from openml.exceptions import ( - OpenMLCacheRequiredError, OpenMLHashException, OpenMLNotAuthorizedError, OpenMLServerError, @@ -231,7 +230,7 @@ def __init__( # noqa: PLR0913 api_key: str, retries: int, retry_policy: RetryPolicy, - cache: HTTPCache | None = None, + cache: HTTPCache, ) -> None: self.server = server self.base_url = base_url @@ -608,7 +607,7 @@ def request( # noqa: PLR0913, C901 files = request_kwargs.pop("files", None) - if use_cache and not reset_cache and self.cache is not None: + if use_cache and not reset_cache: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -647,7 +646,7 @@ def request( # noqa: PLR0913, C901 if md5_checksum is not None: self._verify_checksum(response, md5_checksum) - if use_cache and self.cache is not None: + if use_cache: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) @@ -812,15 +811,9 @@ def download( Raises ------ - OpenMLCacheRequiredError - If no cache instance is configured. OpenMLHashException If checksum verification fails. """ - if self.cache is None: - raise OpenMLCacheRequiredError( - "A cache object is required for download, but none was provided in the HTTPClient." 
- ) base = self.cache.path file_path = base / "downloads" / urlparse(url).path.lstrip("/") / file_name file_path = file_path.expanduser() diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index c83536119..ef20bd4ca 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -8,7 +8,6 @@ from urllib.parse import urljoin from openml.enums import APIVersion from openml._api import HTTPClient -from openml.exceptions import OpenMLCacheRequiredError class TestHTTPClient(TestAPIBase): @@ -149,19 +148,6 @@ def test_post_and_delete(self): response = self.http_client.delete(f"task/{task_id}") self.assertEqual(response.status_code, 200) - def test_download_requires_cache(self): - client = HTTPClient( - server=self.http_client.server, - base_url=self.http_client.base_url, - api_key=self.http_client.api_key, - retries=1, - retry_policy=self.http_client.retry_policy, - cache=None, - ) - - with pytest.raises(OpenMLCacheRequiredError): - client.download("https://www.openml.org") - @pytest.mark.uses_test_server() def test_download_creates_file(self): # small stable resource From 03c4ca9d93693fc59341e4c1c00d8d8585079a4b Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 10:47:12 +0500 Subject: [PATCH 81/86] remove unused OpenMLCacheRequiredError --- openml/exceptions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/openml/exceptions.py b/openml/exceptions.py index 10f693648..26c2d2591 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -69,7 +69,3 @@ class ObjectNotPublishedError(PyOpenMLError): class OpenMLNotSupportedError(PyOpenMLError): """Raised when an API operation is not supported for a resource/version.""" - - -class OpenMLCacheRequiredError(PyOpenMLError): - """Raised when a cache object is required but not provided.""" From 8d708fd287611964309993faf8094a4d3f08f5b9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 11:00:56 +0500 Subject: [PATCH 82/86] implement and use TestAPIBase._create_resource --- openml/testing.py | 9 +++++++-- tests/test_api/test_versions.py | 30 ++++++++++++++++-------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index 54b95d23d..9c31e9288 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -15,8 +15,8 @@ import requests import openml -from openml._api import HTTPCache, HTTPClient, MinIOClient -from openml.enums import APIVersion, RetryPolicy +from openml._api import API_REGISTRY, HTTPCache, HTTPClient, MinIOClient, ResourceAPI +from openml.enums import APIVersion, ResourceType, RetryPolicy from openml.exceptions import OpenMLServerException from openml.tasks import TaskType @@ -316,6 +316,11 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: } self.minio_client = MinIOClient(path=cache_dir) + def _create_resource(self, api_version: APIVersion, resource_type: ResourceType) -> ResourceAPI: + http_client = self.http_clients[api_version] + resource_cls = API_REGISTRY[api_version][resource_type] + return resource_cls(http=http_client, minio=self.minio_client) + def check_task_existence( task_type: TaskType, diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index cdb37a0d3..2be35ba5c 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -48,9 +48,10 @@ def _tag_and_untag(self): class TestResourceV1API(TestResourceAPIBase): def setUp(self): super().setUp() - http_client = self.http_clients[APIVersion.V1] - self.resource = ResourceV1API(http_client) 
- self.resource.resource_type = ResourceType.TASK + self.resource = self._create_resource( + api_version=APIVersion.V1, + resource_type=ResourceType.TASK, + ) def test_publish_and_delete(self): self._publish_and_delete() @@ -62,9 +63,10 @@ def test_tag_and_untag(self): class TestResourceV2API(TestResourceAPIBase): def setUp(self): super().setUp() - http_client = self.http_clients[APIVersion.V2] - self.resource = ResourceV2API(http_client) - self.resource.resource_type = ResourceType.TASK + self.resource = self._create_resource( + api_version=APIVersion.V2, + resource_type=ResourceType.TASK, + ) def test_publish_and_delete(self): with pytest.raises(OpenMLNotSupportedError): @@ -78,14 +80,14 @@ def test_tag_and_untag(self): class TestResourceFallbackAPI(TestResourceAPIBase): def setUp(self): super().setUp() - http_client_v1 = self.http_clients[APIVersion.V1] - resource_v1 = ResourceV1API(http_client_v1) - resource_v1.resource_type = ResourceType.TASK - - http_client_v2 = self.http_clients[APIVersion.V2] - resource_v2 = ResourceV2API(http_client_v2) - resource_v2.resource_type = ResourceType.TASK - + resource_v1 = self._create_resource( + api_version=APIVersion.V1, + resource_type=ResourceType.TASK, + ) + resource_v2 = self._create_resource( + api_version=APIVersion.V2, + resource_type=ResourceType.TASK, + ) self.resource = FallbackProxy(resource_v2, resource_v1) def test_publish_and_delete(self): From 4f75bbadff265a9aa38284dad7af7409687eb24c Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 11:01:48 +0500 Subject: [PATCH 83/86] make ResourceAPI.minio compulsory --- openml/_api/resources/base/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/_api/resources/base/base.py b/openml/_api/resources/base/base.py index 5a2c1faa6..51e41a0c8 100644 --- a/openml/_api/resources/base/base.py +++ b/openml/_api/resources/base/base.py @@ -44,7 +44,7 @@ class ResourceAPI(ABC): api_version: APIVersion resource_type: ResourceType - def __init__(self, http: HTTPClient, minio: MinIOClient | None = None): + def __init__(self, http: HTTPClient, minio: MinIOClient): self._http = http self._minio = minio From c4dae4362d2e7a46d387bbf315b3b25c1ba71493 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 12:43:13 +0500 Subject: [PATCH 84/86] rename: use_cache -> enable_cache; reset_cache -> refresh_cache --- openml/_api/clients/http.py | 33 +++++++++++++++++---------------- tests/test_api/test_http.py | 12 ++++++------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index e9f881e2e..3ab0def4f 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -550,8 +550,8 @@ def request( # noqa: PLR0913, C901 method: str, path: str, *, - use_cache: bool = False, - reset_cache: bool = False, + enable_cache: bool = False, + refresh_cache: bool = False, use_api_key: bool = False, md5_checksum: str | None = None, **request_kwargs: Any, @@ -565,10 +565,11 @@ def request( # noqa: PLR0913, C901 HTTP method to use. path : str API path relative to the base URL. - use_cache : bool, optional - Whether to load/store responses from cache. - reset_cache : bool, optional - If True, bypass existing cache entries. + enable_cache : bool, optional + Whether to load/store response from cache. + refresh_cache : bool, optional + Only used when `enable_cache=True`. If True, ignore any existing + cached response and overwrite it with a fresh one. 
use_api_key : bool, optional Whether to include the API key in query parameters. md5_checksum : str or None, optional @@ -607,7 +608,7 @@ def request( # noqa: PLR0913, C901 files = request_kwargs.pop("files", None) - if use_cache and not reset_cache: + if enable_cache and not refresh_cache: cache_key = self.cache.get_key(url, params) try: return self.cache.load(cache_key) @@ -646,7 +647,7 @@ def request( # noqa: PLR0913, C901 if md5_checksum is not None: self._verify_checksum(response, md5_checksum) - if use_cache: + if enable_cache: cache_key = self.cache.get_key(url, params) self.cache.save(cache_key, response) @@ -680,8 +681,8 @@ def get( self, path: str, *, - use_cache: bool = False, - reset_cache: bool = False, + enable_cache: bool = False, + refresh_cache: bool = False, use_api_key: bool = False, md5_checksum: str | None = None, **request_kwargs: Any, @@ -693,9 +694,9 @@ def get( ---------- path : str API path relative to the base URL. - use_cache : bool, optional + enable_cache : bool, optional Whether to use the response cache. - reset_cache : bool, optional + refresh_cache : bool, optional Whether to ignore existing cached entries. use_api_key : bool, optional Whether to include the API key. @@ -712,8 +713,8 @@ def get( return self.request( method="GET", path=path, - use_cache=use_cache, - reset_cache=reset_cache, + enable_cache=enable_cache, + refresh_cache=refresh_cache, use_api_key=use_api_key, md5_checksum=md5_checksum, **request_kwargs, @@ -746,7 +747,7 @@ def post( return self.request( method="POST", path=path, - use_cache=False, + enable_cache=False, use_api_key=use_api_key, **request_kwargs, ) @@ -774,7 +775,7 @@ def delete( return self.request( method="DELETE", path=path, - use_cache=False, + enable_cache=False, use_api_key=True, **request_kwargs, ) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index ef20bd4ca..5ecd225d3 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -77,7 +77,7 @@ def test_get(self): @pytest.mark.uses_test_server() def test_get_with_cache_creates_cache(self): - response = self.http_client.get("task/1", use_cache=True) + response = self.http_client.get("task/1", enable_cache=True) self.assertEqual(response.status_code, 200) self.assertTrue(self.cache.path.exists()) @@ -96,26 +96,26 @@ def test_get_with_cache_creates_cache(self): @pytest.mark.uses_test_server() def test_get_uses_cached_response(self): # first request populates cache - response1 = self.http_client.get("task/1", use_cache=True) + response1 = self.http_client.get("task/1", enable_cache=True) # second request should load from cache - response2 = self.http_client.get("task/1", use_cache=True) + response2 = self.http_client.get("task/1", enable_cache=True) self.assertEqual(response1.content, response2.content) self.assertEqual(response1.status_code, response2.status_code) @pytest.mark.uses_test_server() - def test_get_reset_cache(self): + def test_get_refresh_cache(self): path = "task/1" url = self._prepare_url(path=path) key = self.cache.get_key(url, {}) cache_path = self.cache._key_to_path(key) / "meta.json" - response1 = self.http_client.get(path, use_cache=True) + response1 = self.http_client.get(path, enable_cache=True) response1_cache_time_stamp = cache_path.stat().st_ctime - response2 = self.http_client.get(path, use_cache=True, reset_cache=True) + response2 = self.http_client.get(path, enable_cache=True, refresh_cache=True) response2_cache_time_stamp = cache_path.stat().st_ctime self.assertNotEqual(response1_cache_time_stamp, 
response2_cache_time_stamp) From 36c20a2e0ddecf99b33f1c334729367cc67d7ed9 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 14:28:11 +0500 Subject: [PATCH 85/86] use server config from TestBase --- openml/testing.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/openml/testing.py b/openml/testing.py index dbb7945bc..a971275d9 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -293,14 +293,18 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: retry_policy = RetryPolicy.HUMAN if self.retry_policy == "human" else RetryPolicy.ROBOT cache_dir = self.static_cache_dir + v1_server = self.test_server.split("api/")[0] + v1_base_url = self.test_server.replace(v1_server, "").rstrip("/") + "/" + v1_api_key = self.user_key + self.cache = HTTPCache( path=cache_dir, ) self.http_clients = { APIVersion.V1: HTTPClient( - server="https://test.openml.org/", - base_url="api/v1/xml/", - api_key="normaluser", + server=v1_server, + base_url=v1_base_url, + api_key=v1_api_key, retries=retries, retry_policy=retry_policy, cache=self.cache, From ab3c1eb674233f773a52e31fcbea6d20aec88017 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 17 Feb 2026 14:28:55 +0500 Subject: [PATCH 86/86] tests: mock HTTP post calls to prevent race conditions Previously, multiple tests were publishing the same task concurrently, which increased the likelihood of race conditions and flaky failures. This update replaces real HTTP post calls with mocks, making the tests deterministic and isolated from the server. --- tests/test_api/test_http.py | 74 +++++++------ tests/test_api/test_versions.py | 182 +++++++++++++++++++++++--------- 2 files changed, 176 insertions(+), 80 deletions(-) diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 5ecd225d3..73a29264d 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -1,8 +1,7 @@ -from requests import Response, Request -import time -import xmltodict +from requests import Response, Request, Session +from unittest.mock import patch import pytest -from openml.testing import TestBase, TestAPIBase +from openml.testing import TestAPIBase import os from pathlib import Path from urllib.parse import urljoin @@ -122,32 +121,6 @@ def test_get_refresh_cache(self): self.assertEqual(response2.status_code, 200) self.assertEqual(response1.content, response2.content) - @pytest.mark.uses_test_server() - def test_post_and_delete(self): - task_xml = """ - - 5 - 193 - 17 - - """ - # post - response = self.http_client.post( - "task", - files={"description": task_xml}, - ) - self.assertEqual(response.status_code, 200) - xml_resp = xmltodict.parse(response.content) - task_id = int(xml_resp["oml:upload_task"]["oml:id"]) - - # cleanup incase of failure - TestBase._mark_entity_for_removal("task", task_id) - TestBase.logger.info(f"collected from {__file__}: {task_id}") - - # delete - response = self.http_client.delete(f"task/{task_id}") - self.assertEqual(response.status_code, 200) - @pytest.mark.uses_test_server() def test_download_creates_file(self): # small stable resource @@ -198,3 +171,44 @@ def handler(response, path: Path, encoding: str): assert path.exists() assert path.read_text() == "HANDLED" + + def test_post(self): + resource_name = "resource" + resource_files = {"description": """Resource Description File"""} + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + + self.http_client.post( + resource_name, + 
files=resource_files, + ) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + resource_name, + params={}, + data={'api_key': self.http_client.api_key}, + headers=self.http_client.headers, + files=resource_files, + ) + + def test_delete(self): + resource_name = "resource" + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + + self.http_client.delete(f"{resource_name}/{resource_id}") + + mock_request.assert_called_once_with( + method="DELETE", + url=self.http_client.server + self.http_client.base_url + resource_name + "/" + str(resource_id), + params={'api_key': self.http_client.api_key}, + data={}, + headers=self.http_client.headers, + files=None, + ) diff --git a/tests/test_api/test_versions.py b/tests/test_api/test_versions.py index 2be35ba5c..fd953f3ac 100644 --- a/tests/test_api/test_versions.py +++ b/tests/test_api/test_versions.py @@ -1,49 +1,106 @@ -from time import time import pytest -from openml.testing import TestBase, TestAPIBase -from openml._api import ResourceV1API, ResourceV2API, FallbackProxy, ResourceAPI +from requests import Session, Response +from unittest.mock import patch +from openml.testing import TestAPIBase +from openml._api import FallbackProxy, ResourceAPI from openml.enums import ResourceType, APIVersion from openml.exceptions import OpenMLNotSupportedError -@pytest.mark.uses_test_server() class TestResourceAPIBase(TestAPIBase): resource: ResourceAPI | FallbackProxy - def _publish_and_delete(self): - task_xml = """ - - 5 - 193 - 17 - - """ - # publish - task_id = self.resource.publish( - "task", - files={"description": task_xml}, - ) - self.assertIsNotNone(task_id) - - # cleanup incase of failure - TestBase._mark_entity_for_removal("task", task_id) - TestBase.logger.info(f"collected from {__file__}: {task_id}") - - # delete - success = self.resource.delete(task_id) - self.assertTrue(success) - - def _tag_and_untag(self): - resource_id = 1 - unique_indicator = str(time()).replace(".", "") - tag = f"{self.__class__.__name__}_test_tag_and_untag_{unique_indicator}" - - tags = self.resource.tag(resource_id, tag) - self.assertIn(tag, tags) - - tags = self.resource.untag(resource_id, tag) - self.assertNotIn(tag, tags) - + @property + def http_client(self): + return self.resource._http + + def _publish(self): + resource_name = "task" + resource_files = {"description": """Resource Description File"""} + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'\n\t{resource_id}\n\n'.encode("utf-8") + + published_resource_id = self.resource.publish( + resource_name, + files=resource_files, + ) + + self.assertEqual(resource_id, published_resource_id) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + resource_name, + params={}, + data={'api_key': self.http_client.api_key}, + headers=self.http_client.headers, + files=resource_files, + ) + + def _delete(self): + resource_name = "task" + resource_id = 123 + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'\n {resource_id}\n\n'.encode("utf-8") + + self.resource.delete(resource_id) + + 
mock_request.assert_called_once_with( + method="DELETE", + url=self.http_client.server + self.http_client.base_url + resource_name + "/" + str(resource_id), + params={'api_key': self.http_client.api_key}, + data={}, + headers=self.http_client.headers, + files=None, + ) + + def _tag(self): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'{resource_id}{resource_tag}'.encode("utf-8") + + tags = self.resource.tag(resource_id, resource_tag) + self.assertIn(resource_tag, tags) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/tag", + params={}, + data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, + headers=self.http_client.headers, + files=None, + ) + + def _untag(self): + resource_id = 123 + resource_tag = "TAG" + + with patch.object(Session, "request") as mock_request: + mock_request.return_value = Response() + mock_request.return_value.status_code = 200 + mock_request.return_value._content = f'{resource_id}'.encode("utf-8") + + tags = self.resource.untag(resource_id, resource_tag) + self.assertNotIn(resource_tag, tags) + + mock_request.assert_called_once_with( + method="POST", + url=self.http_client.server + self.http_client.base_url + self.resource.resource_type + "/untag", + params={}, + data={'api_key': self.http_client.api_key, 'task_id': resource_id, 'tag': resource_tag}, + headers=self.http_client.headers, + files=None, + ) class TestResourceV1API(TestResourceAPIBase): def setUp(self): @@ -53,11 +110,17 @@ def setUp(self): resource_type=ResourceType.TASK, ) - def test_publish_and_delete(self): - self._publish_and_delete() + def test_publish(self): + self._publish() + + def test_delete(self): + self._delete() - def test_tag_and_untag(self): - self._tag_and_untag() + def test_tag(self): + self._tag() + + def test_untag(self): + self._untag() class TestResourceV2API(TestResourceAPIBase): @@ -68,16 +131,29 @@ def setUp(self): resource_type=ResourceType.TASK, ) - def test_publish_and_delete(self): + def test_publish(self): + with pytest.raises(OpenMLNotSupportedError): + self._publish() + + def test_delete(self): + with pytest.raises(OpenMLNotSupportedError): + self._delete() + + def test_tag(self): with pytest.raises(OpenMLNotSupportedError): - self._tag_and_untag() + self._tag() - def test_tag_and_untag(self): + def test_untag(self): with pytest.raises(OpenMLNotSupportedError): - self._tag_and_untag() + self._untag() class TestResourceFallbackAPI(TestResourceAPIBase): + @property + def http_client(self): + # since these methods are not implemented for v2, they will fallback to v1 api + return self.http_clients[APIVersion.V1] + def setUp(self): super().setUp() resource_v1 = self._create_resource( @@ -90,8 +166,14 @@ def setUp(self): ) self.resource = FallbackProxy(resource_v2, resource_v1) - def test_publish_and_delete(self): - self._publish_and_delete() + def test_publish(self): + self._publish() + + def test_delete(self): + self._delete() + + def test_tag(self): + self._tag() - def test_tag_and_untag(self): - self._tag_and_untag() + def test_untag(self): + self._untag()
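The following is a small illustration, not part of the patch series itself, of the retry back-off introduced in PATCH 71/86: robot_delay in openml/_api/clients/utils.py follows a logistic curve capped at roughly 60 seconds, while human_delay simply waits n seconds. The helper name and formula below are copied from that diff; the printed values are computed from the formula and exclude the Gaussian jitter the real function adds.

    import math

    def robot_delay_mean(n: int) -> float:
        # Deterministic part of robot_delay (the real helper adds Gaussian
        # jitter with sigma = wait / 10 and floors the result at 1.0 second).
        return (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60

    for n in (1, 2, 4, 8, 16):
        print(n, round(robot_delay_mean(n), 2))
    # approx: 1 -> 1.76, 2 -> 2.85, 4 -> 7.15, 8 -> 30.0, 16 -> 58.92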
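Likewise, a minimal usage sketch of the cache flags renamed in PATCH 84/86 (use_cache -> enable_cache, reset_cache -> refresh_cache). The HTTPClient/HTTPCache names and the get() signature come from the diffs above; the server, base_url, api_key, cache path, and retry values here are only illustrative placeholders, not the project's defaults.

    from pathlib import Path

    from openml._api import HTTPCache, HTTPClient
    from openml.enums import RetryPolicy

    cache = HTTPCache(path=Path("~/.openml/cache").expanduser())
    client = HTTPClient(
        server="https://www.openml.org/",
        base_url="api/v1/xml/",
        api_key="...",
        retries=3,
        retry_policy=RetryPolicy.HUMAN,
        cache=cache,
    )

    # enable_cache=True: return the cached response if one exists,
    # otherwise perform the request and store the result.
    response = client.get("task/1", enable_cache=True)

    # refresh_cache=True (only meaningful with enable_cache=True): ignore any
    # existing entry and overwrite it with a fresh response.
    response = client.get("task/1", enable_cache=True, refresh_cache=True)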