Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openml/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def check_server(server: str) -> str:

def replace_shorthand(server: str) -> str:
if server == "test":
return "https://test.openml.org/api/v1/xml"
return f"{config.TEST_SERVER_URL}/api/v1/xml"
if server == "production":
return "https://www.openml.org/api/v1/xml"
return server
Expand Down
7 changes: 5 additions & 2 deletions openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR = "OPENML_TEST_SERVER_ADMIN_KEY"
_TEST_SERVER_NORMAL_USER_KEY = "normaluser"

TEST_SERVER_URL = "https://test.openml.org"


class _Config(TypedDict):
apikey: str
Expand Down Expand Up @@ -214,7 +216,7 @@ class ConfigurationForExamples:
_last_used_server = None
_last_used_key = None
_start_last_called = False
_test_server = "https://test.openml.org/api/v1/xml"
_test_server = f"{TEST_SERVER_URL}/api/v1/xml"
_test_apikey = _TEST_SERVER_NORMAL_USER_KEY

@classmethod
Expand Down Expand Up @@ -470,7 +472,8 @@ def get_cache_directory() -> str:

"""
url_suffix = urlparse(server).netloc
reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118
url_parts = url_suffix.replace(":", "_").split(".")[::-1]
reversed_url_suffix = os.sep.join(url_parts) # noqa: PTH118
return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118


Expand Down
11 changes: 6 additions & 5 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,10 @@ def get_task(
if not isinstance(task_id, int):
raise TypeError(f"Task id should be integer, is {type(task_id)}")

cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
tid_cache_dir = cache_key_dir / str(task_id)
tid_cache_dir_existed = tid_cache_dir.exists()
task_cache_directory = openml.utils._create_cache_directory_for_id(
TASKS_CACHE_DIR_NAME, task_id
)
task_cache_directory_existed = task_cache_directory.exists()
try:
task = _get_task_description(task_id)
dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
Expand All @@ -431,8 +432,8 @@ def get_task(
if download_splits and isinstance(task, OpenMLSupervisedTask):
task.download_split()
except Exception as e:
if not tid_cache_dir_existed:
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
if not task_cache_directory_existed:
openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, task_cache_directory)
raise e

return task
Expand Down
2 changes: 1 addition & 1 deletion openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class TestBase(unittest.TestCase):
"user": [],
}
flow_name_tracker: ClassVar[list[str]] = []
test_server = "https://test.openml.org/api/v1/xml"
test_server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
admin_key = os.environ.get(openml.config.OPENML_TEST_SERVER_ADMIN_KEY_ENV_VAR)
user_key = openml.config._TEST_SERVER_NORMAL_USER_KEY

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def with_server(request):
openml.config.apikey = None
yield
return
openml.config.server = "https://test.openml.org/api/v1/xml"
openml.config.server = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
openml.config.apikey = TestBase.user_key
yield

Expand Down
1 change: 1 addition & 0 deletions tests/files/localhost_8000
37 changes: 13 additions & 24 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,27 +527,20 @@ def test_deletion_of_cache_dir(self):
def test_deletion_of_cache_dir_faulty_download(self, patch):
patch.side_effect = Exception("Boom!")
self.assertRaisesRegex(Exception, "Boom!", openml.datasets.get_dataset, dataset_id=1)
datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets")
datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
assert len(os.listdir(datasets_cache_dir)) == 0

@pytest.mark.uses_test_server()
def test_publish_dataset(self):
# lazy loading not possible as we need the arff-file.
openml.datasets.get_dataset(3, download_data=True)
file_path = os.path.join(
openml.config.get_cache_directory(),
"datasets",
"3",
"dataset.arff",
)
arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
dataset = OpenMLDataset(
"anneal",
"test",
data_format="arff",
version=1,
licence="public",
default_target_attribute="class",
data_file=file_path,
data_file=arff_file_path,
)
dataset.publish()
TestBase._mark_entity_for_removal("data", dataset.dataset_id)
Expand Down Expand Up @@ -890,7 +883,7 @@ def test_create_invalid_dataset(self):

@pytest.mark.uses_test_server()
def test_get_online_dataset_arff(self):
dataset_id = 100 # Australian
dataset_id = 128 # iris -- one of the few datasets without parquet file
# lazy loading not used as arff file is checked.
dataset = openml.datasets.get_dataset(dataset_id, download_data=True)
decoder = arff.ArffDecoder()
Expand Down Expand Up @@ -1468,8 +1461,9 @@ def test_data_edit_critical_field(self):
raise e
time.sleep(10)
# Delete the cache dir to get the newer version of the dataset

shutil.rmtree(
os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)),
os.path.join(openml.config.get_cache_directory(), "datasets", str(did)),
)

@pytest.mark.uses_test_server()
Expand Down Expand Up @@ -1734,7 +1728,6 @@ def test_delete_dataset(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
)
Expand All @@ -1749,14 +1742,13 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
):
openml.datasets.delete_dataset(40_000)

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
)
Expand All @@ -1771,14 +1763,13 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
):
openml.datasets.delete_dataset(40_000)

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
)
Expand All @@ -1790,14 +1781,13 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
success = openml.datasets.delete_dataset(40000)
assert success

dataset_url = "https://test.openml.org/api/v1/xml/data/40000"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/40000"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = (
test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
)
Expand All @@ -1812,7 +1802,7 @@ def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key)
):
openml.datasets.delete_dataset(9_999_999)

dataset_url = "https://test.openml.org/api/v1/xml/data/9999999"
dataset_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/9999999"
assert dataset_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

Expand Down Expand Up @@ -1907,9 +1897,8 @@ def _dataset_features_is_downloaded(did: int):


def _dataset_data_file_is_downloaded(did: int):
parquet_present = _dataset_file_is_downloaded(did, "dataset.pq")
arff_present = _dataset_file_is_downloaded(did, "dataset.arff")
return parquet_present or arff_present
cache_directory = Path(openml.config.get_cache_directory()) / "datasets" / str(did)
return any(f.suffix in (".pq", ".arff") for f in cache_directory.iterdir())


def _assert_datasets_retrieved_successfully(
Expand Down Expand Up @@ -2014,7 +2003,7 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
)
# While the mocked example is from production, unit tests by default connect to the test server.
requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
requests_mock.get(f"{openml.config.TEST_SERVER_URL}/api/v1/xml/data/61", text=content_file.read_text())
dataset = openml.datasets.get_dataset(61, download_data=True)
assert dataset._parquet_url is not None
assert dataset.parquet_file is not None
Expand Down
15 changes: 5 additions & 10 deletions tests/test_flows/test_flow_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ def test_delete_flow(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -466,14 +465,13 @@ def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -486,14 +484,13 @@ def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -506,14 +503,13 @@ def test_delete_subflow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(40_000)

flow_url = "https://test.openml.org/api/v1/xml/flow/40000"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/40000"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
Expand All @@ -523,15 +519,14 @@ def test_delete_flow_success(mock_delete, test_files_directory, test_api_key):
success = openml.flows.delete_flow(33364)
assert success

flow_url = "https://test.openml.org/api/v1/xml/flow/33364"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/33364"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
@pytest.mark.xfail(reason="failures_issue_1544", strict=False)
def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -544,6 +539,6 @@ def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key):
):
openml.flows.delete_flow(9_999_999)

flow_url = "https://test.openml.org/api/v1/xml/flow/9999999"
flow_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/flow/9999999"
assert flow_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")
2 changes: 1 addition & 1 deletion tests/test_openml/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_get_config_as_dict(self):
config = openml.config.get_config_as_dict()
_config = {}
_config["apikey"] = TestBase.user_key
_config["server"] = "https://test.openml.org/api/v1/xml"
_config["server"] = f"{openml.config.TEST_SERVER_URL}/api/v1/xml"
_config["cachedir"] = self.workdir
_config["avoid_duplicate_runs"] = False
_config["connection_n_retries"] = 20
Expand Down
13 changes: 7 additions & 6 deletions tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,6 @@ def test_initialize_model_from_run_nonstrict(self):

@mock.patch.object(requests.Session, "delete")
def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -1826,14 +1825,13 @@ def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(40_000)

run_url = "https://test.openml.org/api/v1/xml/run/40000"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/40000"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml"
mock_delete.return_value = create_request_response(
status_code=200,
Expand All @@ -1843,14 +1841,13 @@ def test_delete_run_success(mock_delete, test_files_directory, test_api_key):
success = openml.runs.delete_run(10591880)
assert success

run_url = "https://test.openml.org/api/v1/xml/run/10591880"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/10591880"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")


@mock.patch.object(requests.Session, "delete")
def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
openml.config.start_using_configuration_for_example()
content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml"
mock_delete.return_value = create_request_response(
status_code=412,
Expand All @@ -1863,7 +1860,7 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
):
openml.runs.delete_run(9_999_999)

run_url = "https://test.openml.org/api/v1/xml/run/9999999"
run_url = f"{openml.config.TEST_SERVER_URL}/api/v1/xml/run/9999999"
assert run_url == mock_delete.call_args.args[0]
assert test_api_key == mock_delete.call_args.kwargs.get("params", {}).get("api_key")

Expand All @@ -1873,6 +1870,10 @@ def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key):
Version(sklearn.__version__) < Version("0.21"),
reason="couldn't perform local tests successfully w/o bloating RAM",
)
@unittest.skipIf(
Version(sklearn.__version__) >= Version("1.8"),
reason="predictions differ significantly",
)
@mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs")
@pytest.mark.uses_test_server()
def test__run_task_get_arffcontent_2(parallel_mock):
Expand Down
Loading