From 509eccf15f1fd1f866b3a4ee2610945a88afe0ab Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 16:37:12 +0200 Subject: [PATCH 01/13] Update to newer database image --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 324350f..4f8cbdf 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,7 @@ services: database: profiles: ["python", "php", "all"] - image: "openml/test-database:20240105" + image: "openml/test-database:v0.1.20260204" container_name: "openml-test-database" environment: MYSQL_ROOT_PASSWORD: ok From c4a459e14c02eb7baddeccb28e2293cfd9c25c10 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 16:38:25 +0200 Subject: [PATCH 02/13] Update validation to work with new user keys (temporarily) --- src/database/users.py | 5 ++++- tests/users.py | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/database/users.py b/src/database/users.py index a045f5d..6bf40e3 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -6,7 +6,10 @@ from sqlalchemy import Connection, text # Enforces str is 32 hexadecimal characters, does not check validity. 
-APIKey = Annotated[str, StringConstraints(pattern=r"^[0-9a-fA-F]{32}$")] +APIKey = Annotated[ + str, + StringConstraints(pattern=r"^([0-9a-fA-F]{32})|(abc)|(normaluser)|(normaluser2)$"), +] class UserGroup(IntEnum): diff --git a/tests/users.py b/tests/users.py index 23bc325..47f34a9 100644 --- a/tests/users.py +++ b/tests/users.py @@ -9,7 +9,7 @@ class ApiKey(StrEnum): - ADMIN = "AD000000000000000000000000000000" - SOME_USER = "00000000000000000000000000000000" - OWNER_USER = "DA1A0000000000000000000000000000" + ADMIN = "abc" + SOME_USER = "normaluser" + OWNER_USER = "normaluser2" INVALID = "11111111111111111111111111111111" From d327685b0918493d755871b2c2b885218290f10f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 16:51:51 +0200 Subject: [PATCH 03/13] Update constant with new unprocessed datasets --- tests/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/constants.py b/tests/constants.py index 6881f88..3c847f7 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -1,7 +1,7 @@ PRIVATE_DATASET_ID = {130} -IN_PREPARATION_ID = {33} +IN_PREPARATION_ID = {33, 161, 162, 163} DEACTIVATED_DATASETS = {131} -DATASETS = set(range(1, 132)) +DATASETS = set(range(1, 132)) | {161, 162, 163} NUMBER_OF_DATASETS = len(DATASETS) NUMBER_OF_DEACTIVATED_DATASETS = len(DEACTIVATED_DATASETS) From c7a220b9361e85ff8461edbff6461abf7475349e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 17:09:20 +0200 Subject: [PATCH 04/13] Update test users with their new IDs --- tests/routers/openml/users_test.py | 2 +- tests/users.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 7ce9768..45b330a 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -18,7 +18,7 @@ def test_fetch_user(api_key: str, user: User, user_test: Connection) -> None: db_user = fetch_user(api_key, 
user_data=user_test) assert db_user is not None assert user.user_id == db_user.user_id - assert user.groups == db_user.groups + assert set(user.groups) == set(db_user.groups) def test_fetch_user_invalid_key_returns_none(user_test: Connection) -> None: diff --git a/tests/users.py b/tests/users.py index 47f34a9..62389b9 100644 --- a/tests/users.py +++ b/tests/users.py @@ -4,12 +4,12 @@ NO_USER = None SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE]) -OWNER_USER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE]) -ADMIN_USER = User(user_id=1, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE]) +OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE]) +ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE]) class ApiKey(StrEnum): ADMIN = "abc" - SOME_USER = "normaluser" - OWNER_USER = "normaluser2" + SOME_USER = "normaluser2" + OWNER_USER = "normaluser" INVALID = "11111111111111111111111111111111" From 17b1794648b999b082518b9128516bdfeb5ea392 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 17:20:48 +0200 Subject: [PATCH 05/13] Update for new ownership of studies --- tests/routers/openml/study_test.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index f32b6b7..a9a8ed4 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -6,6 +6,7 @@ from starlette.testclient import TestClient from schemas.study import StudyType +from tests.users import ApiKey def test_get_task_study_by_id(py_api: TestClient) -> None: @@ -458,7 +459,7 @@ def test_get_task_study_by_alias(py_api: TestClient) -> None: def test_create_task_study(py_api: TestClient) -> None: response = py_api.post( - "/studies?api_key=00000000000000000000000000000000", + f"/studies?api_key={ApiKey.SOME_USER}", json={ "name": "Test 
Study", "alias": "test-study", @@ -518,27 +519,28 @@ def _attach_tasks_to_study( def test_attach_task_to_study(py_api: TestClient, expdb_test: Connection) -> None: + expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7")) response = _attach_tasks_to_study( - study_id=1, - task_ids=[2, 3, 4], - api_key="AD000000000000000000000000000000", + study_id=7, + task_ids=[50], + api_key=ApiKey.OWNER_USER, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.OK - assert response.json() == {"study_id": 1, "main_entity_type": StudyType.TASK} + assert response.status_code == HTTPStatus.OK, response.content + assert response.json() == {"study_id": 7, "main_entity_type": StudyType.TASK} def test_attach_task_to_study_needs_owner(py_api: TestClient, expdb_test: Connection) -> None: - expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 1")) + expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7")) response = _attach_tasks_to_study( study_id=1, task_ids=[2, 3, 4], - api_key="00000000000000000000000000000000", + api_key=ApiKey.OWNER_USER, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.FORBIDDEN + assert response.status_code == HTTPStatus.FORBIDDEN, response.content def test_attach_task_to_study_already_linked_raises( @@ -549,11 +551,11 @@ def test_attach_task_to_study_already_linked_raises( response = _attach_tasks_to_study( study_id=1, task_ids=[1, 3, 4], - api_key="AD000000000000000000000000000000", + api_key=ApiKey.ADMIN, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.CONFLICT + assert response.status_code == HTTPStatus.CONFLICT, response.content assert response.json() == {"detail": "Task 1 is already attached to study 1."} @@ -565,7 +567,7 @@ def test_attach_task_to_study_but_task_not_exist_raises( response = _attach_tasks_to_study( study_id=1, task_ids=[80123, 78914], - 
api_key="AD000000000000000000000000000000", + api_key=ApiKey.ADMIN, py_api=py_api, expdb_test=expdb_test, ) From 89d623a2728146f4479ba448d50ba1a3ccd0c11c Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 17:38:23 +0200 Subject: [PATCH 06/13] Ignore ontology of features for now as it is not in Python API --- tests/routers/openml/migration/datasets_migration_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 812bde7..8883570 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -225,4 +225,7 @@ def test_datasets_feature_is_identical( else: # The old API formats bool as string in lower-case feature[key] = str(value) if not isinstance(value, bool) else str(value).lower() - assert python_body == original.json()["data_features"]["feature"] + original_features = original.json()["data_features"]["feature"] + for feature in original_features: + feature.pop("ontology", None) + assert python_body == original_features From f87ecf505346fe0342e047622bb967af1629f43f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 17:52:05 +0200 Subject: [PATCH 07/13] Add back in key for private dataset owner --- tests/routers/openml/migration/datasets_migration_test.py | 2 +- tests/users.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 8883570..011d8db 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -123,7 +123,7 @@ def test_private_dataset_no_user_no_access( @pytest.mark.parametrize( "api_key", - [ApiKey.OWNER_USER, ApiKey.ADMIN], + [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], ) def 
test_private_dataset_owner_access( py_api: TestClient, diff --git a/tests/users.py b/tests/users.py index 62389b9..54f48a0 100644 --- a/tests/users.py +++ b/tests/users.py @@ -12,4 +12,5 @@ class ApiKey(StrEnum): ADMIN = "abc" SOME_USER = "normaluser2" OWNER_USER = "normaluser" + DATASET_130_OWNER = "DA1A0000000000000000000000000000" INVALID = "11111111111111111111111111111111" From d86625332301ffd427b8f7f87545afafb8fb9c13 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 10 Feb 2026 17:56:07 +0200 Subject: [PATCH 08/13] Use private dataset owner --- tests/routers/openml/datasets_test.py | 6 +++--- tests/users.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index b463d3d..4ba5ad8 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -9,7 +9,7 @@ from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus from tests import constants -from tests.users import ADMIN_USER, NO_USER, OWNER_USER, SOME_USER, ApiKey +from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey @pytest.mark.parametrize( @@ -92,7 +92,7 @@ def test_private_dataset_no_access( @pytest.mark.parametrize( - "user", [OWNER_USER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)] + "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)] ) def test_private_dataset_access(user: User, expdb_test: Connection, user_test: Connection) -> None: dataset = get_dataset( @@ -165,7 +165,7 @@ def test_dataset_features_no_access(py_api: TestClient) -> None: @pytest.mark.parametrize( "api_key", - [ApiKey.ADMIN, ApiKey.OWNER_USER], + [ApiKey.ADMIN, ApiKey.DATASET_130_OWNER], ) def test_dataset_features_access_to_private(api_key: ApiKey, py_api: TestClient) -> None: response = py_api.get(f"/datasets/features/130?api_key={api_key}") diff --git
a/tests/users.py b/tests/users.py index 54f48a0..c98ffb0 100644 --- a/tests/users.py +++ b/tests/users.py @@ -5,6 +5,7 @@ NO_USER = None SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE]) OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE]) +DATASET_130_OWNER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE]) ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE]) From cb1689a7b0684b46548a6aa423e76ba02e2c043e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 09:07:37 +0200 Subject: [PATCH 09/13] Update constants to match new test database state --- .../openml/datasets_list_datasets_test.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index e60d8ad..e1ff17b 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -47,7 +47,7 @@ def test_list_filter_active(status: str, amount: int, py_api: TestClient) -> Non ("api_key", "amount"), [ (ApiKey.ADMIN, constants.NUMBER_OF_DATASETS), - (ApiKey.OWNER_USER, constants.NUMBER_OF_DATASETS), + (ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS), (ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS), (None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS), ], @@ -91,13 +91,15 @@ def test_list_data_name_absent(name: str, py_api: TestClient) -> None: @pytest.mark.parametrize("limit", [None, 5, 10, 200]) -@pytest.mark.parametrize("offset", [None, 0, 5, 129, 130, 200]) +@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200]) def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClient) -> None: + # dataset ids are contiguous until 131, then there are 161, 162, and 163. 
+ extra_datasets = [161, 162, 163] all_ids = [ did - for did in range(1, 1 + constants.NUMBER_OF_DATASETS) + for did in range(1, 1 + constants.NUMBER_OF_DATASETS - len(extra_datasets)) if did not in constants.PRIVATE_DATASET_ID - ] + ] + extra_datasets start = 0 if offset is None else offset end = start + (100 if limit is None else limit) @@ -108,7 +110,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie filters = {"status": "all", "pagination": offset_body | limit_body} response = py_api.post("/datasets/list", json=filters) - if offset in [130, 200]: + if offset in [140, 200]: _assert_empty_result(response) return @@ -119,7 +121,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie @pytest.mark.parametrize( ("version", "count"), - [(1, 100), (2, 6), (5, 1)], + [(1, 100), (2, 7), (5, 1)], ) def test_list_data_version(version: int, count: int, py_api: TestClient) -> None: response = py_api.post( @@ -133,16 +135,17 @@ def test_list_data_version(version: int, count: int, py_api: TestClient) -> None def test_list_data_version_no_result(py_api: TestClient) -> None: + version_with_no_datasets = 42 response = py_api.post( f"/datasets/list?api_key={ApiKey.ADMIN}", - json={"status": "all", "data_version": 4}, + json={"status": "all", "data_version": version_with_no_datasets}, ) _assert_empty_result(response) @pytest.mark.parametrize( "key", - [ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.ADMIN], + [ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], ) @pytest.mark.parametrize( ("user_id", "count"), @@ -211,7 +214,7 @@ def test_list_data_tag_empty(py_api: TestClient) -> None: ("number_classes", "2", 51), ("number_classes", "2..3", 56), ("number_missing_values", "2", 1), - ("number_missing_values", "2..100000", 22), + ("number_missing_values", "2..100000", 23), ], ) def test_list_data_quality(quality: str, range_: str, count: int, py_api: TestClient) -> None: From 10a17d633dde7a9c129a8a4d962c2f1f7bce5ebe 
Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 09:31:03 +0200 Subject: [PATCH 10/13] Database setup script not needed with new image --- docker-compose.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 4f8cbdf..4383884 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -15,17 +15,6 @@ services: interval: 5s retries: 10 - database-setup: - profiles: ["python", "php", "all"] - image: mysql - container_name: "openml-test-database-setup" - volumes: - - ./docker/database/update.sh:/database-update.sh - command: /bin/sh -c "/database-update.sh" - depends_on: - database: - condition: service_healthy - docs: profiles: ["all"] build: From a31bd5cb1f4097952211ab4288024fdf2c27b0de Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 09:56:42 +0200 Subject: [PATCH 11/13] Allow more lenient api key through configuration --- src/config.toml | 3 +++ src/database/users.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/config.toml b/src/config.toml index 0812add..10d7553 100644 --- a/src/config.toml +++ b/src/config.toml @@ -1,6 +1,9 @@ arff_base_url="https://test.openml.org" minio_base_url="https://openml1.win.tue.nl" +[development] +allow_test_api_keys=true + [fastapi] root_path="" diff --git a/src/database/users.py b/src/database/users.py index 6bf40e3..ad823e6 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -5,10 +5,16 @@ from pydantic import StringConstraints from sqlalchemy import Connection, text +from config import load_configuration + # Enforces str is 32 hexadecimal characters, does not check validity. 
+api_key_pattern = r"^[0-9a-fA-F]{32}$" +if load_configuration()["development"].get("allow_test_api_keys"): + api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$" + APIKey = Annotated[ str, - StringConstraints(pattern=r"^([0-9a-fA-F]{32})|(abc)|(normaluser)|(normaluser2)$"), + StringConstraints(pattern=api_key_pattern), ] From 9fb0bde1047dfce5c6ecf8e7adfe6916a36520b0 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Wed, 11 Feb 2026 15:15:01 +0200 Subject: [PATCH 12/13] Apply suggestion from @PGijsbers --- src/database/users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/database/users.py b/src/database/users.py index ad823e6..8be02df 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -8,6 +8,7 @@ from config import load_configuration # Enforces str is 32 hexadecimal characters, does not check validity. +# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, `normaluser2`, or `abc` (admin). api_key_pattern = r"^[0-9a-fA-F]{32}$" if load_configuration()["development"].get("allow_test_api_keys"): api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$" From 3a18200d4bb52c33e12387be701f39f483097d6d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 15:40:57 +0200 Subject: [PATCH 13/13] Wrap line to adhere to line limit --- src/database/users.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/database/users.py b/src/database/users.py index 8be02df..b439be7 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -8,7 +8,8 @@ from config import load_configuration # Enforces str is 32 hexadecimal characters, does not check validity. -# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, `normaluser2`, or `abc` (admin). +# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, +# `normaluser2`, or `abc` (admin). 
api_key_pattern = r"^[0-9a-fA-F]{32}$" if load_configuration()["development"].get("allow_test_api_keys"): api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"