From 509eccf15f1fd1f866b3a4ee2610945a88afe0ab Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 16:37:12 +0200
Subject: [PATCH 01/13] Update to newer database image
---
docker-compose.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 324350f..4f8cbdf 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,7 +1,7 @@
services:
database:
profiles: ["python", "php", "all"]
- image: "openml/test-database:20240105"
+ image: "openml/test-database:v0.1.20260204"
container_name: "openml-test-database"
environment:
MYSQL_ROOT_PASSWORD: ok
From c4a459e14c02eb7baddeccb28e2293cfd9c25c10 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 16:38:25 +0200
Subject: [PATCH 02/13] Update validation to work with new user keys
(temporarily)
---
src/database/users.py | 5 ++++-
tests/users.py | 6 +++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/database/users.py b/src/database/users.py
index a045f5d..6bf40e3 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -6,7 +6,10 @@
from sqlalchemy import Connection, text
# Enforces str is 32 hexadecimal characters, does not check validity.
-APIKey = Annotated[str, StringConstraints(pattern=r"^[0-9a-fA-F]{32}$")]
+APIKey = Annotated[
+ str,
+ StringConstraints(pattern=r"^([0-9a-fA-F]{32})|(abc)|(normaluser)|(normaluser2)$"),
+]
class UserGroup(IntEnum):
diff --git a/tests/users.py b/tests/users.py
index 23bc325..47f34a9 100644
--- a/tests/users.py
+++ b/tests/users.py
@@ -9,7 +9,7 @@
class ApiKey(StrEnum):
- ADMIN = "AD000000000000000000000000000000"
- SOME_USER = "00000000000000000000000000000000"
- OWNER_USER = "DA1A0000000000000000000000000000"
+ ADMIN = "abc"
+ SOME_USER = "normaluser"
+ OWNER_USER = "normaluser2"
INVALID = "11111111111111111111111111111111"
From d327685b0918493d755871b2c2b885218290f10f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 16:51:51 +0200
Subject: [PATCH 03/13] Update constant with new unprocessed datasets
---
tests/constants.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/constants.py b/tests/constants.py
index 6881f88..3c847f7 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -1,7 +1,7 @@
PRIVATE_DATASET_ID = {130}
-IN_PREPARATION_ID = {33}
+IN_PREPARATION_ID = {33, 161, 162, 163}
DEACTIVATED_DATASETS = {131}
-DATASETS = set(range(1, 132))
+DATASETS = set(range(1, 132)) | {161, 162, 163}
NUMBER_OF_DATASETS = len(DATASETS)
NUMBER_OF_DEACTIVATED_DATASETS = len(DEACTIVATED_DATASETS)
From c7a220b9361e85ff8461edbff6461abf7475349e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 17:09:20 +0200
Subject: [PATCH 04/13] Update test users with their new IDs
---
tests/routers/openml/users_test.py | 2 +-
tests/users.py | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py
index 7ce9768..45b330a 100644
--- a/tests/routers/openml/users_test.py
+++ b/tests/routers/openml/users_test.py
@@ -18,7 +18,7 @@ def test_fetch_user(api_key: str, user: User, user_test: Connection) -> None:
db_user = fetch_user(api_key, user_data=user_test)
assert db_user is not None
assert user.user_id == db_user.user_id
- assert user.groups == db_user.groups
+ assert set(user.groups) == set(db_user.groups)
def test_fetch_user_invalid_key_returns_none(user_test: Connection) -> None:
diff --git a/tests/users.py b/tests/users.py
index 47f34a9..62389b9 100644
--- a/tests/users.py
+++ b/tests/users.py
@@ -4,12 +4,12 @@
NO_USER = None
SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE])
-OWNER_USER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE])
-ADMIN_USER = User(user_id=1, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE])
+OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE])
+ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE])
class ApiKey(StrEnum):
ADMIN = "abc"
- SOME_USER = "normaluser"
- OWNER_USER = "normaluser2"
+ SOME_USER = "normaluser2"
+ OWNER_USER = "normaluser"
INVALID = "11111111111111111111111111111111"
From 17b1794648b999b082518b9128516bdfeb5ea392 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 17:20:48 +0200
Subject: [PATCH 05/13] Update for new ownership of studies
---
tests/routers/openml/study_test.py | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py
index f32b6b7..a9a8ed4 100644
--- a/tests/routers/openml/study_test.py
+++ b/tests/routers/openml/study_test.py
@@ -6,6 +6,7 @@
from starlette.testclient import TestClient
from schemas.study import StudyType
+from tests.users import ApiKey
def test_get_task_study_by_id(py_api: TestClient) -> None:
@@ -458,7 +459,7 @@ def test_get_task_study_by_alias(py_api: TestClient) -> None:
def test_create_task_study(py_api: TestClient) -> None:
response = py_api.post(
- "/studies?api_key=00000000000000000000000000000000",
+ f"/studies?api_key={ApiKey.SOME_USER}",
json={
"name": "Test Study",
"alias": "test-study",
@@ -518,27 +519,28 @@ def _attach_tasks_to_study(
def test_attach_task_to_study(py_api: TestClient, expdb_test: Connection) -> None:
+ expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7"))
response = _attach_tasks_to_study(
- study_id=1,
- task_ids=[2, 3, 4],
- api_key="AD000000000000000000000000000000",
+ study_id=7,
+ task_ids=[50],
+ api_key=ApiKey.OWNER_USER,
py_api=py_api,
expdb_test=expdb_test,
)
- assert response.status_code == HTTPStatus.OK
- assert response.json() == {"study_id": 1, "main_entity_type": StudyType.TASK}
+ assert response.status_code == HTTPStatus.OK, response.content
+ assert response.json() == {"study_id": 7, "main_entity_type": StudyType.TASK}
def test_attach_task_to_study_needs_owner(py_api: TestClient, expdb_test: Connection) -> None:
- expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 1"))
+ expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7"))
response = _attach_tasks_to_study(
study_id=1,
task_ids=[2, 3, 4],
- api_key="00000000000000000000000000000000",
+ api_key=ApiKey.OWNER_USER,
py_api=py_api,
expdb_test=expdb_test,
)
- assert response.status_code == HTTPStatus.FORBIDDEN
+ assert response.status_code == HTTPStatus.FORBIDDEN, response.content
def test_attach_task_to_study_already_linked_raises(
@@ -549,11 +551,11 @@ def test_attach_task_to_study_already_linked_raises(
response = _attach_tasks_to_study(
study_id=1,
task_ids=[1, 3, 4],
- api_key="AD000000000000000000000000000000",
+ api_key=ApiKey.ADMIN,
py_api=py_api,
expdb_test=expdb_test,
)
- assert response.status_code == HTTPStatus.CONFLICT
+ assert response.status_code == HTTPStatus.CONFLICT, response.content
assert response.json() == {"detail": "Task 1 is already attached to study 1."}
@@ -565,7 +567,7 @@ def test_attach_task_to_study_but_task_not_exist_raises(
response = _attach_tasks_to_study(
study_id=1,
task_ids=[80123, 78914],
- api_key="AD000000000000000000000000000000",
+ api_key=ApiKey.ADMIN,
py_api=py_api,
expdb_test=expdb_test,
)
From 89d623a2728146f4479ba448d50ba1a3ccd0c11c Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 17:38:23 +0200
Subject: [PATCH 06/13] Ignore ontology of features for now as it is not in
Python API
---
tests/routers/openml/migration/datasets_migration_test.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 812bde7..8883570 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -225,4 +225,7 @@ def test_datasets_feature_is_identical(
else:
# The old API formats bool as string in lower-case
feature[key] = str(value) if not isinstance(value, bool) else str(value).lower()
- assert python_body == original.json()["data_features"]["feature"]
+ original_features = original.json()["data_features"]["feature"]
+ for feature in original_features:
+ feature.pop("ontology", None)
+ assert python_body == original_features
From f87ecf505346fe0342e047622bb967af1629f43f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 17:52:05 +0200
Subject: [PATCH 07/13] Add back in key for private dataset owner
---
tests/routers/openml/migration/datasets_migration_test.py | 2 +-
tests/users.py | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 8883570..011d8db 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -123,7 +123,7 @@ def test_private_dataset_no_user_no_access(
@pytest.mark.parametrize(
"api_key",
- [ApiKey.OWNER_USER, ApiKey.ADMIN],
+ [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
)
def test_private_dataset_owner_access(
py_api: TestClient,
diff --git a/tests/users.py b/tests/users.py
index 62389b9..54f48a0 100644
--- a/tests/users.py
+++ b/tests/users.py
@@ -12,4 +12,5 @@ class ApiKey(StrEnum):
ADMIN = "abc"
SOME_USER = "normaluser2"
OWNER_USER = "normaluser"
+ DATASET_130_OWNER = "DA1A0000000000000000000000000000"
INVALID = "11111111111111111111111111111111"
From d86625332301ffd427b8f7f87545afafb8fb9c13 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 10 Feb 2026 17:56:07 +0200
Subject: [PATCH 08/13] Use private dataset owner
---
tests/routers/openml/datasets_test.py | 6 +++---
tests/users.py | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index b463d3d..4ba5ad8 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -9,7 +9,7 @@
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
from tests import constants
-from tests.users import ADMIN_USER, NO_USER, OWNER_USER, SOME_USER, ApiKey
+from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
@pytest.mark.parametrize(
@@ -92,7 +92,7 @@ def test_private_dataset_no_access(
@pytest.mark.parametrize(
- "user", [OWNER_USER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
+ "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
)
def test_private_dataset_access(user: User, expdb_test: Connection, user_test: Connection) -> None:
dataset = get_dataset(
@@ -165,7 +165,7 @@ def test_dataset_features_no_access(py_api: TestClient) -> None:
@pytest.mark.parametrize(
"api_key",
- [ApiKey.ADMIN, ApiKey.OWNER_USER],
+ [ApiKey.ADMIN, ApiKey.DATASET_130_OWNER],
)
def test_dataset_features_access_to_private(api_key: ApiKey, py_api: TestClient) -> None:
response = py_api.get(f"/datasets/features/130?api_key={api_key}")
diff --git a/tests/users.py b/tests/users.py
index 54f48a0..c98ffb0 100644
--- a/tests/users.py
+++ b/tests/users.py
@@ -5,6 +5,7 @@
NO_USER = None
SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE])
OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE])
+DATASET_130_OWNER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE])
ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE])
From cb1689a7b0684b46548a6aa423e76ba02e2c043e Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 09:07:37 +0200
Subject: [PATCH 09/13] Update constants to match new test database state
---
.../openml/datasets_list_datasets_test.py | 21 +++++++++++--------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index e60d8ad..e1ff17b 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -47,7 +47,7 @@ def test_list_filter_active(status: str, amount: int, py_api: TestClient) -> Non
("api_key", "amount"),
[
(ApiKey.ADMIN, constants.NUMBER_OF_DATASETS),
- (ApiKey.OWNER_USER, constants.NUMBER_OF_DATASETS),
+ (ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
(ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
(None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
],
@@ -91,13 +91,15 @@ def test_list_data_name_absent(name: str, py_api: TestClient) -> None:
@pytest.mark.parametrize("limit", [None, 5, 10, 200])
-@pytest.mark.parametrize("offset", [None, 0, 5, 129, 130, 200])
+@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200])
def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClient) -> None:
+ # dataset ids are contiguous until 131, then there are 161, 162, and 163.
+ extra_datasets = [161, 162, 163]
all_ids = [
did
- for did in range(1, 1 + constants.NUMBER_OF_DATASETS)
+ for did in range(1, 1 + constants.NUMBER_OF_DATASETS - len(extra_datasets))
if did not in constants.PRIVATE_DATASET_ID
- ]
+ ] + extra_datasets
start = 0 if offset is None else offset
end = start + (100 if limit is None else limit)
@@ -108,7 +110,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie
filters = {"status": "all", "pagination": offset_body | limit_body}
response = py_api.post("/datasets/list", json=filters)
- if offset in [130, 200]:
+ if offset in [140, 200]:
_assert_empty_result(response)
return
@@ -119,7 +121,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie
@pytest.mark.parametrize(
("version", "count"),
- [(1, 100), (2, 6), (5, 1)],
+ [(1, 100), (2, 7), (5, 1)],
)
def test_list_data_version(version: int, count: int, py_api: TestClient) -> None:
response = py_api.post(
@@ -133,16 +135,17 @@ def test_list_data_version(version: int, count: int, py_api: TestClient) -> None
def test_list_data_version_no_result(py_api: TestClient) -> None:
+ version_with_no_datasets = 42
response = py_api.post(
f"/datasets/list?api_key={ApiKey.ADMIN}",
- json={"status": "all", "data_version": 4},
+ json={"status": "all", "data_version": version_with_no_datasets},
)
_assert_empty_result(response)
@pytest.mark.parametrize(
"key",
- [ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.ADMIN],
+ [ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
)
@pytest.mark.parametrize(
("user_id", "count"),
@@ -211,7 +214,7 @@ def test_list_data_tag_empty(py_api: TestClient) -> None:
("number_classes", "2", 51),
("number_classes", "2..3", 56),
("number_missing_values", "2", 1),
- ("number_missing_values", "2..100000", 22),
+ ("number_missing_values", "2..100000", 23),
],
)
def test_list_data_quality(quality: str, range_: str, count: int, py_api: TestClient) -> None:
From 10a17d633dde7a9c129a8a4d962c2f1f7bce5ebe Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 09:31:03 +0200
Subject: [PATCH 10/13] Database setup script not needed with new image
---
docker-compose.yaml | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 4f8cbdf..4383884 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -15,17 +15,6 @@ services:
interval: 5s
retries: 10
- database-setup:
- profiles: ["python", "php", "all"]
- image: mysql
- container_name: "openml-test-database-setup"
- volumes:
- - ./docker/database/update.sh:/database-update.sh
- command: /bin/sh -c "/database-update.sh"
- depends_on:
- database:
- condition: service_healthy
-
docs:
profiles: ["all"]
build:
From a31bd5cb1f4097952211ab4288024fdf2c27b0de Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 09:56:42 +0200
Subject: [PATCH 11/13] Allow more lenient api key through configuration
---
src/config.toml | 3 +++
src/database/users.py | 8 +++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/config.toml b/src/config.toml
index 0812add..10d7553 100644
--- a/src/config.toml
+++ b/src/config.toml
@@ -1,6 +1,9 @@
arff_base_url="https://test.openml.org"
minio_base_url="https://openml1.win.tue.nl"
+[development]
+allow_test_api_keys=true
+
[fastapi]
root_path=""
diff --git a/src/database/users.py b/src/database/users.py
index 6bf40e3..ad823e6 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -5,10 +5,16 @@
from pydantic import StringConstraints
from sqlalchemy import Connection, text
+from config import load_configuration
+
# Enforces str is 32 hexadecimal characters, does not check validity.
+api_key_pattern = r"^[0-9a-fA-F]{32}$"
+if load_configuration()["development"].get("allow_test_api_keys"):
+ api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
+
APIKey = Annotated[
str,
- StringConstraints(pattern=r"^([0-9a-fA-F]{32})|(abc)|(normaluser)|(normaluser2)$"),
+ StringConstraints(pattern=api_key_pattern),
]
From 9fb0bde1047dfce5c6ecf8e7adfe6916a36520b0 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Wed, 11 Feb 2026 15:15:01 +0200
Subject: [PATCH 12/13] Apply suggestion from @PGijsbers
---
src/database/users.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/database/users.py b/src/database/users.py
index ad823e6..8be02df 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -8,6 +8,7 @@
from config import load_configuration
# Enforces str is 32 hexadecimal characters, does not check validity.
+# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, `normaluser2`, or `abc` (admin).
api_key_pattern = r"^[0-9a-fA-F]{32}$"
if load_configuration()["development"].get("allow_test_api_keys"):
api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
From 3a18200d4bb52c33e12387be701f39f483097d6d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 15:40:57 +0200
Subject: [PATCH 13/13] Wrap line to adhere to line limit
---
src/database/users.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/database/users.py b/src/database/users.py
index 8be02df..b439be7 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -8,7 +8,8 @@
from config import load_configuration
# Enforces str is 32 hexadecimal characters, does not check validity.
-# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, `normaluser2`, or `abc` (admin).
+# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
+# `normaluser2`, or `abc` (admin).
api_key_pattern = r"^[0-9a-fA-F]{32}$"
if load_configuration()["development"].get("allow_test_api_keys"):
api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"