diff --git a/docker-compose.yaml b/docker-compose.yaml index 324350f..4383884 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,7 +1,7 @@ services: database: profiles: ["python", "php", "all"] - image: "openml/test-database:20240105" + image: "openml/test-database:v0.1.20260204" container_name: "openml-test-database" environment: MYSQL_ROOT_PASSWORD: ok @@ -15,17 +15,6 @@ services: interval: 5s retries: 10 - database-setup: - profiles: ["python", "php", "all"] - image: mysql - container_name: "openml-test-database-setup" - volumes: - - ./docker/database/update.sh:/database-update.sh - command: /bin/sh -c "/database-update.sh" - depends_on: - database: - condition: service_healthy - docs: profiles: ["all"] build: diff --git a/src/config.toml b/src/config.toml index 0812add..10d7553 100644 --- a/src/config.toml +++ b/src/config.toml @@ -1,6 +1,9 @@ arff_base_url="https://test.openml.org" minio_base_url="https://openml1.win.tue.nl" +[development] +allow_test_api_keys=true + [fastapi] root_path="" diff --git a/src/database/users.py b/src/database/users.py index a045f5d..b439be7 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -5,8 +5,19 @@ from pydantic import StringConstraints from sqlalchemy import Connection, text +from config import load_configuration + # Enforces str is 32 hexadecimal characters, does not check validity. -APIKey = Annotated[str, StringConstraints(pattern=r"^[0-9a-fA-F]{32}$")] +# If `allow_test_api_keys` is set, the key may also be one of `normaluser`, +# `normaluser2`, or `abc` (admin). +api_key_pattern = r"^[0-9a-fA-F]{32}$" +if load_configuration()["development"].get("allow_test_api_keys"): + api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$" + +APIKey = Annotated[ + str, + StringConstraints(pattern=api_key_pattern), +] class UserGroup(IntEnum): diff --git a/tests/constants.py b/tests/constants.py index 6881f88..3c847f7 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -1,7 +1,7 @@ PRIVATE_DATASET_ID = {130} -IN_PREPARATION_ID = {33} +IN_PREPARATION_ID = {33, 161, 162, 163} DEACTIVATED_DATASETS = {131} -DATASETS = set(range(1, 132)) +DATASETS = set(range(1, 132)) | {161, 162, 163} NUMBER_OF_DATASETS = len(DATASETS) NUMBER_OF_DEACTIVATED_DATASETS = len(DEACTIVATED_DATASETS) diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index e60d8ad..e1ff17b 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -47,7 +47,7 @@ def test_list_filter_active(status: str, amount: int, py_api: TestClient) -> Non ("api_key", "amount"), [ (ApiKey.ADMIN, constants.NUMBER_OF_DATASETS), - (ApiKey.OWNER_USER, constants.NUMBER_OF_DATASETS), + (ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS), (ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS), (None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS), ], @@ -91,13 +91,15 @@ def test_list_data_name_absent(name: str, py_api: TestClient) -> None: @pytest.mark.parametrize("limit", [None, 5, 10, 200]) -@pytest.mark.parametrize("offset", [None, 0, 5, 129, 130, 200]) +@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200]) def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClient) -> None: + # dataset ids are contiguous until 131, then there are 161, 162, and 163. + extra_datasets = [161, 162, 163] all_ids = [ did - for did in range(1, 1 + constants.NUMBER_OF_DATASETS) + for did in range(1, 1 + constants.NUMBER_OF_DATASETS - len(extra_datasets)) if did not in constants.PRIVATE_DATASET_ID - ] + ] + extra_datasets start = 0 if offset is None else offset end = start + (100 if limit is None else limit) @@ -108,7 +110,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie filters = {"status": "all", "pagination": offset_body | limit_body} response = py_api.post("/datasets/list", json=filters) - if offset in [130, 200]: + if offset in [140, 200]: _assert_empty_result(response) return @@ -119,7 +121,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie @pytest.mark.parametrize( ("version", "count"), - [(1, 100), (2, 6), (5, 1)], + [(1, 100), (2, 7), (5, 1)], ) def test_list_data_version(version: int, count: int, py_api: TestClient) -> None: response = py_api.post( @@ -133,16 +135,17 @@ def test_list_data_version(version: int, count: int, py_api: TestClient) -> None def test_list_data_version_no_result(py_api: TestClient) -> None: + version_with_no_datasets = 42 response = py_api.post( f"/datasets/list?api_key={ApiKey.ADMIN}", - json={"status": "all", "data_version": 4}, + json={"status": "all", "data_version": version_with_no_datasets}, ) _assert_empty_result(response) @pytest.mark.parametrize( "key", - [ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.ADMIN], + [ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], ) @pytest.mark.parametrize( ("user_id", "count"), @@ -211,7 +214,7 @@ def test_list_data_tag_empty(py_api: TestClient) -> None: ("number_classes", "2", 51), ("number_classes", "2..3", 56), ("number_missing_values", "2", 1), - ("number_missing_values", "2..100000", 22), + ("number_missing_values", "2..100000", 23), ], ) def test_list_data_quality(quality: str, range_: str, count: int, py_api: TestClient) -> None: diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index b463d3d..4ba5ad8 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -9,7 +9,7 @@ from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus from tests import constants -from tests.users import ADMIN_USER, NO_USER, OWNER_USER, SOME_USER, ApiKey +from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey @pytest.mark.parametrize( @@ -92,7 +92,7 @@ def test_private_dataset_no_access( @pytest.mark.parametrize( - "user", [OWNER_USER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)] + "user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)] ) def test_private_dataset_access(user: User, expdb_test: Connection, user_test: Connection) -> None: dataset = get_dataset( @@ -165,7 +165,7 @@ def test_dataset_features_no_access(py_api: TestClient) -> None: @pytest.mark.parametrize( "api_key", - [ApiKey.ADMIN, ApiKey.OWNER_USER], + [ApiKey.ADMIN, ApiKey.DATASET_130_OWNER], ) def test_dataset_features_access_to_private(api_key: ApiKey, py_api: TestClient) -> None: response = py_api.get(f"/datasets/features/130?api_key={api_key}") diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 812bde7..011d8db 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -123,7 +123,7 @@ def test_private_dataset_no_user_no_access( @pytest.mark.parametrize( "api_key", - [ApiKey.OWNER_USER, ApiKey.ADMIN], + [ApiKey.DATASET_130_OWNER, ApiKey.ADMIN], ) def test_private_dataset_owner_access( py_api: TestClient, @@ -225,4 +225,7 @@ def test_datasets_feature_is_identical( else: # The old API formats bool as string in lower-case feature[key] = str(value) if not isinstance(value, bool) else str(value).lower() - assert python_body == original.json()["data_features"]["feature"] + original_features = original.json()["data_features"]["feature"] + for feature in original_features: + feature.pop("ontology", None) + assert python_body == original_features diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index f32b6b7..a9a8ed4 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -6,6 +6,7 @@ from starlette.testclient import TestClient from schemas.study import StudyType +from tests.users import ApiKey def test_get_task_study_by_id(py_api: TestClient) -> None: @@ -458,7 +459,7 @@ def test_get_task_study_by_alias(py_api: TestClient) -> None: def test_create_task_study(py_api: TestClient) -> None: response = py_api.post( - "/studies?api_key=00000000000000000000000000000000", + f"/studies?api_key={ApiKey.SOME_USER}", json={ "name": "Test Study", "alias": "test-study", @@ -518,27 +519,28 @@ def _attach_tasks_to_study( def test_attach_task_to_study(py_api: TestClient, expdb_test: Connection) -> None: + expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7")) response = _attach_tasks_to_study( - study_id=1, - task_ids=[2, 3, 4], - api_key="AD000000000000000000000000000000", + study_id=7, + task_ids=[50], + api_key=ApiKey.OWNER_USER, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.OK - assert response.json() == {"study_id": 1, "main_entity_type": StudyType.TASK} + assert response.status_code == HTTPStatus.OK, response.content + assert response.json() == {"study_id": 7, "main_entity_type": StudyType.TASK} def test_attach_task_to_study_needs_owner(py_api: TestClient, expdb_test: Connection) -> None: - expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 1")) + expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7")) response = _attach_tasks_to_study( study_id=1, task_ids=[2, 3, 4], - api_key="00000000000000000000000000000000", + api_key=ApiKey.OWNER_USER, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.FORBIDDEN + assert response.status_code == HTTPStatus.FORBIDDEN, response.content def test_attach_task_to_study_already_linked_raises( @@ -549,11 +551,11 @@ def test_attach_task_to_study_already_linked_raises( response = _attach_tasks_to_study( study_id=1, task_ids=[1, 3, 4], - api_key="AD000000000000000000000000000000", + api_key=ApiKey.ADMIN, py_api=py_api, expdb_test=expdb_test, ) - assert response.status_code == HTTPStatus.CONFLICT + assert response.status_code == HTTPStatus.CONFLICT, response.content assert response.json() == {"detail": "Task 1 is already attached to study 1."} @@ -565,7 +567,7 @@ def test_attach_task_to_study_but_task_not_exist_raises( response = _attach_tasks_to_study( study_id=1, task_ids=[80123, 78914], - api_key="AD000000000000000000000000000000", + api_key=ApiKey.ADMIN, py_api=py_api, expdb_test=expdb_test, ) diff --git a/tests/routers/openml/users_test.py b/tests/routers/openml/users_test.py index 7ce9768..45b330a 100644 --- a/tests/routers/openml/users_test.py +++ b/tests/routers/openml/users_test.py @@ -18,7 +18,7 @@ def test_fetch_user(api_key: str, user: User, user_test: Connection) -> None: db_user = fetch_user(api_key, user_data=user_test) assert db_user is not None assert user.user_id == db_user.user_id - assert user.groups == db_user.groups + assert set(user.groups) == set(db_user.groups) def test_fetch_user_invalid_key_returns_none(user_test: Connection) -> None: diff --git a/tests/users.py b/tests/users.py index 23bc325..c98ffb0 100644 --- a/tests/users.py +++ b/tests/users.py @@ -4,12 +4,14 @@ NO_USER = None SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE]) -OWNER_USER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE]) -ADMIN_USER = User(user_id=1, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE]) +OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE]) +DATASET_130_OWNER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE]) +ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE]) class ApiKey(StrEnum): - ADMIN = "AD000000000000000000000000000000" - SOME_USER = "00000000000000000000000000000000" - OWNER_USER = "DA1A0000000000000000000000000000" + ADMIN = "abc" + SOME_USER = "normaluser2" + OWNER_USER = "normaluser" + DATASET_130_OWNER = "DA1A0000000000000000000000000000" INVALID = "11111111111111111111111111111111"