From 855fa05e5d84f2e37a46453a272d54cd08bf805d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 12 Feb 2026 10:36:31 +0200 Subject: [PATCH 1/9] Allow more lenient api key through configuration --- src/database/users.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/database/users.py b/src/database/users.py index b439be7..220c34a 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -7,7 +7,6 @@ from config import load_configuration -# Enforces str is 32 hexadecimal characters, does not check validity. # If `allow_test_api_keys` is set, the key may also be one of `normaluser`, # `normaluser2`, or `abc` (admin). api_key_pattern = r"^[0-9a-fA-F]{32}$" From 5c551c7e4a687c98fbea30798bf620e4e5612976 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 13:42:44 +0200 Subject: [PATCH 2/9] Added RFC with some failing linting/type checks --- src/core/errors.py | 215 ++++++++++++++++++ src/core/formatting.py | 6 - src/main.py | 3 + src/routers/mldcat_ap/dataset.py | 10 +- src/routers/openml/datasets.py | 133 ++++++----- src/routers/openml/flows.py | 12 +- src/routers/openml/qualities.py | 12 +- src/routers/openml/study.py | 52 +++-- src/routers/openml/tasks.py | 14 +- src/routers/openml/tasktype.py | 13 +- tests/routers/openml/dataset_tag_test.py | 24 +- .../openml/datasets_list_datasets_test.py | 26 ++- tests/routers/openml/datasets_test.py | 24 +- tests/routers/openml/flows_test.py | 11 +- .../migration/datasets_migration_test.py | 42 +++- .../openml/migration/flows_migration_test.py | 7 +- tests/routers/openml/qualities_test.py | 11 +- tests/routers/openml/study_test.py | 11 +- tests/routers/openml/task_type_test.py | 10 +- 19 files changed, 497 insertions(+), 139 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 840cd75..a77e3e8 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -1,7 +1,222 @@ +"""RFC 9457 Problem Details for HTTP APIs. 
+ +This module provides RFC 9457 compliant error handling for the OpenML REST API. +See: https://www.rfc-editor.org/rfc/rfc9457.html +""" + from enum import IntEnum +from http import HTTPStatus +from typing import NoReturn + +from fastapi import Request +from fastapi.responses import JSONResponse +from pydantic import BaseModel, ConfigDict, Field + +# JSON-serializable extension value type for RFC 9457 problem details +type ExtensionValue = str | int | float | bool | None | list[str] | list[int] class DatasetError(IntEnum): NOT_FOUND = 111 NO_ACCESS = 112 NO_DATA_FILE = 113 + + +class ProblemDetail(BaseModel): + """RFC 9457 Problem Details model. + + All fields are optional per the specification, but `type` defaults to "about:blank" + when not provided. The `status` field is advisory and should match the HTTP status code. + """ + + model_config = ConfigDict(populate_by_name=True) + + type_: str = Field( + default="about:blank", + alias="type", + serialization_alias="type", + description="A URI reference identifying the problem type. Defaults to 'about:blank'.", + ) + title: str | None = Field( + default=None, + description="A short, human-readable summary of the problem type.", + ) + status: int | None = Field( + default=None, + description="The HTTP status code. Advisory only, should match the actual status.", + ) + detail: str | None = Field( + default=None, + description="A human-readable explanation specific to this occurrence of the problem.", + ) + instance: str | None = Field( + default=None, + description="A URI reference identifying this specific occurrence of the problem.", + ) + + +class ProblemDetailError(Exception): + """Exception that produces RFC 9457 compliant error responses. 
+ + Usage: + raise ProblemDetailError( + status_code=HTTPStatus.NOT_FOUND, + detail="Dataset 123 was not found.", + title="Dataset Not Found", + type_="https://openml.org/problems/dataset-not-found", + code="111", # Extension field for legacy error codes + ) + """ + + def __init__( + self, + status_code: HTTPStatus | int, + detail: str | None = None, + title: str | None = None, + type_: str = "about:blank", + instance: str | None = None, + **extensions: ExtensionValue, + ) -> None: + self.status_code = int(status_code) + self.problem = ProblemDetail( + type_=type_, + title=title, + status=self.status_code, + detail=detail, + instance=instance, + ) + self.extensions = extensions + super().__init__(detail or title or "An error occurred") + + +def problem_detail_exception_handler( + request: Request, # noqa: ARG001 + exc: ProblemDetailError, +) -> JSONResponse: + """FastAPI exception handler for ProblemDetailError. + + Returns a response with: + - Content-Type: application/problem+json + - RFC 9457 compliant JSON body + """ + content = exc.problem.model_dump(by_alias=True, exclude_none=True) + content.update(exc.extensions) + + return JSONResponse( + status_code=exc.status_code, + content=content, + media_type="application/problem+json", + ) + + +# Problem type URIs for OpenML-specific errors +# These should be documented at the corresponding URLs +class ProblemType: + """Problem type URIs for common OpenML errors.""" + + # Dataset errors + DATASET_NOT_FOUND = "https://openml.org/problems/dataset-not-found" + DATASET_NO_ACCESS = "https://openml.org/problems/dataset-no-access" + DATASET_NO_DATA_FILE = "https://openml.org/problems/dataset-no-data-file" + DATASET_NOT_PROCESSED = "https://openml.org/problems/dataset-not-processed" + DATASET_PROCESSING_ERROR = "https://openml.org/problems/dataset-processing-error" + DATASET_NO_FEATURES = "https://openml.org/problems/dataset-no-features" + DATASET_STATUS_TRANSITION = 
"https://openml.org/problems/dataset-status-transition" + DATASET_NOT_OWNED = "https://openml.org/problems/dataset-not-owned" + DATASET_ADMIN_ONLY = "https://openml.org/problems/dataset-admin-only" + + # Authentication/Authorization errors + AUTHENTICATION_REQUIRED = "https://openml.org/problems/authentication-required" + AUTHENTICATION_FAILED = "https://openml.org/problems/authentication-failed" + FORBIDDEN = "https://openml.org/problems/forbidden" + + # Tag errors + TAG_ALREADY_EXISTS = "https://openml.org/problems/tag-already-exists" + + # Search/List errors + NO_RESULTS = "https://openml.org/problems/no-results" + + # Study errors + STUDY_NOT_FOUND = "https://openml.org/problems/study-not-found" + STUDY_PRIVATE = "https://openml.org/problems/study-private" + STUDY_LEGACY = "https://openml.org/problems/study-legacy" + STUDY_ALIAS_EXISTS = "https://openml.org/problems/study-alias-exists" + STUDY_INVALID_TYPE = "https://openml.org/problems/study-invalid-type" + STUDY_NOT_EDITABLE = "https://openml.org/problems/study-not-editable" + STUDY_CONFLICT = "https://openml.org/problems/study-conflict" + + # Task errors + TASK_NOT_FOUND = "https://openml.org/problems/task-not-found" + TASK_TYPE_NOT_FOUND = "https://openml.org/problems/task-type-not-found" + + # Flow errors + FLOW_NOT_FOUND = "https://openml.org/problems/flow-not-found" + + # Service errors + SERVICE_NOT_FOUND = "https://openml.org/problems/service-not-found" + + # Internal errors + INTERNAL_ERROR = "https://openml.org/problems/internal-error" + + +# Human-readable titles for problem types +PROBLEM_TITLES: dict[str, str] = { + ProblemType.DATASET_NOT_FOUND: "Dataset Not Found", + ProblemType.DATASET_NO_ACCESS: "Dataset Access Denied", + ProblemType.DATASET_NO_DATA_FILE: "Dataset Data File Missing", + ProblemType.DATASET_NOT_PROCESSED: "Dataset Not Processed", + ProblemType.DATASET_PROCESSING_ERROR: "Dataset Processing Error", + ProblemType.DATASET_NO_FEATURES: "Dataset Features Not Available", + 
ProblemType.DATASET_STATUS_TRANSITION: "Invalid Status Transition", + ProblemType.DATASET_NOT_OWNED: "Dataset Not Owned", + ProblemType.DATASET_ADMIN_ONLY: "Administrator Only", + ProblemType.AUTHENTICATION_REQUIRED: "Authentication Required", + ProblemType.AUTHENTICATION_FAILED: "Authentication Failed", + ProblemType.FORBIDDEN: "Forbidden", + ProblemType.TAG_ALREADY_EXISTS: "Tag Already Exists", + ProblemType.NO_RESULTS: "No Results Found", + ProblemType.STUDY_NOT_FOUND: "Study Not Found", + ProblemType.STUDY_PRIVATE: "Study Is Private", + ProblemType.STUDY_LEGACY: "Legacy Study Not Supported", + ProblemType.STUDY_ALIAS_EXISTS: "Study Alias Already Exists", + ProblemType.STUDY_INVALID_TYPE: "Invalid Study Type", + ProblemType.STUDY_NOT_EDITABLE: "Study Not Editable", + ProblemType.STUDY_CONFLICT: "Study Conflict", + ProblemType.TASK_NOT_FOUND: "Task Not Found", + ProblemType.TASK_TYPE_NOT_FOUND: "Task Type Not Found", + ProblemType.FLOW_NOT_FOUND: "Flow Not Found", + ProblemType.SERVICE_NOT_FOUND: "Service Not Found", + ProblemType.INTERNAL_ERROR: "Internal Server Error", +} + + +def raise_problem( + status_code: HTTPStatus | int, + type_: str, + detail: str, + *, + instance: str | None = None, + code: int | str | None = None, + **extensions: ExtensionValue, +) -> NoReturn: + """Helper function to raise RFC 9457 compliant errors. + + Args: + status_code: HTTP status code for the response. + type_: Problem type URI identifying the error class. + detail: Human-readable explanation of this specific error occurrence. + instance: Optional URI identifying this specific error occurrence. + code: Optional legacy OpenML error code (for backwards compatibility). + **extensions: Additional extension fields to include in the response. 
+ """ + title = PROBLEM_TITLES.get(type_) + if code is not None: + extensions["code"] = str(code) + raise ProblemDetailError( + status_code=status_code, + detail=detail, + title=title, + type_=type_, + instance=instance, + **extensions, + ) diff --git a/src/core/formatting.py b/src/core/formatting.py index 174261f..f954e81 100644 --- a/src/core/formatting.py +++ b/src/core/formatting.py @@ -3,7 +3,6 @@ from sqlalchemy.engine import Row from config import load_routing_configuration -from core.errors import DatasetError from schemas.datasets.openml import DatasetFileFormat @@ -16,11 +15,6 @@ def _str_to_bool(string: str) -> bool: raise ValueError(msg) -def _format_error(*, code: DatasetError, message: str) -> dict[str, str]: - """Formatter for JSON bodies of OpenML error codes.""" - return {"code": str(code), "message": message} - - def _format_parquet_url(dataset: Row) -> str | None: if dataset.format.lower() != DatasetFileFormat.ARFF: return None diff --git a/src/main.py b/src/main.py index d8e61b3..e3fe6e0 100644 --- a/src/main.py +++ b/src/main.py @@ -4,6 +4,7 @@ from fastapi import FastAPI from config import load_configuration +from core.errors import ProblemDetailError, problem_detail_exception_handler from routers.mldcat_ap.dataset import router as mldcat_ap_router from routers.openml.datasets import router as datasets_router from routers.openml.estimation_procedure import router as estimationprocedure_router @@ -45,6 +46,8 @@ def create_api() -> FastAPI: fastapi_kwargs = load_configuration()["fastapi"] app = FastAPI(**fastapi_kwargs) + app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type] + app.include_router(datasets_router) app.include_router(qualities_router) app.include_router(mldcat_ap_router) diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py index db34e5c..eaa5652 100644 --- a/src/routers/mldcat_ap/dataset.py +++ b/src/routers/mldcat_ap/dataset.py @@ -4,12 +4,14 @@ 
Specific queries could be written to fetch e.g., a single feature or quality. """ +from http import HTTPStatus from typing import Annotated -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import config +from core.errors import ProblemType, raise_problem from database.users import User from routers.dependencies import expdb_connection, fetch_user, userdb_connection from routers.openml.datasets import get_dataset, get_dataset_features @@ -121,7 +123,11 @@ def get_mldcat_ap_distribution( ) def get_dataservice(service_id: int) -> JsonLDGraph: if service_id != 1: - raise HTTPException(status_code=404, detail="Service not found.") + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.SERVICE_NOT_FOUND, + detail="Service not found.", + ) return JsonLDGraph( context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld", graph=[ diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index dda2511..b2ad65d 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -4,18 +4,17 @@ from http import HTTPStatus from typing import Annotated, Any, Literal, NamedTuple -from fastapi import APIRouter, Body, Depends, HTTPException +from fastapi import APIRouter, Body, Depends from sqlalchemy import Connection, text from sqlalchemy.engine import Row import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError +from core.errors import DatasetError, ProblemType, raise_problem from core.formatting import ( _csv_as_list, _format_dataset_url, - _format_error, _format_parquet_url, ) from database.users import User, UserGroup @@ -37,10 +36,20 @@ def tag_dataset( ) -> dict[str, dict[str, Any]]: tags = database.datasets.get_tags_for(data_id, expdb_db) if tag.casefold() in [t.casefold() for t in tags]: - raise create_tag_exists_error(data_id, tag) + raise_problem( + 
status_code=HTTPStatus.CONFLICT, + type_=ProblemType.TAG_ALREADY_EXISTS, + detail=f"Entity already tagged by this tag. id={data_id}; tag={tag}", + code=473, + ) if user is None: - raise create_authentication_failed_error() + raise_problem( + status_code=HTTPStatus.UNAUTHORIZED, + type_=ProblemType.AUTHENTICATION_FAILED, + detail="Authentication failed.", + code=103, + ) database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db) return { @@ -48,24 +57,6 @@ def tag_dataset( } -def create_authentication_failed_error() -> HTTPException: - return HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "103", "message": "Authentication failed"}, - ) - - -def create_tag_exists_error(data_id: int, tag: str) -> HTTPException: - return HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail={ - "code": "473", - "message": "Entity already tagged by this tag.", - "additional_information": f"id={data_id}; tag={tag}", - }, - ) - - class DatasetStatusFilter(StrEnum): ACTIVE = DatasetStatus.ACTIVE DEACTIVATED = DatasetStatus.DEACTIVATED @@ -203,10 +194,12 @@ def quality_clause(quality: str, range_: str | None) -> str: row.did: dict(zip(columns, row, strict=True)) for row in rows } if not datasets: - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "372", "message": "No results"}, - ) from None + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.NO_RESULTS, + detail="No datasets match the search criteria.", + code=372, + ) for dataset in datasets.values(): # The old API does not actually provide the checksum but just an empty field @@ -266,15 +259,23 @@ def _get_dataset_raise_otherwise( ) -> Row: """Fetches the dataset from the database if it exists and the user has permissions. - Raises HTTPException if the dataset does not exist or the user can not access it. + Raises ProblemDetailError if the dataset does not exist or the user can not access it. 
""" if not (dataset := database.datasets.get(dataset_id, expdb)): - error = _format_error(code=DatasetError.NOT_FOUND, message="Unknown dataset") - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=error) + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.DATASET_NOT_FOUND, + detail="Unknown dataset.", + code=DatasetError.NOT_FOUND, + ) if not _user_has_access(dataset=dataset, user=user): - error = _format_error(code=DatasetError.NO_ACCESS, message="No access granted") - raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=error) + raise_problem( + status_code=HTTPStatus.FORBIDDEN, + type_=ProblemType.DATASET_NO_ACCESS, + detail="No access granted.", + code=DatasetError.NO_ACCESS, + ) return dataset @@ -297,21 +298,32 @@ def get_dataset_features( if not features: processing_state = database.datasets.get_latest_processing_update(dataset_id, expdb) if processing_state is None: - code, msg = ( - 273, - "Dataset not processed yet. The dataset was not processed yet, features are not yet available. Please wait for a few minutes.", # noqa: E501 + raise_problem( + status_code=HTTPStatus.PRECONDITION_FAILED, + type_=ProblemType.DATASET_NOT_PROCESSED, + detail=( + "Dataset not processed yet. The dataset was not processed yet, " + "features are not yet available. Please wait for a few minutes." + ), + code=273, ) elif processing_state.error: - code, msg = 274, "No features found. Additionally, dataset processed with error" + raise_problem( + status_code=HTTPStatus.PRECONDITION_FAILED, + type_=ProblemType.DATASET_PROCESSING_ERROR, + detail="No features found. Additionally, dataset processed with error.", + code=274, + ) else: - code, msg = ( - 272, - "No features found. The dataset did not contain any features, or we could not extract them.", # noqa: E501 + raise_problem( + status_code=HTTPStatus.PRECONDITION_FAILED, + type_=ProblemType.DATASET_NO_FEATURES, + detail=( + "No features found. 
" + "The dataset did not contain any features, or we could not extract them." + ), + code=272, ) - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": code, "message": msg}, - ) return features @@ -325,30 +337,37 @@ def update_dataset_status( expdb: Annotated[Connection, Depends(expdb_connection)], ) -> dict[str, str | int]: if user is None: - raise HTTPException( + raise_problem( status_code=HTTPStatus.UNAUTHORIZED, - detail="Updating dataset status required authorization", + type_=ProblemType.AUTHENTICATION_REQUIRED, + detail="Updating dataset status requires authentication.", ) dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb) can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups if status == DatasetStatus.DEACTIVATED and not can_deactivate: - raise HTTPException( + raise_problem( status_code=HTTPStatus.FORBIDDEN, - detail={"code": 693, "message": "Dataset is not owned by you"}, + type_=ProblemType.DATASET_NOT_OWNED, + detail="Dataset is not owned by you.", + code=693, ) if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups: - raise HTTPException( + raise_problem( status_code=HTTPStatus.FORBIDDEN, - detail={"code": 696, "message": "Only administrators can activate datasets."}, + type_=ProblemType.DATASET_ADMIN_ONLY, + detail="Only administrators can activate datasets.", + code=696, ) current_status = database.datasets.get_status(dataset_id, expdb) if current_status and current_status.status == status: - raise HTTPException( + raise_problem( status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": 694, "message": "Illegal status transition."}, + type_=ProblemType.DATASET_STATUS_TRANSITION, + detail="Illegal status transition.", + code=694, ) # If current status is unknown, it is effectively "in preparation", @@ -362,9 +381,10 @@ def update_dataset_status( elif current_status.status == DatasetStatus.DEACTIVATED: 
database.datasets.remove_deactivated_status(dataset_id, expdb) else: - raise HTTPException( + raise_problem( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail={"message": f"Unknown status transition: {current_status} -> {status}"}, + type_=ProblemType.INTERNAL_ERROR, + detail=f"Unknown status transition: {current_status} -> {status}", ) return {"dataset_id": dataset_id, "status": status} @@ -384,11 +404,12 @@ def get_dataset( if not ( dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db) ): - error = _format_error( + raise_problem( + status_code=HTTPStatus.PRECONDITION_FAILED, + type_=ProblemType.DATASET_NO_DATA_FILE, + detail="No data file found.", code=DatasetError.NO_DATA_FILE, - message="No data file found", ) - raise HTTPException(status_code=HTTPStatus.PRECONDITION_FAILED, detail=error) tags = database.datasets.get_tags_for(dataset_id, expdb_db) description = database.datasets.get_description(dataset_id, expdb_db) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index cb6df5d..afd7e2a 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -1,11 +1,12 @@ from http import HTTPStatus from typing import Annotated, Literal -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import database.flows from core.conversions import _str_to_num +from core.errors import ProblemType, raise_problem from routers.dependencies import expdb_connection from schemas.flows import Flow, Parameter, Subflow @@ -21,8 +22,9 @@ def flow_exists( """Check if a Flow with the name and version exists, if so, return the flow id.""" flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb) if flow is None: - raise HTTPException( + raise_problem( status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.FLOW_NOT_FOUND, detail="Flow not found.", ) return {"flow_id": flow.id} @@ -32,7 +34,11 @@ def 
flow_exists( def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow: flow = database.flows.get(flow_id, expdb) if not flow: - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Flow not found") + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.FLOW_NOT_FOUND, + detail="Flow not found.", + ) parameter_rows = database.flows.get_parameters(flow_id, expdb) parameters = [ diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 54181f8..77bf64b 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -1,13 +1,13 @@ from http import HTTPStatus from typing import Annotated, Literal -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError +from core.errors import DatasetError, ProblemType, raise_problem from database.users import User from routers.dependencies import expdb_connection, fetch_user from schemas.datasets.openml import Quality @@ -35,10 +35,12 @@ def get_qualities( ) -> list[Quality]: dataset = database.datasets.get(dataset_id, expdb) if not dataset or not _user_has_access(dataset, user): - raise HTTPException( + raise_problem( status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": DatasetError.NO_DATA_FILE, "message": "Unknown dataset"}, - ) from None + type_=ProblemType.DATASET_NOT_FOUND, + detail="Unknown dataset.", + code=DatasetError.NO_DATA_FILE, + ) return database.qualities.get_for_dataset(dataset_id, expdb) # The PHP API provided (sometime) helpful error messages # if not qualities: diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index 6fe1dcc..0ff49e8 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -1,11 +1,12 @@ from http import HTTPStatus from typing import 
Annotated, Literal -from fastapi import APIRouter, Body, Depends, HTTPException +from fastapi import APIRouter, Body, Depends from pydantic import BaseModel from sqlalchemy import Connection, Row import database.studies +from core.errors import ProblemType, raise_problem from core.formatting import _str_to_bool from database.users import User, UserGroup from routers.dependencies import expdb_connection, fetch_user @@ -22,19 +23,29 @@ def _get_study_raise_otherwise(id_or_alias: int | str, user: User | None, expdb: study = database.studies.get_by_alias(id_or_alias, expdb) if study is None: - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Study not found.") + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.STUDY_NOT_FOUND, + detail="Study not found.", + ) if study.visibility == Visibility.PRIVATE: if user is None: - raise HTTPException( + raise_problem( status_code=HTTPStatus.UNAUTHORIZED, + type_=ProblemType.AUTHENTICATION_REQUIRED, detail="Must authenticate for private study.", ) if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Study is private.") + raise_problem( + status_code=HTTPStatus.FORBIDDEN, + type_=ProblemType.STUDY_PRIVATE, + detail="Study is private.", + ) if _str_to_bool(study.legacy): - raise HTTPException( + raise_problem( status_code=HTTPStatus.GONE, - detail="Legacy studies are no longer supported", + type_=ProblemType.STUDY_LEGACY, + detail="Legacy studies are no longer supported.", ) return study @@ -52,17 +63,23 @@ def attach_to_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> AttachDetachResponse: if user is None: - raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="User not found.") + raise_problem( + status_code=HTTPStatus.UNAUTHORIZED, + type_=ProblemType.AUTHENTICATION_REQUIRED, + detail="Authentication required.", + ) study = _get_study_raise_otherwise(study_id, user, expdb) # 
PHP lets *anyone* edit *any* study. We're not going to do that. if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise HTTPException( + raise_problem( status_code=HTTPStatus.FORBIDDEN, + type_=ProblemType.STUDY_NOT_EDITABLE, detail="Study can only be edited by its creator.", ) if study.status != StudyStatus.IN_PREPARATION: - raise HTTPException( + raise_problem( status_code=HTTPStatus.FORBIDDEN, + type_=ProblemType.STUDY_NOT_EDITABLE, detail="Study can only be edited while in preparation.", ) @@ -79,10 +96,11 @@ def attach_to_study( else: database.studies.attach_runs(run_ids=entity_ids, **attach_kwargs) except ValueError as e: - raise HTTPException( + raise_problem( status_code=HTTPStatus.CONFLICT, + type_=ProblemType.STUDY_CONFLICT, detail=str(e), - ) from None + ) return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_) @@ -93,23 +111,27 @@ def create_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> dict[Literal["study_id"], int]: if user is None: - raise HTTPException( + raise_problem( status_code=HTTPStatus.UNAUTHORIZED, + type_=ProblemType.AUTHENTICATION_REQUIRED, detail="Creating a study requires authentication.", ) if study.main_entity_type == StudyType.RUN and study.tasks: - raise HTTPException( + raise_problem( status_code=HTTPStatus.BAD_REQUEST, + type_=ProblemType.STUDY_INVALID_TYPE, detail="Cannot create a run study with tasks.", ) if study.main_entity_type == StudyType.TASK and study.runs: - raise HTTPException( + raise_problem( status_code=HTTPStatus.BAD_REQUEST, + type_=ProblemType.STUDY_INVALID_TYPE, detail="Cannot create a task study with runs.", ) if study.alias and database.studies.get_by_alias(study.alias, expdb): - raise HTTPException( + raise_problem( status_code=HTTPStatus.CONFLICT, + type_=ProblemType.STUDY_ALIAS_EXISTS, detail="Study alias already exists.", ) study_id = database.studies.create(study, user, expdb) diff --git a/src/routers/openml/tasks.py 
b/src/routers/openml/tasks.py index 8397f1d..8007020 100644 --- a/src/routers/openml/tasks.py +++ b/src/routers/openml/tasks.py @@ -4,12 +4,13 @@ from typing import Annotated, cast import xmltodict -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection, RowMapping, text import config import database.datasets import database.tasks +from core.errors import ProblemType, raise_problem from routers.dependencies import expdb_connection from schemas.datasets.openml import Task @@ -155,11 +156,16 @@ def get_task( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> Task: if not (task := database.tasks.get(task_id, expdb)): - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Task not found") + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.TASK_NOT_FOUND, + detail="Task not found.", + ) if not (task_type := database.tasks.get_task_type(task.ttid, expdb)): - raise HTTPException( + raise_problem( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Task type not found", + type_=ProblemType.INTERNAL_ERROR, + detail="Task type not found.", ) task_inputs = { diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py index 5213f17..9916499 100644 --- a/src/routers/openml/tasktype.py +++ b/src/routers/openml/tasktype.py @@ -2,9 +2,10 @@ from http import HTTPStatus from typing import Annotated, Any, Literal, cast -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection, Row +from core.errors import ProblemType, raise_problem from database.tasks import get_input_for_task_type, get_task_types from database.tasks import get_task_type as db_get_task_type from routers.dependencies import expdb_connection @@ -45,10 +46,12 @@ def get_task_type( ) -> dict[Literal["task_type"], dict[str, str | None | list[str] | list[dict[str, str]]]]: task_type_record = 
db_get_task_type(task_type_id, expdb) if task_type_record is None: - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "241", "message": "Unknown task type."}, - ) from None + raise_problem( + status_code=HTTPStatus.NOT_FOUND, + type_=ProblemType.TASK_TYPE_NOT_FOUND, + detail="Unknown task type.", + code=241, + ) task_type = _normalize_task_type(task_type_record) # Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B') diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index 5449862..7147eca 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -4,6 +4,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import ProblemType from database.datasets import get_tags_for from tests import constants from tests.users import ApiKey @@ -20,8 +21,11 @@ def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> No f"/datasets/tag{apikey}", json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"}, ) - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"} + assert response.status_code == HTTPStatus.UNAUTHORIZED + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.AUTHENTICATION_FAILED + assert error["code"] == "103" @pytest.mark.parametrize( @@ -58,15 +62,13 @@ def test_dataset_tag_fails_if_tag_exists(py_api: TestClient) -> None: f"/datasets/tag?api_key={ApiKey.ADMIN}", json={"data_id": dataset_id, "tag": tag}, ) - assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - expected = { - "detail": { - "code": "473", - "message": "Entity already tagged by this tag.", - "additional_information": f"id={dataset_id}; tag={tag}", - }, - } - assert expected == 
response.json() + assert response.status_code == HTTPStatus.CONFLICT + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.TAG_ALREADY_EXISTS + assert error["code"] == "473" + assert f"id={dataset_id}" in error["detail"] + assert f"tag={tag}" in error["detail"] @pytest.mark.parametrize( diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index e1ff17b..78f2d42 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -8,6 +8,7 @@ from hypothesis import strategies as st from starlette.testclient import TestClient +from core.errors import ProblemType from tests import constants from tests.users import ApiKey @@ -15,8 +16,11 @@ def _assert_empty_result( response: httpx.Response, ) -> None: - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == {"code": "372", "message": "No results"} + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.NO_RESULTS + assert error["code"] == "372" def test_list(py_api: TestClient) -> None: @@ -283,9 +287,21 @@ def test_list_data_identical( uri += api_key_query original = php_api.get(uri) - assert original.status_code == response.status_code, response.json() - if original.status_code == HTTPStatus.PRECONDITION_FAILED: - assert original.json()["error"] == response.json()["detail"] + # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results) + # and the error response format, so we can't compare error responses directly. 
+ php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED + py_is_error = response.status_code == HTTPStatus.NOT_FOUND + + if php_is_error or py_is_error: + # Both should be errors in the same cases + assert php_is_error == py_is_error, ( + f"PHP status={original.status_code}, Python status={response.status_code}" + ) + # Verify Python API returns RFC 9457 format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.NO_RESULTS + assert error["code"] == "372" return None new_json = response.json() # Qualities in new response are typed diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index 4ba5ad8..a1395f1 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -1,10 +1,10 @@ from http import HTTPStatus import pytest -from fastapi import HTTPException from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import ProblemDetailError, ProblemType from database.users import User from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus @@ -28,7 +28,13 @@ def test_error_unknown_dataset( response = py_api.get(f"/datasets/{dataset_id}") assert response.status_code == response_code - assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.DATASET_NOT_FOUND + assert error["title"] == "Dataset Not Found" + assert error["status"] == HTTPStatus.NOT_FOUND + assert error["detail"] == "Unknown dataset." 
+ assert error["code"] == "111" def test_get_dataset(py_api: TestClient) -> None: @@ -80,7 +86,7 @@ def test_private_dataset_no_access( user: User | None, expdb_test: Connection, ) -> None: - with pytest.raises(HTTPException) as e: + with pytest.raises(ProblemDetailError) as e: get_dataset( dataset_id=130, user=user, @@ -88,7 +94,8 @@ def test_private_dataset_no_access( expdb_db=expdb_test, ) assert e.value.status_code == HTTPStatus.FORBIDDEN - assert e.value.detail == {"code": "112", "message": "No access granted"} # type: ignore[comparison-overlap] + assert e.value.problem.type_ == ProblemType.DATASET_NO_ACCESS + assert e.value.extensions.get("code") == "112" @pytest.mark.parametrize( @@ -177,10 +184,11 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None: # In that case, no feature information will ever be available. response = py_api.get("/datasets/features/55") assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == { - "code": 274, - "message": "No features found. 
Additionally, dataset processed with error", - } + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.DATASET_PROCESSING_ERROR + assert error["code"] == "274" + assert "No features found" in error["detail"] def test_dataset_features_dataset_does_not_exist(py_api: TestClient) -> None: diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index d5188d0..2627053 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -2,11 +2,11 @@ import deepdiff.diff import pytest -from fastapi import HTTPException from pytest_mock import MockerFixture from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import ProblemDetailError, ProblemType from routers.openml.flows import flow_exists from tests.conftest import Flow @@ -53,10 +53,10 @@ def test_flow_exists_processes_found( def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None: mocker.patch("database.flows.get_by_name", return_value=None) - with pytest.raises(HTTPException) as error: + with pytest.raises(ProblemDetailError) as error: flow_exists("foo", "bar", expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND - assert error.value.detail == "Flow not found." + assert error.value.problem.type_ == ProblemType.FLOW_NOT_FOUND def test_flow_exists(flow: Flow, py_api: TestClient) -> None: @@ -68,7 +68,10 @@ def test_flow_exists(flow: Flow, py_api: TestClient) -> None: def test_flow_exists_not_exists(py_api: TestClient) -> None: response = py_api.get("/flows/exists/foo/bar") assert response.status_code == HTTPStatus.NOT_FOUND - assert response.json()["detail"] == "Flow not found." + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.FLOW_NOT_FOUND + assert error["detail"] == "Flow not found." 
def test_get_flow_no_subflow(py_api: TestClient) -> None: diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 011d8db..1c514f6 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -7,6 +7,7 @@ import tests.constants from core.conversions import nested_remove_single_element_list +from core.errors import ProblemType from tests.users import ApiKey @@ -28,7 +29,10 @@ def test_dataset_response_is_identical( # noqa: C901, PLR0912 assert original.status_code == new.status_code if new.status_code != HTTPStatus.OK: - assert original.json()["error"] == new.json()["detail"] + # RFC 9457: Python API now returns problem+json format + assert new.headers["content-type"] == "application/problem+json" + # Both APIs should return error responses in the same cases + assert "error" in original.json() return try: @@ -102,7 +106,11 @@ def test_error_unknown_dataset( # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED" assert response.status_code == HTTPStatus.NOT_FOUND - assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"} + # RFC 9457: Python API now returns problem+json format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.DATASET_NOT_FOUND + assert error["code"] == "111" @pytest.mark.parametrize( @@ -118,7 +126,10 @@ def test_private_dataset_no_user_no_access( # New response is 403: Forbidden instead of 412: PRECONDITION FAILED assert response.status_code == HTTPStatus.FORBIDDEN - assert response.json()["detail"] == {"code": "112", "message": "No access granted"} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.DATASET_NO_ACCESS + assert error["code"] == "112" @pytest.mark.parametrize( @@ -184,9 
+195,21 @@ def test_dataset_tag_response_is_identical( json={"data_id": dataset_id, "tag": tag}, ) + # RFC 9457: Tag conflict now returns 409 instead of 500 + if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: + assert new.status_code == HTTPStatus.CONFLICT + assert new.headers["content-type"] == "application/problem+json" + error = new.json() + assert error["type"] == ProblemType.TAG_ALREADY_EXISTS + assert error["code"] == "473" + return + assert original.status_code == new.status_code, original.json() if new.status_code != HTTPStatus.OK: - assert original.json()["error"] == new.json()["detail"] + # RFC 9457: Python API now returns problem+json format + assert new.headers["content-type"] == "application/problem+json" + # Both APIs should error in the same cases + assert "error" in original.json() return original = original.json() @@ -209,9 +232,14 @@ def test_datasets_feature_is_identical( assert response.status_code == original.status_code if response.status_code != HTTPStatus.OK: - error = response.json()["detail"] - error["code"] = str(error["code"]) - assert error == original.json()["error"] + # RFC 9457: Python API now returns problem+json format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + # Verify Python API returns properly typed RFC 9457 response + assert "type" in error + assert "status" in error + # Both APIs should error in the same cases + assert "error" in original.json() return python_body = response.json() diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index 674bc43..e3a559d 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -10,6 +10,7 @@ nested_remove_single_element_list, nested_str_to_num, ) +from core.errors import ProblemType from tests.conftest import Flow @@ -27,7 +28,11 @@ def test_flow_exists_not( 
expect_php = {"flow_exists": {"exists": "false", "id": str(-1)}} assert php_response.json() == expect_php - assert py_response.json() == {"detail": "Flow not found."} + # RFC 9457: Python API now returns problem+json format + assert py_response.headers["content-type"] == "application/problem+json" + error = py_response.json() + assert error["type"] == ProblemType.FLOW_NOT_FOUND + assert error["detail"] == "Flow not found." @pytest.mark.mut diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index eed569e..814607c 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -6,6 +6,8 @@ from sqlalchemy import Connection, text from starlette.testclient import TestClient +from core.errors import ProblemType + def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None: expdb_test.execute( @@ -313,6 +315,9 @@ def test_get_quality_identical_error( php_response = php_api.get(f"/data/qualities/{data_id}") python_response = py_api.get(f"/datasets/qualities/{data_id}") assert python_response.status_code == php_response.status_code - # The "dataset unknown" error currently has a separate code in PHP depending on - # where it occurs (e.g., get dataset->113 get quality->361) - assert python_response.json()["detail"]["message"] == php_response.json()["error"]["message"] + # RFC 9457: Python API now returns problem+json format + assert python_response.headers["content-type"] == "application/problem+json" + error = python_response.json() + assert error["type"] == ProblemType.DATASET_NOT_FOUND + # Verify the error message matches the PHP API semantically + assert "Unknown dataset" in error["detail"] diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index a9a8ed4..b2dd862 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -5,6 +5,7 @@ from sqlalchemy import Connection, text from starlette.testclient 
import TestClient +from core.errors import ProblemType from schemas.study import StudyType from tests.users import ApiKey @@ -556,7 +557,10 @@ def test_attach_task_to_study_already_linked_raises( expdb_test=expdb_test, ) assert response.status_code == HTTPStatus.CONFLICT, response.content - assert response.json() == {"detail": "Task 1 is already attached to study 1."} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.STUDY_CONFLICT + assert "Task 1 is already attached to study 1" in error["detail"] def test_attach_task_to_study_but_task_not_exist_raises( @@ -572,4 +576,7 @@ def test_attach_task_to_study_but_task_not_exist_raises( expdb_test=expdb_test, ) assert response.status_code == HTTPStatus.CONFLICT - assert response.json() == {"detail": "One or more of the tasks do not exist."} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == ProblemType.STUDY_CONFLICT + assert "do not exist" in error["detail"] diff --git a/tests/routers/openml/task_type_test.py b/tests/routers/openml/task_type_test.py index d14929c..95d5c70 100644 --- a/tests/routers/openml/task_type_test.py +++ b/tests/routers/openml/task_type_test.py @@ -5,6 +5,8 @@ import pytest from starlette.testclient import TestClient +from core.errors import ProblemType + def test_list_task_type(py_api: TestClient, php_api: httpx.Client) -> None: response = py_api.get("/tasktype/list") @@ -36,5 +38,9 @@ def test_get_task_type(ttype_id: int, py_api: TestClient, php_api: httpx.Client) def test_get_task_type_unknown(py_api: TestClient) -> None: response = py_api.get("/tasktype/1000") - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json() == {"detail": {"code": "241", "message": "Unknown task type."}} + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.headers["content-type"] == "application/problem+json" + 
error = response.json() + assert error["type"] == ProblemType.TASK_TYPE_NOT_FOUND + assert error["code"] == "241" + assert "Unknown task type" in error["detail"] From 81b4f60e36ff486eaba561fd8b9dbb8ae2a33d47 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 11 Feb 2026 15:34:06 +0200 Subject: [PATCH 3/9] make access safe even if toml doesn't have dev section --- src/database/users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/database/users.py b/src/database/users.py index 220c34a..1f66640 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -10,7 +10,7 @@ # If `allow_test_api_keys` is set, the key may also be one of `normaluser`, # `normaluser2`, or `abc` (admin). api_key_pattern = r"^[0-9a-fA-F]{32}$" -if load_configuration()["development"].get("allow_test_api_keys"): +if load_configuration().get("development", {}).get("allow_test_api_keys"): api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$" APIKey = Annotated[ From 50ed2357eb80c7f24b560785f816b49cade23365 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 12 Feb 2026 11:00:19 +0200 Subject: [PATCH 4/9] Simplify model definition --- src/core/errors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index a77e3e8..bd8f799 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -10,7 +10,7 @@ from fastapi import Request from fastapi.responses import JSONResponse -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field # JSON-serializable extension value type for RFC 9457 problem details type ExtensionValue = str | int | float | bool | None | list[str] | list[int] @@ -29,11 +29,8 @@ class ProblemDetail(BaseModel): when not provided. The `status` field is advisory and should match the HTTP status code. 
""" - model_config = ConfigDict(populate_by_name=True) - type_: str = Field( default="about:blank", - alias="type", serialization_alias="type", description="A URI reference identifying the problem type. Defaults to 'about:blank'.", ) From dcc5fcdc9aed8da0279c4e6a8edf41c501296e4a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 12 Feb 2026 11:14:57 +0200 Subject: [PATCH 5/9] Update name in docstring --- src/core/errors.py | 4 ++-- src/routers/openml/datasets.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index bd8f799..149e5ed 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -56,7 +56,7 @@ class ProblemDetailError(Exception): """Exception that produces RFC 9457 compliant error responses. Usage: - raise ProblemDetailException( + raise ProblemDetailError( status_code=HTTPStatus.NOT_FOUND, detail="Dataset 123 was not found.", title="Dataset Not Found", @@ -90,7 +90,7 @@ def problem_detail_exception_handler( request: Request, # noqa: ARG001 exc: ProblemDetailError, ) -> JSONResponse: - """FastAPI exception handler for ProblemDetailException. + """FastAPI exception handler for ProblemDetailError. Returns a response with: - Content-Type: application/problem+json diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index b2ad65d..b18e987 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -259,7 +259,7 @@ def _get_dataset_raise_otherwise( ) -> Row: """Fetches the dataset from the database if it exists and the user has permissions. - Raises ProblemDetailException if the dataset does not exist or the user can not access it. + Raises ProblemDetailError if the dataset does not exist or the user can not access it. 
""" if not (dataset := database.datasets.get(dataset_id, expdb)): raise_problem( From b6db69076f71ff30d3c8584ce4ddff1dc70fdce5 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Feb 2026 11:06:40 +0200 Subject: [PATCH 6/9] Rewrite errors to separate classes --- src/core/errors.py | 625 +++++++++++++++++++------- src/routers/mldcat_ap/dataset.py | 10 +- src/routers/openml/datasets.py | 142 +++--- src/routers/openml/flows.py | 17 +- src/routers/openml/qualities.py | 11 +- src/routers/openml/study.py | 96 ++-- src/routers/openml/tasks.py | 17 +- src/routers/openml/tasktype.py | 11 +- tests/routers/openml/datasets_test.py | 8 +- tests/routers/openml/flows_test.py | 6 +- 10 files changed, 571 insertions(+), 372 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 149e5ed..8db4965 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -6,14 +6,9 @@ from enum import IntEnum from http import HTTPStatus -from typing import NoReturn from fastapi import Request from fastapi.responses import JSONResponse -from pydantic import BaseModel, Field - -# JSON-serializable extension value type for RFC 9457 problem details -type ExtensionValue = str | int | float | bool | None | list[str] | list[int] class DatasetError(IntEnum): @@ -22,68 +17,46 @@ class DatasetError(IntEnum): NO_DATA_FILE = 113 -class ProblemDetail(BaseModel): - """RFC 9457 Problem Details model. - - All fields are optional per the specification, but `type` defaults to "about:blank" - when not provided. The `status` field is advisory and should match the HTTP status code. - """ - - type_: str = Field( - default="about:blank", - serialization_alias="type", - description="A URI reference identifying the problem type. Defaults to 'about:blank'.", - ) - title: str | None = Field( - default=None, - description="A short, human-readable summary of the problem type.", - ) - status: int | None = Field( - default=None, - description="The HTTP status code. 
Advisory only, should match the actual status.", - ) - detail: str | None = Field( - default=None, - description="A human-readable explanation specific to this occurrence of the problem.", - ) - instance: str | None = Field( - default=None, - description="A URI reference identifying this specific occurrence of the problem.", - ) +# ============================================================================= +# Base Exception +# ============================================================================= class ProblemDetailError(Exception): - """Exception that produces RFC 9457 compliant error responses. + """Base exception for RFC 9457 compliant error responses. - Usage: - raise ProblemDetailError( - status_code=HTTPStatus.NOT_FOUND, - detail="Dataset 123 was not found.", - title="Dataset Not Found", - type_="https://openml.org/problems/dataset-not-found", - code="111", # Extension field for legacy error codes - ) + Subclasses should define class attributes: + - uri: The problem type URI + - title: Human-readable title + - status_code: HTTP status code + + The status_code can be overridden per-instance for backwards compatibility. 
""" + uri: str = "about:blank" + title: str = "An error occurred" + _default_status_code: HTTPStatus = HTTPStatus.INTERNAL_SERVER_ERROR + def __init__( self, - status_code: HTTPStatus | int, - detail: str | None = None, - title: str | None = None, - type_: str = "about:blank", + detail: str, + *, + code: int | str | None = None, instance: str | None = None, - **extensions: ExtensionValue, + status_code: HTTPStatus | None = None, ) -> None: - self.status_code = int(status_code) - self.problem = ProblemDetail( - type_=type_, - title=title, - status=self.status_code, - detail=detail, - instance=instance, - ) - self.extensions = extensions - super().__init__(detail or title or "An error occurred") + self.detail = detail + self.code = code + self.instance = instance + self._status_code_override = status_code + super().__init__(detail) + + @property + def status_code(self) -> HTTPStatus: + """Return the status code, preferring instance override over class default.""" + if self._status_code_override is not None: + return self._status_code_override + return self._default_status_code def problem_detail_exception_handler( @@ -96,124 +69,440 @@ def problem_detail_exception_handler( - Content-Type: application/problem+json - RFC 9457 compliant JSON body """ - content = exc.problem.model_dump(by_alias=True, exclude_none=True) - content.update(exc.extensions) + content: dict[str, str | int] = { + "type": exc.uri, + "title": exc.title, + "status": int(exc.status_code), + "detail": exc.detail, + } + if exc.code is not None: + content["code"] = str(exc.code) + if exc.instance is not None: + content["instance"] = exc.instance return JSONResponse( - status_code=exc.status_code, + status_code=int(exc.status_code), content=content, media_type="application/problem+json", ) -# Problem type URIs for OpenML-specific errors -# These should be documented at the corresponding URLs -class ProblemType: - """Problem type URIs for common OpenML errors.""" - - # Dataset errors - DATASET_NOT_FOUND 
= "https://openml.org/problems/dataset-not-found" - DATASET_NO_ACCESS = "https://openml.org/problems/dataset-no-access" - DATASET_NO_DATA_FILE = "https://openml.org/problems/dataset-no-data-file" - DATASET_NOT_PROCESSED = "https://openml.org/problems/dataset-not-processed" - DATASET_PROCESSING_ERROR = "https://openml.org/problems/dataset-processing-error" - DATASET_NO_FEATURES = "https://openml.org/problems/dataset-no-features" - DATASET_STATUS_TRANSITION = "https://openml.org/problems/dataset-status-transition" - DATASET_NOT_OWNED = "https://openml.org/problems/dataset-not-owned" - DATASET_ADMIN_ONLY = "https://openml.org/problems/dataset-admin-only" - - # Authentication/Authorization errors - AUTHENTICATION_REQUIRED = "https://openml.org/problems/authentication-required" - AUTHENTICATION_FAILED = "https://openml.org/problems/authentication-failed" - FORBIDDEN = "https://openml.org/problems/forbidden" - - # Tag errors - TAG_ALREADY_EXISTS = "https://openml.org/problems/tag-already-exists" - - # Search/List errors - NO_RESULTS = "https://openml.org/problems/no-results" - - # Study errors - STUDY_NOT_FOUND = "https://openml.org/problems/study-not-found" - STUDY_PRIVATE = "https://openml.org/problems/study-private" - STUDY_LEGACY = "https://openml.org/problems/study-legacy" - STUDY_ALIAS_EXISTS = "https://openml.org/problems/study-alias-exists" - STUDY_INVALID_TYPE = "https://openml.org/problems/study-invalid-type" - STUDY_NOT_EDITABLE = "https://openml.org/problems/study-not-editable" - STUDY_CONFLICT = "https://openml.org/problems/study-conflict" - - # Task errors - TASK_NOT_FOUND = "https://openml.org/problems/task-not-found" - TASK_TYPE_NOT_FOUND = "https://openml.org/problems/task-type-not-found" - - # Flow errors - FLOW_NOT_FOUND = "https://openml.org/problems/flow-not-found" - - # Service errors - SERVICE_NOT_FOUND = "https://openml.org/problems/service-not-found" - - # Internal errors - INTERNAL_ERROR = "https://openml.org/problems/internal-error" - - -# 
Human-readable titles for problem types -PROBLEM_TITLES: dict[str, str] = { - ProblemType.DATASET_NOT_FOUND: "Dataset Not Found", - ProblemType.DATASET_NO_ACCESS: "Dataset Access Denied", - ProblemType.DATASET_NO_DATA_FILE: "Dataset Data File Missing", - ProblemType.DATASET_NOT_PROCESSED: "Dataset Not Processed", - ProblemType.DATASET_PROCESSING_ERROR: "Dataset Processing Error", - ProblemType.DATASET_NO_FEATURES: "Dataset Features Not Available", - ProblemType.DATASET_STATUS_TRANSITION: "Invalid Status Transition", - ProblemType.DATASET_NOT_OWNED: "Dataset Not Owned", - ProblemType.DATASET_ADMIN_ONLY: "Administrator Only", - ProblemType.AUTHENTICATION_REQUIRED: "Authentication Required", - ProblemType.AUTHENTICATION_FAILED: "Authentication Failed", - ProblemType.FORBIDDEN: "Forbidden", - ProblemType.TAG_ALREADY_EXISTS: "Tag Already Exists", - ProblemType.NO_RESULTS: "No Results Found", - ProblemType.STUDY_NOT_FOUND: "Study Not Found", - ProblemType.STUDY_PRIVATE: "Study Is Private", - ProblemType.STUDY_LEGACY: "Legacy Study Not Supported", - ProblemType.STUDY_ALIAS_EXISTS: "Study Alias Already Exists", - ProblemType.STUDY_INVALID_TYPE: "Invalid Study Type", - ProblemType.STUDY_NOT_EDITABLE: "Study Not Editable", - ProblemType.STUDY_CONFLICT: "Study Conflict", - ProblemType.TASK_NOT_FOUND: "Task Not Found", - ProblemType.TASK_TYPE_NOT_FOUND: "Task Type Not Found", - ProblemType.FLOW_NOT_FOUND: "Flow Not Found", - ProblemType.SERVICE_NOT_FOUND: "Service Not Found", - ProblemType.INTERNAL_ERROR: "Internal Server Error", +# ============================================================================= +# Dataset Errors +# ============================================================================= + + +class DatasetNotFoundError(ProblemDetailError): + """Raised when a dataset cannot be found. + + # Future: detail=f"Dataset {dataset_id} not found." 
+ # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-not-found" + title = "Dataset Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +class DatasetNoAccessError(ProblemDetailError): + """Raised when user doesn't have access to a dataset. + + # Future: detail=f"Access denied to dataset {dataset_id}." + # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-no-access" + title = "Dataset Access Denied" + _default_status_code = HTTPStatus.FORBIDDEN + + +class DatasetNoDataFileError(ProblemDetailError): + """Raised when a dataset's data file is missing. + + # Future: detail=f"Data file for dataset {dataset_id} not found." + # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-no-data-file" + title = "Dataset Data File Missing" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + + +class DatasetNotProcessedError(ProblemDetailError): + """Raised when a dataset has not been processed yet. + + # Future: detail=f"Dataset {dataset_id} has not been processed yet." + # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-not-processed" + title = "Dataset Not Processed" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + + +class DatasetProcessingError(ProblemDetailError): + """Raised when a dataset had an error during processing. + + # Future: detail=f"Dataset {dataset_id} encountered an error during processing." + # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-processing-error" + title = "Dataset Processing Error" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + + +class DatasetNoFeaturesError(ProblemDetailError): + """Raised when a dataset has no features available. + + # Future: detail=f"No features found for dataset {dataset_id}." 
+ # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-no-features" + title = "Dataset Features Not Available" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + + +class DatasetStatusTransitionError(ProblemDetailError): + """Raised when an invalid dataset status transition is attempted. + + # Future: detail=f"Cannot transition dataset {dataset_id} from {from_status} to {to_status}." + # Future: validate statuses are valid DatasetStatus values + """ + + uri = "https://openml.org/problems/dataset-status-transition" + title = "Invalid Status Transition" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + + +class DatasetNotOwnedError(ProblemDetailError): + """Raised when user tries to modify a dataset they don't own. + + # Future: detail=f"Dataset {dataset_id} is not owned by you." + # Future: validate dataset_id is positive int + """ + + uri = "https://openml.org/problems/dataset-not-owned" + title = "Dataset Not Owned" + _default_status_code = HTTPStatus.FORBIDDEN + + +class DatasetAdminOnlyError(ProblemDetailError): + """Raised when a non-admin tries to perform an admin-only action. + + # Future: detail=f"Only administrators can {action}." + # Future: validate action is non-empty string + """ + + uri = "https://openml.org/problems/dataset-admin-only" + title = "Administrator Only" + _default_status_code = HTTPStatus.FORBIDDEN + + +# ============================================================================= +# Authentication/Authorization Errors +# ============================================================================= + + +class AuthenticationRequiredError(ProblemDetailError): + """Raised when authentication is required but not provided. + + # Future: detail=f"{action} requires authentication." 
+ # Future: validate action is non-empty string + """ + + uri = "https://openml.org/problems/authentication-required" + title = "Authentication Required" + _default_status_code = HTTPStatus.UNAUTHORIZED + + +class AuthenticationFailedError(ProblemDetailError): + """Raised when authentication credentials are invalid. + + # Future: detail="Authentication failed. Invalid or expired credentials." + """ + + uri = "https://openml.org/problems/authentication-failed" + title = "Authentication Failed" + _default_status_code = HTTPStatus.UNAUTHORIZED + + +class ForbiddenError(ProblemDetailError): + """Raised when user is authenticated but not authorized. + + # Future: detail=f"You do not have permission to {action}." + # Future: validate action is non-empty string + """ + + uri = "https://openml.org/problems/forbidden" + title = "Forbidden" + _default_status_code = HTTPStatus.FORBIDDEN + + +# ============================================================================= +# Tag Errors +# ============================================================================= + + +class TagAlreadyExistsError(ProblemDetailError): + """Raised when trying to add a tag that already exists. + + # Future: detail=f"Entity {entity_id} is already tagged with '{tag}'." + # Future: validate entity_id is positive int, tag is non-empty string + """ + + uri = "https://openml.org/problems/tag-already-exists" + title = "Tag Already Exists" + _default_status_code = HTTPStatus.CONFLICT + + +# ============================================================================= +# Search/List Errors +# ============================================================================= + + +class NoResultsError(ProblemDetailError): + """Raised when a search returns no results. + + # Future: detail="No results match the search criteria." 
+ """ + + uri = "https://openml.org/problems/no-results" + title = "No Results Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Study Errors +# ============================================================================= + + +class StudyNotFoundError(ProblemDetailError): + """Raised when a study cannot be found. + + # Future: detail=f"Study {study_id} not found." + # Future: validate study_id is positive int or valid alias string + """ + + uri = "https://openml.org/problems/study-not-found" + title = "Study Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +class StudyPrivateError(ProblemDetailError): + """Raised when trying to access a private study without permission. + + # Future: detail=f"Study {study_id} is private." + # Future: validate study_id is positive int + """ + + uri = "https://openml.org/problems/study-private" + title = "Study Is Private" + _default_status_code = HTTPStatus.FORBIDDEN + + +class StudyLegacyError(ProblemDetailError): + """Raised when trying to access a legacy study that's no longer supported. + + # Future: detail=f"Study {study_id} is a legacy study and no longer supported." + # Future: validate study_id is positive int + """ + + uri = "https://openml.org/problems/study-legacy" + title = "Legacy Study Not Supported" + _default_status_code = HTTPStatus.GONE + + +class StudyAliasExistsError(ProblemDetailError): + """Raised when trying to create a study with an alias that already exists. + + # Future: detail=f"Study alias '{alias}' already exists." + # Future: validate alias is non-empty string + """ + + uri = "https://openml.org/problems/study-alias-exists" + title = "Study Alias Already Exists" + _default_status_code = HTTPStatus.CONFLICT + + +class StudyInvalidTypeError(ProblemDetailError): + """Raised when study type configuration is invalid. + + # Future: detail=f"Cannot create {study_type} study with {invalid_field}." 
+ """ + + uri = "https://openml.org/problems/study-invalid-type" + title = "Invalid Study Type" + _default_status_code = HTTPStatus.BAD_REQUEST + + +class StudyNotEditableError(ProblemDetailError): + """Raised when trying to edit a study that cannot be edited. + + # Future: detail=f"Study {study_id} cannot be edited. {reason}" + # Future: validate study_id is positive int + """ + + uri = "https://openml.org/problems/study-not-editable" + title = "Study Not Editable" + _default_status_code = HTTPStatus.FORBIDDEN + + +class StudyConflictError(ProblemDetailError): + """Raised when there's a conflict with study data (e.g., duplicate attachment). + + # Future: detail=f"Conflict: {reason}" + """ + + uri = "https://openml.org/problems/study-conflict" + title = "Study Conflict" + _default_status_code = HTTPStatus.CONFLICT + + +# ============================================================================= +# Task Errors +# ============================================================================= + + +class TaskNotFoundError(ProblemDetailError): + """Raised when a task cannot be found. + + # Future: detail=f"Task {task_id} not found." + # Future: validate task_id is positive int + """ + + uri = "https://openml.org/problems/task-not-found" + title = "Task Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +class TaskTypeNotFoundError(ProblemDetailError): + """Raised when a task type cannot be found. + + # Future: detail=f"Task type {task_type_id} not found." + # Future: validate task_type_id is positive int + """ + + uri = "https://openml.org/problems/task-type-not-found" + title = "Task Type Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Flow Errors +# ============================================================================= + + +class FlowNotFoundError(ProblemDetailError): + """Raised when a flow cannot be found. 
+ + # Future: detail=f"Flow {flow_id} not found." or "Flow '{name}' version '{version}' not found." + # Future: validate flow_id is positive int + """ + + uri = "https://openml.org/problems/flow-not-found" + title = "Flow Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Service Errors +# ============================================================================= + + +class ServiceNotFoundError(ProblemDetailError): + """Raised when a service cannot be found. + + # Future: detail=f"Service {service_id} not found." + # Future: validate service_id is positive int + """ + + uri = "https://openml.org/problems/service-not-found" + title = "Service Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Internal Errors +# ============================================================================= + + +class InternalError(ProblemDetailError): + """Raised for unexpected internal server errors. + + # Future: detail="An unexpected error occurred. Please try again later." 
+ """ + + uri = "https://openml.org/problems/internal-error" + title = "Internal Server Error" + _default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR + + +# ============================================================================= +# Backwards Compatibility +# ============================================================================= + +# Mapping from old ProblemType strings to new exception classes +_PROBLEM_TYPE_TO_EXCEPTION: dict[str, type[ProblemDetailError]] = { + "https://openml.org/problems/dataset-not-found": DatasetNotFoundError, + "https://openml.org/problems/dataset-no-access": DatasetNoAccessError, + "https://openml.org/problems/dataset-no-data-file": DatasetNoDataFileError, + "https://openml.org/problems/dataset-not-processed": DatasetNotProcessedError, + "https://openml.org/problems/dataset-processing-error": DatasetProcessingError, + "https://openml.org/problems/dataset-no-features": DatasetNoFeaturesError, + "https://openml.org/problems/dataset-status-transition": DatasetStatusTransitionError, + "https://openml.org/problems/dataset-not-owned": DatasetNotOwnedError, + "https://openml.org/problems/dataset-admin-only": DatasetAdminOnlyError, + "https://openml.org/problems/authentication-required": AuthenticationRequiredError, + "https://openml.org/problems/authentication-failed": AuthenticationFailedError, + "https://openml.org/problems/forbidden": ForbiddenError, + "https://openml.org/problems/tag-already-exists": TagAlreadyExistsError, + "https://openml.org/problems/no-results": NoResultsError, + "https://openml.org/problems/study-not-found": StudyNotFoundError, + "https://openml.org/problems/study-private": StudyPrivateError, + "https://openml.org/problems/study-legacy": StudyLegacyError, + "https://openml.org/problems/study-alias-exists": StudyAliasExistsError, + "https://openml.org/problems/study-invalid-type": StudyInvalidTypeError, + "https://openml.org/problems/study-not-editable": StudyNotEditableError, + 
"https://openml.org/problems/study-conflict": StudyConflictError, + "https://openml.org/problems/task-not-found": TaskNotFoundError, + "https://openml.org/problems/task-type-not-found": TaskTypeNotFoundError, + "https://openml.org/problems/flow-not-found": FlowNotFoundError, + "https://openml.org/problems/service-not-found": ServiceNotFoundError, + "https://openml.org/problems/internal-error": InternalError, } -def raise_problem( - status_code: HTTPStatus | int, - type_: str, - detail: str, - *, - instance: str | None = None, - code: int | str | None = None, - **extensions: ExtensionValue, -) -> NoReturn: - """Helper function to raise RFC 9457 compliant errors. - - Args: - status_code: HTTP status code for the response. - type_: Problem type URI identifying the error class. - detail: Human-readable explanation of this specific error occurrence. - instance: Optional URI identifying this specific error occurrence. - code: Optional legacy OpenML error code (for backwards compatibility). - **extensions: Additional extension fields to include in the response. - """ - title = PROBLEM_TITLES.get(type_) - if code is not None: - extensions["code"] = str(code) - raise ProblemDetailError( - status_code=status_code, - detail=detail, - title=title, - type_=type_, - instance=instance, - **extensions, - ) +class ProblemType: + """Problem type URIs for common OpenML errors. + + Deprecated: Use the specific exception classes directly instead. 
+ """ + + DATASET_NOT_FOUND = DatasetNotFoundError.uri + DATASET_NO_ACCESS = DatasetNoAccessError.uri + DATASET_NO_DATA_FILE = DatasetNoDataFileError.uri + DATASET_NOT_PROCESSED = DatasetNotProcessedError.uri + DATASET_PROCESSING_ERROR = DatasetProcessingError.uri + DATASET_NO_FEATURES = DatasetNoFeaturesError.uri + DATASET_STATUS_TRANSITION = DatasetStatusTransitionError.uri + DATASET_NOT_OWNED = DatasetNotOwnedError.uri + DATASET_ADMIN_ONLY = DatasetAdminOnlyError.uri + AUTHENTICATION_REQUIRED = AuthenticationRequiredError.uri + AUTHENTICATION_FAILED = AuthenticationFailedError.uri + FORBIDDEN = ForbiddenError.uri + TAG_ALREADY_EXISTS = TagAlreadyExistsError.uri + NO_RESULTS = NoResultsError.uri + STUDY_NOT_FOUND = StudyNotFoundError.uri + STUDY_PRIVATE = StudyPrivateError.uri + STUDY_LEGACY = StudyLegacyError.uri + STUDY_ALIAS_EXISTS = StudyAliasExistsError.uri + STUDY_INVALID_TYPE = StudyInvalidTypeError.uri + STUDY_NOT_EDITABLE = StudyNotEditableError.uri + STUDY_CONFLICT = StudyConflictError.uri + TASK_NOT_FOUND = TaskNotFoundError.uri + TASK_TYPE_NOT_FOUND = TaskTypeNotFoundError.uri + FLOW_NOT_FOUND = FlowNotFoundError.uri + SERVICE_NOT_FOUND = ServiceNotFoundError.uri + INTERNAL_ERROR = InternalError.uri diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py index eaa5652..00c7610 100644 --- a/src/routers/mldcat_ap/dataset.py +++ b/src/routers/mldcat_ap/dataset.py @@ -4,14 +4,13 @@ Specific queries could be written to fetch e.g., a single feature or quality. 
""" -from http import HTTPStatus from typing import Annotated from fastapi import APIRouter, Depends from sqlalchemy import Connection import config -from core.errors import ProblemType, raise_problem +from core.errors import ServiceNotFoundError from database.users import User from routers.dependencies import expdb_connection, fetch_user, userdb_connection from routers.openml.datasets import get_dataset, get_dataset_features @@ -123,11 +122,8 @@ def get_mldcat_ap_distribution( ) def get_dataservice(service_id: int) -> JsonLDGraph: if service_id != 1: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.SERVICE_NOT_FOUND, - detail="Service not found.", - ) + msg = "Service not found." + raise ServiceNotFoundError(msg) return JsonLDGraph( context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld", graph=[ diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index b18e987..fed0bba 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -1,7 +1,6 @@ import re from datetime import datetime from enum import StrEnum -from http import HTTPStatus from typing import Annotated, Any, Literal, NamedTuple from fastapi import APIRouter, Body, Depends @@ -11,7 +10,23 @@ import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError, ProblemType, raise_problem +from core.errors import ( + AuthenticationFailedError, + AuthenticationRequiredError, + DatasetAdminOnlyError, + DatasetError, + DatasetNoAccessError, + DatasetNoDataFileError, + DatasetNoFeaturesError, + DatasetNotFoundError, + DatasetNotOwnedError, + DatasetNotProcessedError, + DatasetProcessingError, + DatasetStatusTransitionError, + InternalError, + NoResultsError, + TagAlreadyExistsError, +) from core.formatting import ( _csv_as_list, _format_dataset_url, @@ -36,20 +51,12 @@ def tag_dataset( ) -> dict[str, dict[str, Any]]: tags = 
database.datasets.get_tags_for(data_id, expdb_db) if tag.casefold() in [t.casefold() for t in tags]: - raise_problem( - status_code=HTTPStatus.CONFLICT, - type_=ProblemType.TAG_ALREADY_EXISTS, - detail=f"Entity already tagged by this tag. id={data_id}; tag={tag}", - code=473, - ) + msg = f"Entity already tagged by this tag. id={data_id}; tag={tag}" + raise TagAlreadyExistsError(msg, code=473) if user is None: - raise_problem( - status_code=HTTPStatus.UNAUTHORIZED, - type_=ProblemType.AUTHENTICATION_FAILED, - detail="Authentication failed.", - code=103, - ) + msg = "Authentication failed." + raise AuthenticationFailedError(msg, code=103) database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db) return { @@ -194,12 +201,8 @@ def quality_clause(quality: str, range_: str | None) -> str: row.did: dict(zip(columns, row, strict=True)) for row in rows } if not datasets: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.NO_RESULTS, - detail="No datasets match the search criteria.", - code=372, - ) + msg = "No datasets match the search criteria." + raise NoResultsError(msg, code=372) for dataset in datasets.values(): # The old API does not actually provide the checksum but just an empty field @@ -262,20 +265,12 @@ def _get_dataset_raise_otherwise( Raises ProblemDetailError if the dataset does not exist or the user can not access it. """ if not (dataset := database.datasets.get(dataset_id, expdb)): - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.DATASET_NOT_FOUND, - detail="Unknown dataset.", - code=DatasetError.NOT_FOUND, - ) + msg = "Unknown dataset." + raise DatasetNotFoundError(msg, code=DatasetError.NOT_FOUND) if not _user_has_access(dataset=dataset, user=user): - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.DATASET_NO_ACCESS, - detail="No access granted.", - code=DatasetError.NO_ACCESS, - ) + msg = "No access granted." 
+ raise DatasetNoAccessError(msg, code=DatasetError.NO_ACCESS) return dataset @@ -298,32 +293,19 @@ def get_dataset_features( if not features: processing_state = database.datasets.get_latest_processing_update(dataset_id, expdb) if processing_state is None: - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_NOT_PROCESSED, - detail=( - "Dataset not processed yet. The dataset was not processed yet, " - "features are not yet available. Please wait for a few minutes." - ), - code=273, - ) - elif processing_state.error: - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_PROCESSING_ERROR, - detail="No features found. Additionally, dataset processed with error.", - code=274, - ) - else: - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_NO_FEATURES, - detail=( - "No features found. " - "The dataset did not contain any features, or we could not extract them." - ), - code=272, + msg = ( + "Dataset not processed yet. The dataset was not processed yet, " + "features are not yet available. Please wait for a few minutes." ) + raise DatasetNotProcessedError(msg, code=273) + if processing_state.error: + msg = "No features found. Additionally, dataset processed with error." + raise DatasetProcessingError(msg, code=274) + msg = ( + "No features found. " + "The dataset did not contain any features, or we could not extract them." + ) + raise DatasetNoFeaturesError(msg, code=272) return features @@ -337,38 +319,23 @@ def update_dataset_status( expdb: Annotated[Connection, Depends(expdb_connection)], ) -> dict[str, str | int]: if user is None: - raise_problem( - status_code=HTTPStatus.UNAUTHORIZED, - type_=ProblemType.AUTHENTICATION_REQUIRED, - detail="Updating dataset status requires authentication.", - ) + msg = "Updating dataset status requires authentication." 
+ raise AuthenticationRequiredError(msg) dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb) can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups if status == DatasetStatus.DEACTIVATED and not can_deactivate: - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.DATASET_NOT_OWNED, - detail="Dataset is not owned by you.", - code=693, - ) + msg = "Dataset is not owned by you." + raise DatasetNotOwnedError(msg, code=693) if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups: - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.DATASET_ADMIN_ONLY, - detail="Only administrators can activate datasets.", - code=696, - ) + msg = "Only administrators can activate datasets." + raise DatasetAdminOnlyError(msg, code=696) current_status = database.datasets.get_status(dataset_id, expdb) if current_status and current_status.status == status: - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_STATUS_TRANSITION, - detail="Illegal status transition.", - code=694, - ) + msg = "Illegal status transition." 
+ raise DatasetStatusTransitionError(msg, code=694) # If current status is unknown, it is effectively "in preparation", # So the following transitions are allowed (first 3 transitions are first clause) @@ -381,11 +348,8 @@ def update_dataset_status( elif current_status.status == DatasetStatus.DEACTIVATED: database.datasets.remove_deactivated_status(dataset_id, expdb) else: - raise_problem( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - type_=ProblemType.INTERNAL_ERROR, - detail=f"Unknown status transition: {current_status} -> {status}", - ) + msg = f"Unknown status transition: {current_status} -> {status}" + raise InternalError(msg) return {"dataset_id": dataset_id, "status": status} @@ -404,12 +368,8 @@ def get_dataset( if not ( dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db) ): - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_NO_DATA_FILE, - detail="No data file found.", - code=DatasetError.NO_DATA_FILE, - ) + msg = "No data file found." 
+ raise DatasetNoDataFileError(msg, code=DatasetError.NO_DATA_FILE) tags = database.datasets.get_tags_for(dataset_id, expdb_db) description = database.datasets.get_description(dataset_id, expdb_db) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index afd7e2a..64dd083 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -1,4 +1,3 @@ -from http import HTTPStatus from typing import Annotated, Literal from fastapi import APIRouter, Depends @@ -6,7 +5,7 @@ import database.flows from core.conversions import _str_to_num -from core.errors import ProblemType, raise_problem +from core.errors import FlowNotFoundError from routers.dependencies import expdb_connection from schemas.flows import Flow, Parameter, Subflow @@ -22,11 +21,8 @@ def flow_exists( """Check if a Flow with the name and version exists, if so, return the flow id.""" flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb) if flow is None: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.FLOW_NOT_FOUND, - detail="Flow not found.", - ) + msg = "Flow not found." + raise FlowNotFoundError(msg) return {"flow_id": flow.id} @@ -34,11 +30,8 @@ def flow_exists( def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow: flow = database.flows.get(flow_id, expdb) if not flow: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.FLOW_NOT_FOUND, - detail="Flow not found.", - ) + msg = "Flow not found." 
+ raise FlowNotFoundError(msg) parameter_rows = database.flows.get_parameters(flow_id, expdb) parameters = [ diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 77bf64b..a90a793 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -7,7 +7,7 @@ import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError, ProblemType, raise_problem +from core.errors import DatasetError, DatasetNotFoundError from database.users import User from routers.dependencies import expdb_connection, fetch_user from schemas.datasets.openml import Quality @@ -35,11 +35,12 @@ def get_qualities( ) -> list[Quality]: dataset = database.datasets.get(dataset_id, expdb) if not dataset or not _user_has_access(dataset, user): - raise_problem( - status_code=HTTPStatus.PRECONDITION_FAILED, - type_=ProblemType.DATASET_NOT_FOUND, - detail="Unknown dataset.", + # Backwards compatibility: PHP API returns 412 PRECONDITION_FAILED + msg = "Unknown dataset." 
+ raise DatasetNotFoundError( + msg, code=DatasetError.NO_DATA_FILE, + status_code=HTTPStatus.PRECONDITION_FAILED, ) return database.qualities.get_for_dataset(dataset_id, expdb) # The PHP API provided (sometime) helpful error messages diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index 0ff49e8..9e9a6c9 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -1,4 +1,3 @@ -from http import HTTPStatus from typing import Annotated, Literal from fastapi import APIRouter, Body, Depends @@ -6,7 +5,16 @@ from sqlalchemy import Connection, Row import database.studies -from core.errors import ProblemType, raise_problem +from core.errors import ( + AuthenticationRequiredError, + StudyAliasExistsError, + StudyConflictError, + StudyInvalidTypeError, + StudyLegacyError, + StudyNotEditableError, + StudyNotFoundError, + StudyPrivateError, +) from core.formatting import _str_to_bool from database.users import User, UserGroup from routers.dependencies import expdb_connection, fetch_user @@ -23,30 +31,18 @@ def _get_study_raise_otherwise(id_or_alias: int | str, user: User | None, expdb: study = database.studies.get_by_alias(id_or_alias, expdb) if study is None: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.STUDY_NOT_FOUND, - detail="Study not found.", - ) + msg = "Study not found." + raise StudyNotFoundError(msg) if study.visibility == Visibility.PRIVATE: if user is None: - raise_problem( - status_code=HTTPStatus.UNAUTHORIZED, - type_=ProblemType.AUTHENTICATION_REQUIRED, - detail="Must authenticate for private study.", - ) + msg = "Must authenticate for private study." + raise AuthenticationRequiredError(msg) if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.STUDY_PRIVATE, - detail="Study is private.", - ) + msg = "Study is private." 
+ raise StudyPrivateError(msg) if _str_to_bool(study.legacy): - raise_problem( - status_code=HTTPStatus.GONE, - type_=ProblemType.STUDY_LEGACY, - detail="Legacy studies are no longer supported.", - ) + msg = "Legacy studies are no longer supported." + raise StudyLegacyError(msg) return study @@ -63,25 +59,16 @@ def attach_to_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> AttachDetachResponse: if user is None: - raise_problem( - status_code=HTTPStatus.UNAUTHORIZED, - type_=ProblemType.AUTHENTICATION_REQUIRED, - detail="Authentication required.", - ) + msg = "Authentication required." + raise AuthenticationRequiredError(msg) study = _get_study_raise_otherwise(study_id, user, expdb) # PHP lets *anyone* edit *any* study. We're not going to do that. if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.STUDY_NOT_EDITABLE, - detail="Study can only be edited by its creator.", - ) + msg = "Study can only be edited by its creator." + raise StudyNotEditableError(msg) if study.status != StudyStatus.IN_PREPARATION: - raise_problem( - status_code=HTTPStatus.FORBIDDEN, - type_=ProblemType.STUDY_NOT_EDITABLE, - detail="Study can only be edited while in preparation.", - ) + msg = "Study can only be edited while in preparation." + raise StudyNotEditableError(msg) # We let the database handle the constraints on whether # the entity is already attached or if it even exists. 
@@ -96,11 +83,8 @@ def attach_to_study( else: database.studies.attach_runs(run_ids=entity_ids, **attach_kwargs) except ValueError as e: - raise_problem( - status_code=HTTPStatus.CONFLICT, - type_=ProblemType.STUDY_CONFLICT, - detail=str(e), - ) + msg = str(e) + raise StudyConflictError(msg) from e return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_) @@ -111,29 +95,17 @@ def create_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> dict[Literal["study_id"], int]: if user is None: - raise_problem( - status_code=HTTPStatus.UNAUTHORIZED, - type_=ProblemType.AUTHENTICATION_REQUIRED, - detail="Creating a study requires authentication.", - ) + msg = "Creating a study requires authentication." + raise AuthenticationRequiredError(msg) if study.main_entity_type == StudyType.RUN and study.tasks: - raise_problem( - status_code=HTTPStatus.BAD_REQUEST, - type_=ProblemType.STUDY_INVALID_TYPE, - detail="Cannot create a run study with tasks.", - ) + msg = "Cannot create a run study with tasks." + raise StudyInvalidTypeError(msg) if study.main_entity_type == StudyType.TASK and study.runs: - raise_problem( - status_code=HTTPStatus.BAD_REQUEST, - type_=ProblemType.STUDY_INVALID_TYPE, - detail="Cannot create a task study with runs.", - ) + msg = "Cannot create a task study with runs." + raise StudyInvalidTypeError(msg) if study.alias and database.studies.get_by_alias(study.alias, expdb): - raise_problem( - status_code=HTTPStatus.CONFLICT, - type_=ProblemType.STUDY_ALIAS_EXISTS, - detail="Study alias already exists.", - ) + msg = "Study alias already exists." 
+ raise StudyAliasExistsError(msg) study_id = database.studies.create(study, user, expdb) if study.main_entity_type == StudyType.TASK: for task_id in study.tasks: diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py index 8007020..52999b1 100644 --- a/src/routers/openml/tasks.py +++ b/src/routers/openml/tasks.py @@ -1,6 +1,5 @@ import json import re -from http import HTTPStatus from typing import Annotated, cast import xmltodict @@ -10,7 +9,7 @@ import config import database.datasets import database.tasks -from core.errors import ProblemType, raise_problem +from core.errors import InternalError, TaskNotFoundError from routers.dependencies import expdb_connection from schemas.datasets.openml import Task @@ -156,17 +155,11 @@ def get_task( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> Task: if not (task := database.tasks.get(task_id, expdb)): - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.TASK_NOT_FOUND, - detail="Task not found.", - ) + msg = "Task not found." + raise TaskNotFoundError(msg) if not (task_type := database.tasks.get_task_type(task.ttid, expdb)): - raise_problem( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - type_=ProblemType.INTERNAL_ERROR, - detail="Task type not found.", - ) + msg = "Task type not found." 
+ raise InternalError(msg) task_inputs = { row.input: int(row.value) if row.value.isdigit() else row.value diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py index 9916499..18903c7 100644 --- a/src/routers/openml/tasktype.py +++ b/src/routers/openml/tasktype.py @@ -1,11 +1,10 @@ import json -from http import HTTPStatus from typing import Annotated, Any, Literal, cast from fastapi import APIRouter, Depends from sqlalchemy import Connection, Row -from core.errors import ProblemType, raise_problem +from core.errors import TaskTypeNotFoundError from database.tasks import get_input_for_task_type, get_task_types from database.tasks import get_task_type as db_get_task_type from routers.dependencies import expdb_connection @@ -46,12 +45,8 @@ def get_task_type( ) -> dict[Literal["task_type"], dict[str, str | None | list[str] | list[dict[str, str]]]]: task_type_record = db_get_task_type(task_type_id, expdb) if task_type_record is None: - raise_problem( - status_code=HTTPStatus.NOT_FOUND, - type_=ProblemType.TASK_TYPE_NOT_FOUND, - detail="Unknown task type.", - code=241, - ) + msg = "Unknown task type." + raise TaskTypeNotFoundError(msg, code=241) task_type = _normalize_task_type(task_type_record) # Some names are quoted, or have typos in their comma-separation (e.g. 
'A ,B') diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index a1395f1..6d83780 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -4,7 +4,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from core.errors import ProblemDetailError, ProblemType +from core.errors import DatasetError, DatasetNoAccessError, ProblemType from database.users import User from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus @@ -86,7 +86,7 @@ def test_private_dataset_no_access( user: User | None, expdb_test: Connection, ) -> None: - with pytest.raises(ProblemDetailError) as e: + with pytest.raises(DatasetNoAccessError) as e: get_dataset( dataset_id=130, user=user, @@ -94,8 +94,8 @@ def test_private_dataset_no_access( expdb_db=expdb_test, ) assert e.value.status_code == HTTPStatus.FORBIDDEN - assert e.value.problem.type_ == ProblemType.DATASET_NO_ACCESS - assert e.value.extensions.get("code") == "112" + assert e.value.uri == ProblemType.DATASET_NO_ACCESS + assert e.value.code == DatasetError.NO_ACCESS @pytest.mark.parametrize( diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index 2627053..2f70705 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -6,7 +6,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from core.errors import ProblemDetailError, ProblemType +from core.errors import FlowNotFoundError, ProblemType from routers.openml.flows import flow_exists from tests.conftest import Flow @@ -53,10 +53,10 @@ def test_flow_exists_processes_found( def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None: mocker.patch("database.flows.get_by_name", return_value=None) - with pytest.raises(ProblemDetailError) as error: + with pytest.raises(FlowNotFoundError) as 
error: flow_exists("foo", "bar", expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND - assert error.value.problem.type_ == ProblemType.FLOW_NOT_FOUND + assert error.value.uri == ProblemType.FLOW_NOT_FOUND def test_flow_exists(flow: Flow, py_api: TestClient) -> None: From 5e12e748e631a655e960df0e8814bf86df6ccc7f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Feb 2026 11:17:50 +0200 Subject: [PATCH 7/9] Remove unused dictionary --- src/core/errors.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 8db4965..c6f0d3e 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -443,36 +443,6 @@ class InternalError(ProblemDetailError): # Backwards Compatibility # ============================================================================= -# Mapping from old ProblemType strings to new exception classes -_PROBLEM_TYPE_TO_EXCEPTION: dict[str, type[ProblemDetailError]] = { - "https://openml.org/problems/dataset-not-found": DatasetNotFoundError, - "https://openml.org/problems/dataset-no-access": DatasetNoAccessError, - "https://openml.org/problems/dataset-no-data-file": DatasetNoDataFileError, - "https://openml.org/problems/dataset-not-processed": DatasetNotProcessedError, - "https://openml.org/problems/dataset-processing-error": DatasetProcessingError, - "https://openml.org/problems/dataset-no-features": DatasetNoFeaturesError, - "https://openml.org/problems/dataset-status-transition": DatasetStatusTransitionError, - "https://openml.org/problems/dataset-not-owned": DatasetNotOwnedError, - "https://openml.org/problems/dataset-admin-only": DatasetAdminOnlyError, - "https://openml.org/problems/authentication-required": AuthenticationRequiredError, - "https://openml.org/problems/authentication-failed": AuthenticationFailedError, - "https://openml.org/problems/forbidden": ForbiddenError, - "https://openml.org/problems/tag-already-exists": TagAlreadyExistsError, - 
"https://openml.org/problems/no-results": NoResultsError, - "https://openml.org/problems/study-not-found": StudyNotFoundError, - "https://openml.org/problems/study-private": StudyPrivateError, - "https://openml.org/problems/study-legacy": StudyLegacyError, - "https://openml.org/problems/study-alias-exists": StudyAliasExistsError, - "https://openml.org/problems/study-invalid-type": StudyInvalidTypeError, - "https://openml.org/problems/study-not-editable": StudyNotEditableError, - "https://openml.org/problems/study-conflict": StudyConflictError, - "https://openml.org/problems/task-not-found": TaskNotFoundError, - "https://openml.org/problems/task-type-not-found": TaskTypeNotFoundError, - "https://openml.org/problems/flow-not-found": FlowNotFoundError, - "https://openml.org/problems/service-not-found": ServiceNotFoundError, - "https://openml.org/problems/internal-error": InternalError, -} - class ProblemType: """Problem type URIs for common OpenML errors. From 4a0d5cb01e1d97af7835903ae755cdd91e26b551 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Feb 2026 11:26:48 +0200 Subject: [PATCH 8/9] Remove the ProblemType class as it was confusing and only for tests --- src/core/errors.py | 39 ------------------- tests/routers/openml/dataset_tag_test.py | 6 +-- .../openml/datasets_list_datasets_test.py | 6 +-- tests/routers/openml/datasets_test.py | 13 +++++-- tests/routers/openml/flows_test.py | 6 +-- .../migration/datasets_migration_test.py | 12 ++++-- .../openml/migration/flows_migration_test.py | 4 +- tests/routers/openml/qualities_test.py | 4 +- tests/routers/openml/study_test.py | 6 +-- tests/routers/openml/task_type_test.py | 4 +- 10 files changed, 35 insertions(+), 65 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index c6f0d3e..bc45db7 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -437,42 +437,3 @@ class InternalError(ProblemDetailError): uri = "https://openml.org/problems/internal-error" title = "Internal Server Error" 
_default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR - - -# ============================================================================= -# Backwards Compatibility -# ============================================================================= - - -class ProblemType: - """Problem type URIs for common OpenML errors. - - Deprecated: Use the specific exception classes directly instead. - """ - - DATASET_NOT_FOUND = DatasetNotFoundError.uri - DATASET_NO_ACCESS = DatasetNoAccessError.uri - DATASET_NO_DATA_FILE = DatasetNoDataFileError.uri - DATASET_NOT_PROCESSED = DatasetNotProcessedError.uri - DATASET_PROCESSING_ERROR = DatasetProcessingError.uri - DATASET_NO_FEATURES = DatasetNoFeaturesError.uri - DATASET_STATUS_TRANSITION = DatasetStatusTransitionError.uri - DATASET_NOT_OWNED = DatasetNotOwnedError.uri - DATASET_ADMIN_ONLY = DatasetAdminOnlyError.uri - AUTHENTICATION_REQUIRED = AuthenticationRequiredError.uri - AUTHENTICATION_FAILED = AuthenticationFailedError.uri - FORBIDDEN = ForbiddenError.uri - TAG_ALREADY_EXISTS = TagAlreadyExistsError.uri - NO_RESULTS = NoResultsError.uri - STUDY_NOT_FOUND = StudyNotFoundError.uri - STUDY_PRIVATE = StudyPrivateError.uri - STUDY_LEGACY = StudyLegacyError.uri - STUDY_ALIAS_EXISTS = StudyAliasExistsError.uri - STUDY_INVALID_TYPE = StudyInvalidTypeError.uri - STUDY_NOT_EDITABLE = StudyNotEditableError.uri - STUDY_CONFLICT = StudyConflictError.uri - TASK_NOT_FOUND = TaskNotFoundError.uri - TASK_TYPE_NOT_FOUND = TaskTypeNotFoundError.uri - FLOW_NOT_FOUND = FlowNotFoundError.uri - SERVICE_NOT_FOUND = ServiceNotFoundError.uri - INTERNAL_ERROR = InternalError.uri diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index 7147eca..d69a265 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -4,7 +4,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from core.errors import ProblemType +from core.errors 
import AuthenticationFailedError, TagAlreadyExistsError from database.datasets import get_tags_for from tests import constants from tests.users import ApiKey @@ -24,7 +24,7 @@ def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> No assert response.status_code == HTTPStatus.UNAUTHORIZED assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.AUTHENTICATION_FAILED + assert error["type"] == AuthenticationFailedError.uri assert error["code"] == "103" @@ -65,7 +65,7 @@ def test_dataset_tag_fails_if_tag_exists(py_api: TestClient) -> None: assert response.status_code == HTTPStatus.CONFLICT assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.TAG_ALREADY_EXISTS + assert error["type"] == TagAlreadyExistsError.uri assert error["code"] == "473" assert f"id={dataset_id}" in error["detail"] assert f"tag={tag}" in error["detail"] diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index 78f2d42..eaaef29 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -8,7 +8,7 @@ from hypothesis import strategies as st from starlette.testclient import TestClient -from core.errors import ProblemType +from core.errors import NoResultsError from tests import constants from tests.users import ApiKey @@ -19,7 +19,7 @@ def _assert_empty_result( assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.NO_RESULTS + assert error["type"] == NoResultsError.uri assert error["code"] == "372" @@ -300,7 +300,7 @@ def test_list_data_identical( # Verify Python API returns RFC 9457 format assert response.headers["content-type"] == "application/problem+json" error = 
response.json() - assert error["type"] == ProblemType.NO_RESULTS + assert error["type"] == NoResultsError.uri assert error["code"] == "372" return None new_json = response.json() diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index 6d83780..01c17b9 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -4,7 +4,12 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from core.errors import DatasetError, DatasetNoAccessError, ProblemType +from core.errors import ( + DatasetError, + DatasetNoAccessError, + DatasetNotFoundError, + DatasetProcessingError, +) from database.users import User from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus @@ -30,7 +35,7 @@ def test_error_unknown_dataset( assert response.status_code == response_code assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.DATASET_NOT_FOUND + assert error["type"] == DatasetNotFoundError.uri assert error["title"] == "Dataset Not Found" assert error["status"] == HTTPStatus.NOT_FOUND assert error["detail"] == "Unknown dataset." 
@@ -94,7 +99,7 @@ def test_private_dataset_no_access( expdb_db=expdb_test, ) assert e.value.status_code == HTTPStatus.FORBIDDEN - assert e.value.uri == ProblemType.DATASET_NO_ACCESS + assert e.value.uri == DatasetNoAccessError.uri assert e.value.code == DatasetError.NO_ACCESS @@ -186,7 +191,7 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None: assert response.status_code == HTTPStatus.PRECONDITION_FAILED assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.DATASET_PROCESSING_ERROR + assert error["type"] == DatasetProcessingError.uri assert error["code"] == "274" assert "No features found" in error["detail"] diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index 2f70705..2c82ea3 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -6,7 +6,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient -from core.errors import FlowNotFoundError, ProblemType +from core.errors import FlowNotFoundError from routers.openml.flows import flow_exists from tests.conftest import Flow @@ -56,7 +56,7 @@ def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: C with pytest.raises(FlowNotFoundError) as error: flow_exists("foo", "bar", expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND - assert error.value.uri == ProblemType.FLOW_NOT_FOUND + assert error.value.uri == FlowNotFoundError.uri def test_flow_exists(flow: Flow, py_api: TestClient) -> None: @@ -70,7 +70,7 @@ def test_flow_exists_not_exists(py_api: TestClient) -> None: assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.FLOW_NOT_FOUND + assert error["type"] == FlowNotFoundError.uri assert error["detail"] == "Flow not found." 
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 1c514f6..99fb590 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -7,7 +7,11 @@ import tests.constants from core.conversions import nested_remove_single_element_list -from core.errors import ProblemType +from core.errors import ( + DatasetNoAccessError, + DatasetNotFoundError, + TagAlreadyExistsError, +) from tests.users import ApiKey @@ -109,7 +113,7 @@ def test_error_unknown_dataset( # RFC 9457: Python API now returns problem+json format assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.DATASET_NOT_FOUND + assert error["type"] == DatasetNotFoundError.uri assert error["code"] == "111" @@ -128,7 +132,7 @@ def test_private_dataset_no_user_no_access( assert response.status_code == HTTPStatus.FORBIDDEN assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.DATASET_NO_ACCESS + assert error["type"] == DatasetNoAccessError.uri assert error["code"] == "112" @@ -200,7 +204,7 @@ def test_dataset_tag_response_is_identical( assert new.status_code == HTTPStatus.CONFLICT assert new.headers["content-type"] == "application/problem+json" error = new.json() - assert error["type"] == ProblemType.TAG_ALREADY_EXISTS + assert error["type"] == TagAlreadyExistsError.uri assert error["code"] == "473" return diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index e3a559d..5b82426 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -10,7 +10,7 @@ nested_remove_single_element_list, nested_str_to_num, ) -from core.errors import ProblemType +from core.errors 
import FlowNotFoundError from tests.conftest import Flow @@ -31,7 +31,7 @@ def test_flow_exists_not( # RFC 9457: Python API now returns problem+json format assert py_response.headers["content-type"] == "application/problem+json" error = py_response.json() - assert error["type"] == ProblemType.FLOW_NOT_FOUND + assert error["type"] == FlowNotFoundError.uri assert error["detail"] == "Flow not found." diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index 814607c..54cf984 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -6,7 +6,7 @@ from sqlalchemy import Connection, text from starlette.testclient import TestClient -from core.errors import ProblemType +from core.errors import DatasetNotFoundError def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None: @@ -318,6 +318,6 @@ def test_get_quality_identical_error( # RFC 9457: Python API now returns problem+json format assert python_response.headers["content-type"] == "application/problem+json" error = python_response.json() - assert error["type"] == ProblemType.DATASET_NOT_FOUND + assert error["type"] == DatasetNotFoundError.uri # Verify the error message matches the PHP API semantically assert "Unknown dataset" in error["detail"] diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index b2dd862..ed7018f 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -5,7 +5,7 @@ from sqlalchemy import Connection, text from starlette.testclient import TestClient -from core.errors import ProblemType +from core.errors import StudyConflictError from schemas.study import StudyType from tests.users import ApiKey @@ -559,7 +559,7 @@ def test_attach_task_to_study_already_linked_raises( assert response.status_code == HTTPStatus.CONFLICT, response.content assert response.headers["content-type"] == "application/problem+json" error = response.json() - 
assert error["type"] == ProblemType.STUDY_CONFLICT + assert error["type"] == StudyConflictError.uri assert "Task 1 is already attached to study 1" in error["detail"] @@ -578,5 +578,5 @@ def test_attach_task_to_study_but_task_not_exist_raises( assert response.status_code == HTTPStatus.CONFLICT assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.STUDY_CONFLICT + assert error["type"] == StudyConflictError.uri assert "do not exist" in error["detail"] diff --git a/tests/routers/openml/task_type_test.py b/tests/routers/openml/task_type_test.py index 95d5c70..a8e897e 100644 --- a/tests/routers/openml/task_type_test.py +++ b/tests/routers/openml/task_type_test.py @@ -5,7 +5,7 @@ import pytest from starlette.testclient import TestClient -from core.errors import ProblemType +from core.errors import TaskTypeNotFoundError def test_list_task_type(py_api: TestClient, php_api: httpx.Client) -> None: @@ -41,6 +41,6 @@ def test_get_task_type_unknown(py_api: TestClient) -> None: assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] == "application/problem+json" error = response.json() - assert error["type"] == ProblemType.TASK_TYPE_NOT_FOUND + assert error["type"] == TaskTypeNotFoundError.uri assert error["code"] == "241" assert "Unknown task type" in error["detail"] From 97d53785ea628b43edec262d5ea7b4c699051a6c Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Feb 2026 11:44:20 +0200 Subject: [PATCH 9/9] Provide default codes for the different errors based on PHP codes --- src/core/errors.py | 187 +++++++------------------- src/routers/openml/datasets.py | 25 ++-- src/routers/openml/qualities.py | 7 +- src/routers/openml/tasktype.py | 2 +- tests/routers/openml/datasets_test.py | 4 +- 5 files changed, 70 insertions(+), 155 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index bc45db7..e697831 100644 --- a/src/core/errors.py +++ 
b/src/core/errors.py @@ -4,19 +4,11 @@ See: https://www.rfc-editor.org/rfc/rfc9457.html """ -from enum import IntEnum from http import HTTPStatus from fastapi import Request from fastapi.responses import JSONResponse - -class DatasetError(IntEnum): - NOT_FOUND = 111 - NO_ACCESS = 112 - NO_DATA_FILE = 113 - - # ============================================================================= # Base Exception # ============================================================================= @@ -28,14 +20,16 @@ class ProblemDetailError(Exception): Subclasses should define class attributes: - uri: The problem type URI - title: Human-readable title - - status_code: HTTP status code + - _default_status_code: HTTP status code + - _default_code: Legacy error code (optional) - The status_code can be overridden per-instance for backwards compatibility. + The status_code and code can be overridden per-instance. """ uri: str = "about:blank" title: str = "An error occurred" _default_status_code: HTTPStatus = HTTPStatus.INTERNAL_SERVER_ERROR + _default_code: int | None = None def __init__( self, @@ -46,7 +40,7 @@ def __init__( status_code: HTTPStatus | None = None, ) -> None: self.detail = detail - self.code = code + self._code_override = code self.instance = instance self._status_code_override = status_code super().__init__(detail) @@ -58,6 +52,13 @@ def status_code(self) -> HTTPStatus: return self._status_code_override return self._default_status_code + @property + def code(self) -> int | str | None: + """Return the code, preferring instance override over class default.""" + if self._code_override is not None: + return self._code_override + return self._default_code + def problem_detail_exception_handler( request: Request, # noqa: ARG001 @@ -93,111 +94,84 @@ def problem_detail_exception_handler( class DatasetNotFoundError(ProblemDetailError): - """Raised when a dataset cannot be found. - - # Future: detail=f"Dataset {dataset_id} not found." 
- # Future: validate dataset_id is positive int - """ + """Raised when a dataset cannot be found.""" uri = "https://openml.org/problems/dataset-not-found" title = "Dataset Not Found" _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 111 class DatasetNoAccessError(ProblemDetailError): - """Raised when user doesn't have access to a dataset. - - # Future: detail=f"Access denied to dataset {dataset_id}." - # Future: validate dataset_id is positive int - """ + """Raised when user doesn't have access to a dataset.""" uri = "https://openml.org/problems/dataset-no-access" title = "Dataset Access Denied" _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 112 class DatasetNoDataFileError(ProblemDetailError): - """Raised when a dataset's data file is missing. - - # Future: detail=f"Data file for dataset {dataset_id} not found." - # Future: validate dataset_id is positive int - """ + """Raised when a dataset's data file is missing.""" uri = "https://openml.org/problems/dataset-no-data-file" title = "Dataset Data File Missing" _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 113 class DatasetNotProcessedError(ProblemDetailError): - """Raised when a dataset has not been processed yet. - - # Future: detail=f"Dataset {dataset_id} has not been processed yet." - # Future: validate dataset_id is positive int - """ + """Raised when a dataset has not been processed yet.""" uri = "https://openml.org/problems/dataset-not-processed" title = "Dataset Not Processed" _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 273 class DatasetProcessingError(ProblemDetailError): - """Raised when a dataset had an error during processing. - - # Future: detail=f"Dataset {dataset_id} encountered an error during processing." 
- # Future: validate dataset_id is positive int - """ + """Raised when a dataset had an error during processing.""" uri = "https://openml.org/problems/dataset-processing-error" title = "Dataset Processing Error" _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 274 class DatasetNoFeaturesError(ProblemDetailError): - """Raised when a dataset has no features available. - - # Future: detail=f"No features found for dataset {dataset_id}." - # Future: validate dataset_id is positive int - """ + """Raised when a dataset has no features available.""" uri = "https://openml.org/problems/dataset-no-features" title = "Dataset Features Not Available" _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 272 class DatasetStatusTransitionError(ProblemDetailError): - """Raised when an invalid dataset status transition is attempted. - - # Future: detail=f"Cannot transition dataset {dataset_id} from {from_status} to {to_status}." - # Future: validate statuses are valid DatasetStatus values - """ + """Raised when an invalid dataset status transition is attempted.""" uri = "https://openml.org/problems/dataset-status-transition" title = "Invalid Status Transition" _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 694 class DatasetNotOwnedError(ProblemDetailError): - """Raised when user tries to modify a dataset they don't own. - - # Future: detail=f"Dataset {dataset_id} is not owned by you." - # Future: validate dataset_id is positive int - """ + """Raised when user tries to modify a dataset they don't own.""" uri = "https://openml.org/problems/dataset-not-owned" title = "Dataset Not Owned" _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 693 class DatasetAdminOnlyError(ProblemDetailError): - """Raised when a non-admin tries to perform an admin-only action. - - # Future: detail=f"Only administrators can {action}." 
- # Future: validate action is non-empty string - """ + """Raised when a non-admin tries to perform an admin-only action.""" uri = "https://openml.org/problems/dataset-admin-only" title = "Administrator Only" _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 696 # ============================================================================= @@ -206,11 +180,7 @@ class DatasetAdminOnlyError(ProblemDetailError): class AuthenticationRequiredError(ProblemDetailError): - """Raised when authentication is required but not provided. - - # Future: detail=f"{action} requires authentication." - # Future: validate action is non-empty string - """ + """Raised when authentication is required but not provided.""" uri = "https://openml.org/problems/authentication-required" title = "Authentication Required" @@ -218,22 +188,16 @@ class AuthenticationRequiredError(ProblemDetailError): class AuthenticationFailedError(ProblemDetailError): - """Raised when authentication credentials are invalid. - - # Future: detail="Authentication failed. Invalid or expired credentials." - """ + """Raised when authentication credentials are invalid.""" uri = "https://openml.org/problems/authentication-failed" title = "Authentication Failed" _default_status_code = HTTPStatus.UNAUTHORIZED + _default_code = 103 class ForbiddenError(ProblemDetailError): - """Raised when user is authenticated but not authorized. - - # Future: detail=f"You do not have permission to {action}." - # Future: validate action is non-empty string - """ + """Raised when user is authenticated but not authorized.""" uri = "https://openml.org/problems/forbidden" title = "Forbidden" @@ -246,15 +210,12 @@ class ForbiddenError(ProblemDetailError): class TagAlreadyExistsError(ProblemDetailError): - """Raised when trying to add a tag that already exists. - - # Future: detail=f"Entity {entity_id} is already tagged with '{tag}'." 
- # Future: validate entity_id is positive int, tag is non-empty string - """ + """Raised when trying to add a tag that already exists.""" uri = "https://openml.org/problems/tag-already-exists" title = "Tag Already Exists" _default_status_code = HTTPStatus.CONFLICT + _default_code = 473 # ============================================================================= @@ -263,14 +224,12 @@ class TagAlreadyExistsError(ProblemDetailError): class NoResultsError(ProblemDetailError): - """Raised when a search returns no results. - - # Future: detail="No results match the search criteria." - """ + """Raised when a search returns no results.""" uri = "https://openml.org/problems/no-results" title = "No Results Found" _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 372 # ============================================================================= @@ -279,11 +238,7 @@ class NoResultsError(ProblemDetailError): class StudyNotFoundError(ProblemDetailError): - """Raised when a study cannot be found. - - # Future: detail=f"Study {study_id} not found." - # Future: validate study_id is positive int or valid alias string - """ + """Raised when a study cannot be found.""" uri = "https://openml.org/problems/study-not-found" title = "Study Not Found" @@ -291,11 +246,7 @@ class StudyNotFoundError(ProblemDetailError): class StudyPrivateError(ProblemDetailError): - """Raised when trying to access a private study without permission. - - # Future: detail=f"Study {study_id} is private." - # Future: validate study_id is positive int - """ + """Raised when trying to access a private study without permission.""" uri = "https://openml.org/problems/study-private" title = "Study Is Private" @@ -303,11 +254,7 @@ class StudyPrivateError(ProblemDetailError): class StudyLegacyError(ProblemDetailError): - """Raised when trying to access a legacy study that's no longer supported. - - # Future: detail=f"Study {study_id} is a legacy study and no longer supported." 
- # Future: validate study_id is positive int - """ + """Raised when trying to access a legacy study that's no longer supported.""" uri = "https://openml.org/problems/study-legacy" title = "Legacy Study Not Supported" @@ -315,11 +262,7 @@ class StudyLegacyError(ProblemDetailError): class StudyAliasExistsError(ProblemDetailError): - """Raised when trying to create a study with an alias that already exists. - - # Future: detail=f"Study alias '{alias}' already exists." - # Future: validate alias is non-empty string - """ + """Raised when trying to create a study with an alias that already exists.""" uri = "https://openml.org/problems/study-alias-exists" title = "Study Alias Already Exists" @@ -327,10 +270,7 @@ class StudyAliasExistsError(ProblemDetailError): class StudyInvalidTypeError(ProblemDetailError): - """Raised when study type configuration is invalid. - - # Future: detail=f"Cannot create {study_type} study with {invalid_field}." - """ + """Raised when study type configuration is invalid.""" uri = "https://openml.org/problems/study-invalid-type" title = "Invalid Study Type" @@ -338,11 +278,7 @@ class StudyInvalidTypeError(ProblemDetailError): class StudyNotEditableError(ProblemDetailError): - """Raised when trying to edit a study that cannot be edited. - - # Future: detail=f"Study {study_id} cannot be edited. {reason}" - # Future: validate study_id is positive int - """ + """Raised when trying to edit a study that cannot be edited.""" uri = "https://openml.org/problems/study-not-editable" title = "Study Not Editable" @@ -350,10 +286,7 @@ class StudyNotEditableError(ProblemDetailError): class StudyConflictError(ProblemDetailError): - """Raised when there's a conflict with study data (e.g., duplicate attachment). 
- - # Future: detail=f"Conflict: {reason}" - """ + """Raised when there's a conflict with study data (e.g., duplicate attachment).""" uri = "https://openml.org/problems/study-conflict" title = "Study Conflict" @@ -366,11 +299,7 @@ class StudyConflictError(ProblemDetailError): class TaskNotFoundError(ProblemDetailError): - """Raised when a task cannot be found. - - # Future: detail=f"Task {task_id} not found." - # Future: validate task_id is positive int - """ + """Raised when a task cannot be found.""" uri = "https://openml.org/problems/task-not-found" title = "Task Not Found" @@ -378,15 +307,12 @@ class TaskNotFoundError(ProblemDetailError): class TaskTypeNotFoundError(ProblemDetailError): - """Raised when a task type cannot be found. - - # Future: detail=f"Task type {task_type_id} not found." - # Future: validate task_type_id is positive int - """ + """Raised when a task type cannot be found.""" uri = "https://openml.org/problems/task-type-not-found" title = "Task Type Not Found" _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 241 # ============================================================================= @@ -395,11 +321,7 @@ class TaskTypeNotFoundError(ProblemDetailError): class FlowNotFoundError(ProblemDetailError): - """Raised when a flow cannot be found. - - # Future: detail=f"Flow {flow_id} not found." or "Flow '{name}' version '{version}' not found." - # Future: validate flow_id is positive int - """ + """Raised when a flow cannot be found.""" uri = "https://openml.org/problems/flow-not-found" title = "Flow Not Found" @@ -412,11 +334,7 @@ class FlowNotFoundError(ProblemDetailError): class ServiceNotFoundError(ProblemDetailError): - """Raised when a service cannot be found. - - # Future: detail=f"Service {service_id} not found." 
- # Future: validate service_id is positive int - """ + """Raised when a service cannot be found.""" uri = "https://openml.org/problems/service-not-found" title = "Service Not Found" @@ -429,10 +347,7 @@ class ServiceNotFoundError(ProblemDetailError): class InternalError(ProblemDetailError): - """Raised for unexpected internal server errors. - - # Future: detail="An unexpected error occurred. Please try again later." - """ + """Raised for unexpected internal server errors.""" uri = "https://openml.org/problems/internal-error" title = "Internal Server Error" diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index fed0bba..360b3c4 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -14,7 +14,6 @@ AuthenticationFailedError, AuthenticationRequiredError, DatasetAdminOnlyError, - DatasetError, DatasetNoAccessError, DatasetNoDataFileError, DatasetNoFeaturesError, @@ -52,11 +51,11 @@ def tag_dataset( tags = database.datasets.get_tags_for(data_id, expdb_db) if tag.casefold() in [t.casefold() for t in tags]: msg = f"Entity already tagged by this tag. id={data_id}; tag={tag}" - raise TagAlreadyExistsError(msg, code=473) + raise TagAlreadyExistsError(msg) if user is None: msg = "Authentication failed." - raise AuthenticationFailedError(msg, code=103) + raise AuthenticationFailedError(msg) database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db) return { @@ -202,7 +201,7 @@ def quality_clause(quality: str, range_: str | None) -> str: } if not datasets: msg = "No datasets match the search criteria." - raise NoResultsError(msg, code=372) + raise NoResultsError(msg) for dataset in datasets.values(): # The old API does not actually provide the checksum but just an empty field @@ -266,11 +265,11 @@ def _get_dataset_raise_otherwise( """ if not (dataset := database.datasets.get(dataset_id, expdb)): msg = "Unknown dataset." 
- raise DatasetNotFoundError(msg, code=DatasetError.NOT_FOUND) + raise DatasetNotFoundError(msg) if not _user_has_access(dataset=dataset, user=user): msg = "No access granted." - raise DatasetNoAccessError(msg, code=DatasetError.NO_ACCESS) + raise DatasetNoAccessError(msg) return dataset @@ -297,15 +296,15 @@ def get_dataset_features( "Dataset not processed yet. The dataset was not processed yet, " "features are not yet available. Please wait for a few minutes." ) - raise DatasetNotProcessedError(msg, code=273) + raise DatasetNotProcessedError(msg) if processing_state.error: msg = "No features found. Additionally, dataset processed with error." - raise DatasetProcessingError(msg, code=274) + raise DatasetProcessingError(msg) msg = ( "No features found. " "The dataset did not contain any features, or we could not extract them." ) - raise DatasetNoFeaturesError(msg, code=272) + raise DatasetNoFeaturesError(msg) return features @@ -327,15 +326,15 @@ def update_dataset_status( can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups if status == DatasetStatus.DEACTIVATED and not can_deactivate: msg = "Dataset is not owned by you." - raise DatasetNotOwnedError(msg, code=693) + raise DatasetNotOwnedError(msg) if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups: msg = "Only administrators can activate datasets." - raise DatasetAdminOnlyError(msg, code=696) + raise DatasetAdminOnlyError(msg) current_status = database.datasets.get_status(dataset_id, expdb) if current_status and current_status.status == status: msg = "Illegal status transition." 
- raise DatasetStatusTransitionError(msg, code=694) + raise DatasetStatusTransitionError(msg) # If current status is unknown, it is effectively "in preparation", # So the following transitions are allowed (first 3 transitions are first clause) @@ -369,7 +368,7 @@ def get_dataset( dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db) ): msg = "No data file found." - raise DatasetNoDataFileError(msg, code=DatasetError.NO_DATA_FILE) + raise DatasetNoDataFileError(msg) tags = database.datasets.get_tags_for(dataset_id, expdb_db) description = database.datasets.get_description(dataset_id, expdb_db) diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index a90a793..c369b14 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -7,7 +7,7 @@ import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError, DatasetNotFoundError +from core.errors import DatasetNotFoundError from database.users import User from routers.dependencies import expdb_connection, fetch_user from schemas.datasets.openml import Quality @@ -35,11 +35,12 @@ def get_qualities( ) -> list[Quality]: dataset = database.datasets.get(dataset_id, expdb) if not dataset or not _user_has_access(dataset, user): - # Backwards compatibility: PHP API returns 412 PRECONDITION_FAILED + # Backwards compatibility: PHP API returns 412 with code 113 msg = "Unknown dataset." 
+ no_data_file = 113 raise DatasetNotFoundError( msg, - code=DatasetError.NO_DATA_FILE, + code=no_data_file, status_code=HTTPStatus.PRECONDITION_FAILED, ) return database.qualities.get_for_dataset(dataset_id, expdb) diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py index 18903c7..453f224 100644 --- a/src/routers/openml/tasktype.py +++ b/src/routers/openml/tasktype.py @@ -46,7 +46,7 @@ def get_task_type( task_type_record = db_get_task_type(task_type_id, expdb) if task_type_record is None: msg = "Unknown task type." - raise TaskTypeNotFoundError(msg, code=241) + raise TaskTypeNotFoundError(msg) task_type = _normalize_task_type(task_type_record) # Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B') diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index 01c17b9..3b342fc 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -5,7 +5,6 @@ from starlette.testclient import TestClient from core.errors import ( - DatasetError, DatasetNoAccessError, DatasetNotFoundError, DatasetProcessingError, @@ -100,7 +99,8 @@ def test_private_dataset_no_access( ) assert e.value.status_code == HTTPStatus.FORBIDDEN assert e.value.uri == DatasetNoAccessError.uri - assert e.value.code == DatasetError.NO_ACCESS + no_access = 112 + assert e.value.code == no_access @pytest.mark.parametrize(