diff --git a/src/core/errors.py b/src/core/errors.py index 840cd75..e697831 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -1,7 +1,354 @@ -from enum import IntEnum +"""RFC 9457 Problem Details for HTTP APIs. +This module provides RFC 9457 compliant error handling for the OpenML REST API. +See: https://www.rfc-editor.org/rfc/rfc9457.html +""" -class DatasetError(IntEnum): - NOT_FOUND = 111 - NO_ACCESS = 112 - NO_DATA_FILE = 113 +from http import HTTPStatus + +from fastapi import Request +from fastapi.responses import JSONResponse + +# ============================================================================= +# Base Exception +# ============================================================================= + + +class ProblemDetailError(Exception): + """Base exception for RFC 9457 compliant error responses. + + Subclasses should define class attributes: + - uri: The problem type URI + - title: Human-readable title + - _default_status_code: HTTP status code + - _default_code: Legacy error code (optional) + + The status_code and code can be overridden per-instance. + """ + + uri: str = "about:blank" + title: str = "An error occurred" + _default_status_code: HTTPStatus = HTTPStatus.INTERNAL_SERVER_ERROR + _default_code: int | None = None + + def __init__( + self, + detail: str, + *, + code: int | str | None = None, + instance: str | None = None, + status_code: HTTPStatus | None = None, + ) -> None: + self.detail = detail + self._code_override = code + self.instance = instance + self._status_code_override = status_code + super().__init__(detail) + + @property + def status_code(self) -> HTTPStatus: + """Return the status code, preferring instance override over class default.""" + if self._status_code_override is not None: + return self._status_code_override + return self._default_status_code + + @property + def code(self) -> int | str | None: + """Return the code, preferring instance override over class default.""" + if self._code_override is not None: + return self._code_override + return self._default_code + + +def problem_detail_exception_handler( + request: Request, # noqa: ARG001 + exc: ProblemDetailError, +) -> JSONResponse: + """FastAPI exception handler for ProblemDetailError. + + Returns a response with: + - Content-Type: application/problem+json + - RFC 9457 compliant JSON body + """ + content: dict[str, str | int] = { + "type": exc.uri, + "title": exc.title, + "status": int(exc.status_code), + "detail": exc.detail, + } + if exc.code is not None: + content["code"] = str(exc.code) + if exc.instance is not None: + content["instance"] = exc.instance + + return JSONResponse( + status_code=int(exc.status_code), + content=content, + media_type="application/problem+json", + ) + + +# ============================================================================= +# Dataset Errors +# ============================================================================= + + +class DatasetNotFoundError(ProblemDetailError): + """Raised when a dataset cannot be found.""" + + uri = "https://openml.org/problems/dataset-not-found" + title = "Dataset Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 111 + + +class DatasetNoAccessError(ProblemDetailError): + """Raised when user doesn't have access to a dataset.""" + + uri = "https://openml.org/problems/dataset-no-access" + title = "Dataset Access Denied" + _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 112 + + +class DatasetNoDataFileError(ProblemDetailError): + """Raised when a dataset's data file is missing.""" + + uri = "https://openml.org/problems/dataset-no-data-file" + title = "Dataset Data File Missing" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 113 + + +class DatasetNotProcessedError(ProblemDetailError): + """Raised when a dataset has not been processed yet.""" + + uri = "https://openml.org/problems/dataset-not-processed" + title = "Dataset Not Processed" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 273 + + +class DatasetProcessingError(ProblemDetailError): + """Raised when a dataset had an error during processing.""" + + uri = "https://openml.org/problems/dataset-processing-error" + title = "Dataset Processing Error" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 274 + + +class DatasetNoFeaturesError(ProblemDetailError): + """Raised when a dataset has no features available.""" + + uri = "https://openml.org/problems/dataset-no-features" + title = "Dataset Features Not Available" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 272 + + +class DatasetStatusTransitionError(ProblemDetailError): + """Raised when an invalid dataset status transition is attempted.""" + + uri = "https://openml.org/problems/dataset-status-transition" + title = "Invalid Status Transition" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 694 + + +class DatasetNotOwnedError(ProblemDetailError): + """Raised when user tries to modify a dataset they don't own.""" + + uri = "https://openml.org/problems/dataset-not-owned" + title = "Dataset Not Owned" + _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 693 + + +class DatasetAdminOnlyError(ProblemDetailError): + """Raised when a non-admin tries to perform an admin-only action.""" + + uri = "https://openml.org/problems/dataset-admin-only" + title = "Administrator Only" + _default_status_code = HTTPStatus.FORBIDDEN + _default_code = 696 + + +# ============================================================================= +# Authentication/Authorization Errors +# ============================================================================= + + +class AuthenticationRequiredError(ProblemDetailError): + """Raised when authentication is required but not provided.""" + + uri = "https://openml.org/problems/authentication-required" + title = "Authentication Required" + _default_status_code = HTTPStatus.UNAUTHORIZED + + +class AuthenticationFailedError(ProblemDetailError): + """Raised when authentication credentials are invalid.""" + + uri = "https://openml.org/problems/authentication-failed" + title = "Authentication Failed" + _default_status_code = HTTPStatus.UNAUTHORIZED + _default_code = 103 + + +class ForbiddenError(ProblemDetailError): + """Raised when user is authenticated but not authorized.""" + + uri = "https://openml.org/problems/forbidden" + title = "Forbidden" + _default_status_code = HTTPStatus.FORBIDDEN + + +# ============================================================================= +# Tag Errors +# ============================================================================= + + +class TagAlreadyExistsError(ProblemDetailError): + """Raised when trying to add a tag that already exists.""" + + uri = "https://openml.org/problems/tag-already-exists" + title = "Tag Already Exists" + _default_status_code = HTTPStatus.CONFLICT + _default_code = 473 + + +# ============================================================================= +# Search/List Errors +# ============================================================================= + + +class NoResultsError(ProblemDetailError): + """Raised when a search returns no results.""" + + uri = "https://openml.org/problems/no-results" + title = "No Results Found" + _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 372 + + +# ============================================================================= +# Study Errors +# ============================================================================= + + +class StudyNotFoundError(ProblemDetailError): + """Raised when a study cannot be found.""" + + uri = "https://openml.org/problems/study-not-found" + title = "Study Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +class StudyPrivateError(ProblemDetailError): + """Raised when trying to access a private study without permission.""" + + uri = "https://openml.org/problems/study-private" + title = "Study Is Private" + _default_status_code = HTTPStatus.FORBIDDEN + + +class StudyLegacyError(ProblemDetailError): + """Raised when trying to access a legacy study that's no longer supported.""" + + uri = "https://openml.org/problems/study-legacy" + title = "Legacy Study Not Supported" + _default_status_code = HTTPStatus.GONE + + +class StudyAliasExistsError(ProblemDetailError): + """Raised when trying to create a study with an alias that already exists.""" + + uri = "https://openml.org/problems/study-alias-exists" + title = "Study Alias Already Exists" + _default_status_code = HTTPStatus.CONFLICT + + +class StudyInvalidTypeError(ProblemDetailError): + """Raised when study type configuration is invalid.""" + + uri = "https://openml.org/problems/study-invalid-type" + title = "Invalid Study Type" + _default_status_code = HTTPStatus.BAD_REQUEST + + +class StudyNotEditableError(ProblemDetailError): + """Raised when trying to edit a study that cannot be edited.""" + + uri = "https://openml.org/problems/study-not-editable" + title = "Study Not Editable" + _default_status_code = HTTPStatus.FORBIDDEN + + +class StudyConflictError(ProblemDetailError): + """Raised when there's a conflict with study data (e.g., duplicate attachment).""" + + uri = "https://openml.org/problems/study-conflict" + title = "Study Conflict" + _default_status_code = HTTPStatus.CONFLICT + + +# ============================================================================= +# Task Errors +# ============================================================================= + + +class TaskNotFoundError(ProblemDetailError): + """Raised when a task cannot be found.""" + + uri = "https://openml.org/problems/task-not-found" + title = "Task Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +class TaskTypeNotFoundError(ProblemDetailError): + """Raised when a task type cannot be found.""" + + uri = "https://openml.org/problems/task-type-not-found" + title = "Task Type Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + _default_code = 241 + + +# ============================================================================= +# Flow Errors +# ============================================================================= + + +class FlowNotFoundError(ProblemDetailError): + """Raised when a flow cannot be found.""" + + uri = "https://openml.org/problems/flow-not-found" + title = "Flow Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Service Errors +# ============================================================================= + + +class ServiceNotFoundError(ProblemDetailError): + """Raised when a service cannot be found.""" + + uri = "https://openml.org/problems/service-not-found" + title = "Service Not Found" + _default_status_code = HTTPStatus.NOT_FOUND + + +# ============================================================================= +# Internal Errors +# ============================================================================= + + +class InternalError(ProblemDetailError): + """Raised for unexpected internal server errors.""" + + uri = "https://openml.org/problems/internal-error" + title = "Internal Server Error" + _default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR diff --git a/src/core/formatting.py b/src/core/formatting.py index 174261f..f954e81 100644 --- a/src/core/formatting.py +++ b/src/core/formatting.py @@ -3,7 +3,6 @@ from sqlalchemy.engine import Row from config import load_routing_configuration -from core.errors import DatasetError from schemas.datasets.openml import DatasetFileFormat @@ -16,11 +15,6 @@ def _str_to_bool(string: str) -> bool: raise ValueError(msg) -def _format_error(*, code: DatasetError, message: str) -> dict[str, str]: - """Formatter for JSON bodies of OpenML error codes.""" - return {"code": str(code), "message": message} - - def _format_parquet_url(dataset: Row) -> str | None: if dataset.format.lower() != DatasetFileFormat.ARFF: return None diff --git a/src/database/users.py b/src/database/users.py index b439be7..1f66640 100644 --- a/src/database/users.py +++ b/src/database/users.py @@ -7,11 +7,10 @@ from config import load_configuration -# Enforces str is 32 hexadecimal characters, does not check validity. # If `allow_test_api_keys` is set, the key may also be one of `normaluser`, # `normaluser2`, or `abc` (admin). api_key_pattern = r"^[0-9a-fA-F]{32}$" -if load_configuration()["development"].get("allow_test_api_keys"): +if load_configuration().get("development", {}).get("allow_test_api_keys"): api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$" APIKey = Annotated[ diff --git a/src/main.py b/src/main.py index d8e61b3..e3fe6e0 100644 --- a/src/main.py +++ b/src/main.py @@ -4,6 +4,7 @@ from fastapi import FastAPI from config import load_configuration +from core.errors import ProblemDetailError, problem_detail_exception_handler from routers.mldcat_ap.dataset import router as mldcat_ap_router from routers.openml.datasets import router as datasets_router from routers.openml.estimation_procedure import router as estimationprocedure_router @@ -45,6 +46,8 @@ def create_api() -> FastAPI: fastapi_kwargs = load_configuration()["fastapi"] app = FastAPI(**fastapi_kwargs) + app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type] + app.include_router(datasets_router) app.include_router(qualities_router) app.include_router(mldcat_ap_router) diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py index db34e5c..00c7610 100644 --- a/src/routers/mldcat_ap/dataset.py +++ b/src/routers/mldcat_ap/dataset.py @@ -6,10 +6,11 @@ from typing import Annotated -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import config +from core.errors import ServiceNotFoundError from database.users import User from routers.dependencies import expdb_connection, fetch_user, userdb_connection from routers.openml.datasets import get_dataset, get_dataset_features @@ -121,7 +122,8 @@ def get_mldcat_ap_distribution( ) def get_dataservice(service_id: int) -> JsonLDGraph: if service_id != 1: - raise HTTPException(status_code=404, detail="Service not found.") + msg = "Service not found." + raise ServiceNotFoundError(msg) return JsonLDGraph( context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld", graph=[ diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index dda2511..360b3c4 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -1,21 +1,34 @@ import re from datetime import datetime from enum import StrEnum -from http import HTTPStatus from typing import Annotated, Any, Literal, NamedTuple -from fastapi import APIRouter, Body, Depends, HTTPException +from fastapi import APIRouter, Body, Depends from sqlalchemy import Connection, text from sqlalchemy.engine import Row import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError +from core.errors import ( + AuthenticationFailedError, + AuthenticationRequiredError, + DatasetAdminOnlyError, + DatasetNoAccessError, + DatasetNoDataFileError, + DatasetNoFeaturesError, + DatasetNotFoundError, + DatasetNotOwnedError, + DatasetNotProcessedError, + DatasetProcessingError, + DatasetStatusTransitionError, + InternalError, + NoResultsError, + TagAlreadyExistsError, +) from core.formatting import ( _csv_as_list, _format_dataset_url, - _format_error, _format_parquet_url, ) from database.users import User, UserGroup @@ -37,10 +50,12 @@ def tag_dataset( ) -> dict[str, dict[str, Any]]: tags = database.datasets.get_tags_for(data_id, expdb_db) if tag.casefold() in [t.casefold() for t in tags]: - raise create_tag_exists_error(data_id, tag) + msg = f"Entity already tagged by this tag. id={data_id}; tag={tag}" + raise TagAlreadyExistsError(msg) if user is None: - raise create_authentication_failed_error() + msg = "Authentication failed." + raise AuthenticationFailedError(msg) database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db) return { @@ -48,24 +63,6 @@ def tag_dataset( } -def create_authentication_failed_error() -> HTTPException: - return HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "103", "message": "Authentication failed"}, - ) - - -def create_tag_exists_error(data_id: int, tag: str) -> HTTPException: - return HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail={ - "code": "473", - "message": "Entity already tagged by this tag.", - "additional_information": f"id={data_id}; tag={tag}", - }, - ) - - class DatasetStatusFilter(StrEnum): ACTIVE = DatasetStatus.ACTIVE DEACTIVATED = DatasetStatus.DEACTIVATED @@ -203,10 +200,8 @@ def quality_clause(quality: str, range_: str | None) -> str: row.did: dict(zip(columns, row, strict=True)) for row in rows } if not datasets: - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "372", "message": "No results"}, - ) from None + msg = "No datasets match the search criteria." + raise NoResultsError(msg) for dataset in datasets.values(): # The old API does not actually provide the checksum but just an empty field @@ -266,15 +261,15 @@ def _get_dataset_raise_otherwise( ) -> Row: """Fetches the dataset from the database if it exists and the user has permissions. - Raises HTTPException if the dataset does not exist or the user can not access it. + Raises ProblemDetailError if the dataset does not exist or the user can not access it. """ if not (dataset := database.datasets.get(dataset_id, expdb)): - error = _format_error(code=DatasetError.NOT_FOUND, message="Unknown dataset") - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=error) + msg = "Unknown dataset." + raise DatasetNotFoundError(msg) if not _user_has_access(dataset=dataset, user=user): - error = _format_error(code=DatasetError.NO_ACCESS, message="No access granted") - raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=error) + msg = "No access granted." + raise DatasetNoAccessError(msg) return dataset @@ -297,21 +292,19 @@ def get_dataset_features( if not features: processing_state = database.datasets.get_latest_processing_update(dataset_id, expdb) if processing_state is None: - code, msg = ( - 273, - "Dataset not processed yet. The dataset was not processed yet, features are not yet available. Please wait for a few minutes.", # noqa: E501 + msg = ( + "Dataset not processed yet. The dataset was not processed yet, " + "features are not yet available. Please wait for a few minutes." ) - elif processing_state.error: - code, msg = 274, "No features found. Additionally, dataset processed with error" - else: - code, msg = ( - 272, - "No features found. The dataset did not contain any features, or we could not extract them.", # noqa: E501 - ) - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": code, "message": msg}, + raise DatasetNotProcessedError(msg) + if processing_state.error: + msg = "No features found. Additionally, dataset processed with error." + raise DatasetProcessingError(msg) + msg = ( + "No features found. " + "The dataset did not contain any features, or we could not extract them." ) + raise DatasetNoFeaturesError(msg) return features @@ -325,31 +318,23 @@ def update_dataset_status( expdb: Annotated[Connection, Depends(expdb_connection)], ) -> dict[str, str | int]: if user is None: - raise HTTPException( - status_code=HTTPStatus.UNAUTHORIZED, - detail="Updating dataset status required authorization", - ) + msg = "Updating dataset status requires authentication." + raise AuthenticationRequiredError(msg) dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb) can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups if status == DatasetStatus.DEACTIVATED and not can_deactivate: - raise HTTPException( - status_code=HTTPStatus.FORBIDDEN, - detail={"code": 693, "message": "Dataset is not owned by you"}, - ) + msg = "Dataset is not owned by you." + raise DatasetNotOwnedError(msg) if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups: - raise HTTPException( - status_code=HTTPStatus.FORBIDDEN, - detail={"code": 696, "message": "Only administrators can activate datasets."}, - ) + msg = "Only administrators can activate datasets." + raise DatasetAdminOnlyError(msg) current_status = database.datasets.get_status(dataset_id, expdb) if current_status and current_status.status == status: - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": 694, "message": "Illegal status transition."}, - ) + msg = "Illegal status transition." + raise DatasetStatusTransitionError(msg) # If current status is unknown, it is effectively "in preparation", # So the following transitions are allowed (first 3 transitions are first clause) @@ -362,10 +347,8 @@ def update_dataset_status( elif current_status.status == DatasetStatus.DEACTIVATED: database.datasets.remove_deactivated_status(dataset_id, expdb) else: - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail={"message": f"Unknown status transition: {current_status} -> {status}"}, - ) + msg = f"Unknown status transition: {current_status} -> {status}" + raise InternalError(msg) return {"dataset_id": dataset_id, "status": status} @@ -384,11 +367,8 @@ def get_dataset( if not ( dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db) ): - error = _format_error( - code=DatasetError.NO_DATA_FILE, - message="No data file found", - ) - raise HTTPException(status_code=HTTPStatus.PRECONDITION_FAILED, detail=error) + msg = "No data file found." + raise DatasetNoDataFileError(msg) tags = database.datasets.get_tags_for(dataset_id, expdb_db) description = database.datasets.get_description(dataset_id, expdb_db) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index cb6df5d..64dd083 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -1,11 +1,11 @@ -from http import HTTPStatus from typing import Annotated, Literal -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import database.flows from core.conversions import _str_to_num +from core.errors import FlowNotFoundError from routers.dependencies import expdb_connection from schemas.flows import Flow, Parameter, Subflow @@ -21,10 +21,8 @@ def flow_exists( """Check if a Flow with the name and version exists, if so, return the flow id.""" flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb) if flow is None: - raise HTTPException( - status_code=HTTPStatus.NOT_FOUND, - detail="Flow not found.", - ) + msg = "Flow not found." + raise FlowNotFoundError(msg) return {"flow_id": flow.id} @@ -32,7 +30,8 @@ def flow_exists( def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow: flow = database.flows.get(flow_id, expdb) if not flow: - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Flow not found") + msg = "Flow not found." + raise FlowNotFoundError(msg) parameter_rows = database.flows.get_parameters(flow_id, expdb) parameters = [ diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 54181f8..c369b14 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -1,13 +1,13 @@ from http import HTTPStatus from typing import Annotated, Literal -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetError +from core.errors import DatasetNotFoundError from database.users import User from routers.dependencies import expdb_connection, fetch_user from schemas.datasets.openml import Quality @@ -35,10 +35,14 @@ def get_qualities( ) -> list[Quality]: dataset = database.datasets.get(dataset_id, expdb) if not dataset or not _user_has_access(dataset, user): - raise HTTPException( + # Backwards compatibility: PHP API returns 412 with code 113 + msg = "Unknown dataset." + no_data_file = 113 + raise DatasetNotFoundError( + msg, + code=no_data_file, status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": DatasetError.NO_DATA_FILE, "message": "Unknown dataset"}, - ) from None + ) return database.qualities.get_for_dataset(dataset_id, expdb) # The PHP API provided (sometime) helpful error messages # if not qualities: diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index 6fe1dcc..9e9a6c9 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -1,11 +1,20 @@ -from http import HTTPStatus from typing import Annotated, Literal -from fastapi import APIRouter, Body, Depends, HTTPException +from fastapi import APIRouter, Body, Depends from pydantic import BaseModel from sqlalchemy import Connection, Row import database.studies +from core.errors import ( + AuthenticationRequiredError, + StudyAliasExistsError, + StudyConflictError, + StudyInvalidTypeError, + StudyLegacyError, + StudyNotEditableError, + StudyNotFoundError, + StudyPrivateError, +) from core.formatting import _str_to_bool from database.users import User, UserGroup from routers.dependencies import expdb_connection, fetch_user @@ -22,20 +31,18 @@ def _get_study_raise_otherwise(id_or_alias: int | str, user: User | None, expdb: study = database.studies.get_by_alias(id_or_alias, expdb) if study is None: - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Study not found.") + msg = "Study not found." + raise StudyNotFoundError(msg) if study.visibility == Visibility.PRIVATE: if user is None: - raise HTTPException( - status_code=HTTPStatus.UNAUTHORIZED, - detail="Must authenticate for private study.", - ) + msg = "Must authenticate for private study." + raise AuthenticationRequiredError(msg) if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Study is private.") + msg = "Study is private." + raise StudyPrivateError(msg) if _str_to_bool(study.legacy): - raise HTTPException( - status_code=HTTPStatus.GONE, - detail="Legacy studies are no longer supported", - ) + msg = "Legacy studies are no longer supported." + raise StudyLegacyError(msg) return study @@ -52,19 +59,16 @@ def attach_to_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> AttachDetachResponse: if user is None: - raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="User not found.") + msg = "Authentication required." + raise AuthenticationRequiredError(msg) study = _get_study_raise_otherwise(study_id, user, expdb) # PHP lets *anyone* edit *any* study. We're not going to do that. if study.creator != user.user_id and UserGroup.ADMIN not in user.groups: - raise HTTPException( - status_code=HTTPStatus.FORBIDDEN, - detail="Study can only be edited by its creator.", - ) + msg = "Study can only be edited by its creator." + raise StudyNotEditableError(msg) if study.status != StudyStatus.IN_PREPARATION: - raise HTTPException( - status_code=HTTPStatus.FORBIDDEN, - detail="Study can only be edited while in preparation.", - ) + msg = "Study can only be edited while in preparation." + raise StudyNotEditableError(msg) # We let the database handle the constraints on whether # the entity is already attached or if it even exists. @@ -79,10 +83,8 @@ def attach_to_study( else: database.studies.attach_runs(run_ids=entity_ids, **attach_kwargs) except ValueError as e: - raise HTTPException( - status_code=HTTPStatus.CONFLICT, - detail=str(e), - ) from None + msg = str(e) + raise StudyConflictError(msg) from e return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_) @@ -93,25 +95,17 @@ def create_study( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> dict[Literal["study_id"], int]: if user is None: - raise HTTPException( - status_code=HTTPStatus.UNAUTHORIZED, - detail="Creating a study requires authentication.", - ) + msg = "Creating a study requires authentication." + raise AuthenticationRequiredError(msg) if study.main_entity_type == StudyType.RUN and study.tasks: - raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, - detail="Cannot create a run study with tasks.", - ) + msg = "Cannot create a run study with tasks." + raise StudyInvalidTypeError(msg) if study.main_entity_type == StudyType.TASK and study.runs: - raise HTTPException( - status_code=HTTPStatus.BAD_REQUEST, - detail="Cannot create a task study with runs.", - ) + msg = "Cannot create a task study with runs." + raise StudyInvalidTypeError(msg) if study.alias and database.studies.get_by_alias(study.alias, expdb): - raise HTTPException( - status_code=HTTPStatus.CONFLICT, - detail="Study alias already exists.", - ) + msg = "Study alias already exists." + raise StudyAliasExistsError(msg) study_id = database.studies.create(study, user, expdb) if study.main_entity_type == StudyType.TASK: for task_id in study.tasks: diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py index 8397f1d..52999b1 100644 --- a/src/routers/openml/tasks.py +++ b/src/routers/openml/tasks.py @@ -1,15 +1,15 @@ import json import re -from http import HTTPStatus from typing import Annotated, cast import xmltodict -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection, RowMapping, text import config import database.datasets import database.tasks +from core.errors import InternalError, TaskNotFoundError from routers.dependencies import expdb_connection from schemas.datasets.openml import Task @@ -155,12 +155,11 @@ def get_task( expdb: Annotated[Connection, Depends(expdb_connection)] = None, ) -> Task: if not (task := database.tasks.get(task_id, expdb)): - raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Task not found") + msg = "Task not found." + raise TaskNotFoundError(msg) if not (task_type := database.tasks.get_task_type(task.ttid, expdb)): - raise HTTPException( - status_code=HTTPStatus.INTERNAL_SERVER_ERROR, - detail="Task type not found", - ) + msg = "Task type not found." + raise InternalError(msg) task_inputs = { row.input: int(row.value) if row.value.isdigit() else row.value diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py index 5213f17..453f224 100644 --- a/src/routers/openml/tasktype.py +++ b/src/routers/openml/tasktype.py @@ -1,10 +1,10 @@ import json -from http import HTTPStatus from typing import Annotated, Any, Literal, cast -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from sqlalchemy import Connection, Row +from core.errors import TaskTypeNotFoundError from database.tasks import get_input_for_task_type, get_task_types from database.tasks import get_task_type as db_get_task_type from routers.dependencies import expdb_connection @@ -45,10 +45,8 @@ def get_task_type( ) -> dict[Literal["task_type"], dict[str, str | None | list[str] | list[dict[str, str]]]]: task_type_record = db_get_task_type(task_type_id, expdb) if task_type_record is None: - raise HTTPException( - status_code=HTTPStatus.PRECONDITION_FAILED, - detail={"code": "241", "message": "Unknown task type."}, - ) from None + msg = "Unknown task type." + raise TaskTypeNotFoundError(msg) task_type = _normalize_task_type(task_type_record) # Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B') diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index 5449862..d69a265 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -4,6 +4,7 @@ from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import AuthenticationFailedError, TagAlreadyExistsError from database.datasets import get_tags_for from tests import constants from tests.users import ApiKey @@ -20,8 +21,11 @@ def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> No f"/datasets/tag{apikey}", json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"}, ) - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"} + assert response.status_code == HTTPStatus.UNAUTHORIZED + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == AuthenticationFailedError.uri + assert error["code"] == "103" @pytest.mark.parametrize( @@ -58,15 +62,13 @@ def test_dataset_tag_fails_if_tag_exists(py_api: TestClient) -> None: f"/datasets/tag?api_key={ApiKey.ADMIN}", json={"data_id": dataset_id, "tag": tag}, ) - assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR - expected = { - "detail": { - "code": "473", - "message": "Entity already tagged by this tag.", - "additional_information": f"id={dataset_id}; tag={tag}", - }, - } - assert expected == response.json() + assert response.status_code == HTTPStatus.CONFLICT + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == TagAlreadyExistsError.uri + assert error["code"] == "473" + assert f"id={dataset_id}" in error["detail"] + assert f"tag={tag}" in error["detail"] @pytest.mark.parametrize( diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py index e1ff17b..eaaef29 100644 --- a/tests/routers/openml/datasets_list_datasets_test.py +++ b/tests/routers/openml/datasets_list_datasets_test.py @@ -8,6 +8,7 @@ from hypothesis import strategies as st from starlette.testclient import TestClient +from core.errors import NoResultsError from tests import constants from tests.users import ApiKey @@ -15,8 +16,11 @@ def _assert_empty_result( response: httpx.Response, ) -> None: - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == {"code": "372", "message": "No results"} + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == NoResultsError.uri + assert error["code"] == "372" def test_list(py_api: TestClient) -> None: @@ -283,9 +287,21 @@ def test_list_data_identical( uri += api_key_query original = php_api.get(uri) - assert original.status_code == response.status_code, response.json() - if original.status_code == HTTPStatus.PRECONDITION_FAILED: - assert original.json()["error"] == response.json()["detail"] + # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results) + # and the error response format, so we can't compare error responses directly. + php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED + py_is_error = response.status_code == HTTPStatus.NOT_FOUND + + if php_is_error or py_is_error: + # Both should be errors in the same cases + assert php_is_error == py_is_error, ( + f"PHP status={original.status_code}, Python status={response.status_code}" + ) + # Verify Python API returns RFC 9457 format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == NoResultsError.uri + assert error["code"] == "372" return None new_json = response.json() # Qualities in new response are typed diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py index 4ba5ad8..3b342fc 100644 --- a/tests/routers/openml/datasets_test.py +++ b/tests/routers/openml/datasets_test.py @@ -1,10 +1,14 @@ from http import HTTPStatus import pytest -from fastapi import HTTPException from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import ( + DatasetNoAccessError, + DatasetNotFoundError, + DatasetProcessingError, +) from database.users import User from routers.openml.datasets import get_dataset from schemas.datasets.openml import DatasetMetadata, DatasetStatus @@ -28,7 +32,13 @@ def test_error_unknown_dataset( response = py_api.get(f"/datasets/{dataset_id}") assert response.status_code == response_code - assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == DatasetNotFoundError.uri + assert error["title"] == "Dataset Not Found" + assert error["status"] == HTTPStatus.NOT_FOUND + assert error["detail"] == "Unknown dataset." + assert error["code"] == "111" def test_get_dataset(py_api: TestClient) -> None: @@ -80,7 +90,7 @@ def test_private_dataset_no_access( user: User | None, expdb_test: Connection, ) -> None: - with pytest.raises(HTTPException) as e: + with pytest.raises(DatasetNoAccessError) as e: get_dataset( dataset_id=130, user=user, @@ -88,7 +98,9 @@ def test_private_dataset_no_access( expdb_db=expdb_test, ) assert e.value.status_code == HTTPStatus.FORBIDDEN - assert e.value.detail == {"code": "112", "message": "No access granted"} # type: ignore[comparison-overlap] + assert e.value.uri == DatasetNoAccessError.uri + no_access = 112 + assert e.value.code == no_access @pytest.mark.parametrize( @@ -177,10 +189,11 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None: # In that case, no feature information will ever be available. response = py_api.get("/datasets/features/55") assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json()["detail"] == { - "code": 274, - "message": "No features found. Additionally, dataset processed with error", - } + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == DatasetProcessingError.uri + assert error["code"] == "274" + assert "No features found" in error["detail"] def test_dataset_features_dataset_does_not_exist(py_api: TestClient) -> None: diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index d5188d0..2c82ea3 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -2,11 +2,11 @@ import deepdiff.diff import pytest -from fastapi import HTTPException from pytest_mock import MockerFixture from sqlalchemy import Connection from starlette.testclient import TestClient +from core.errors import FlowNotFoundError from routers.openml.flows import flow_exists from tests.conftest import Flow @@ -53,10 +53,10 @@ def test_flow_exists_processes_found( def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None: mocker.patch("database.flows.get_by_name", return_value=None) - with pytest.raises(HTTPException) as error: + with pytest.raises(FlowNotFoundError) as error: flow_exists("foo", "bar", expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND - assert error.value.detail == "Flow not found." + assert error.value.uri == FlowNotFoundError.uri def test_flow_exists(flow: Flow, py_api: TestClient) -> None: @@ -68,7 +68,10 @@ def test_flow_exists(flow: Flow, py_api: TestClient) -> None: def test_flow_exists_not_exists(py_api: TestClient) -> None: response = py_api.get("/flows/exists/foo/bar") assert response.status_code == HTTPStatus.NOT_FOUND - assert response.json()["detail"] == "Flow not found." + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == FlowNotFoundError.uri + assert error["detail"] == "Flow not found." def test_get_flow_no_subflow(py_api: TestClient) -> None: diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 011d8db..99fb590 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -7,6 +7,11 @@ import tests.constants from core.conversions import nested_remove_single_element_list +from core.errors import ( + DatasetNoAccessError, + DatasetNotFoundError, + TagAlreadyExistsError, +) from tests.users import ApiKey @@ -28,7 +33,10 @@ def test_dataset_response_is_identical( # noqa: C901, PLR0912 assert original.status_code == new.status_code if new.status_code != HTTPStatus.OK: - assert original.json()["error"] == new.json()["detail"] + # RFC 9457: Python API now returns problem+json format + assert new.headers["content-type"] == "application/problem+json" + # Both APIs should return error responses in the same cases + assert "error" in original.json() return try: @@ -102,7 +110,11 @@ def test_error_unknown_dataset( # The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED" assert response.status_code == HTTPStatus.NOT_FOUND - assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"} + # RFC 9457: Python API now returns problem+json format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == DatasetNotFoundError.uri + assert error["code"] == "111" @pytest.mark.parametrize( @@ -118,7 +130,10 @@ def test_private_dataset_no_user_no_access( # New response is 403: Forbidden instead of 412: PRECONDITION FAILED assert response.status_code == HTTPStatus.FORBIDDEN - assert response.json()["detail"] == {"code": "112", "message": "No access granted"} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == DatasetNoAccessError.uri + assert error["code"] == "112" @pytest.mark.parametrize( @@ -184,9 +199,21 @@ def test_dataset_tag_response_is_identical( json={"data_id": dataset_id, "tag": tag}, ) + # RFC 9457: Tag conflict now returns 409 instead of 500 + if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged: + assert new.status_code == HTTPStatus.CONFLICT + assert new.headers["content-type"] == "application/problem+json" + error = new.json() + assert error["type"] == TagAlreadyExistsError.uri + assert error["code"] == "473" + return + assert original.status_code == new.status_code, original.json() if new.status_code != HTTPStatus.OK: - assert original.json()["error"] == new.json()["detail"] + # RFC 9457: Python API now returns problem+json format + assert new.headers["content-type"] == "application/problem+json" + # Both APIs should error in the same cases + assert "error" in original.json() return original = original.json() @@ -209,9 +236,14 @@ def test_datasets_feature_is_identical( assert response.status_code == original.status_code if response.status_code != HTTPStatus.OK: - error = response.json()["detail"] - error["code"] = str(error["code"]) - assert error == original.json()["error"] + # RFC 9457: Python API now returns problem+json format + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + # Verify Python API returns properly typed RFC 9457 response + assert "type" in error + assert "status" in error + # Both APIs should error in the same cases + assert "error" in original.json() return python_body = response.json() diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py index 674bc43..5b82426 100644 --- a/tests/routers/openml/migration/flows_migration_test.py +++ b/tests/routers/openml/migration/flows_migration_test.py @@ -10,6 +10,7 @@ nested_remove_single_element_list, nested_str_to_num, ) +from core.errors import FlowNotFoundError from tests.conftest import Flow @@ -27,7 +28,11 @@ def test_flow_exists_not( expect_php = {"flow_exists": {"exists": "false", "id": str(-1)}} assert php_response.json() == expect_php - assert py_response.json() == {"detail": "Flow not found."} + # RFC 9457: Python API now returns problem+json format + assert py_response.headers["content-type"] == "application/problem+json" + error = py_response.json() + assert error["type"] == FlowNotFoundError.uri + assert error["detail"] == "Flow not found." @pytest.mark.mut diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index eed569e..54cf984 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -6,6 +6,8 @@ from sqlalchemy import Connection, text from starlette.testclient import TestClient +from core.errors import DatasetNotFoundError + def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None: expdb_test.execute( @@ -313,6 +315,9 @@ def test_get_quality_identical_error( php_response = php_api.get(f"/data/qualities/{data_id}") python_response = py_api.get(f"/datasets/qualities/{data_id}") assert python_response.status_code == php_response.status_code - # The "dataset unknown" error currently has a separate code in PHP depending on - # where it occurs (e.g., get dataset->113 get quality->361) - assert python_response.json()["detail"]["message"] == php_response.json()["error"]["message"] + # RFC 9457: Python API now returns problem+json format + assert python_response.headers["content-type"] == "application/problem+json" + error = python_response.json() + assert error["type"] == DatasetNotFoundError.uri + # Verify the error message matches the PHP API semantically + assert "Unknown dataset" in error["detail"] diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py index a9a8ed4..ed7018f 100644 --- a/tests/routers/openml/study_test.py +++ b/tests/routers/openml/study_test.py @@ -5,6 +5,7 @@ from sqlalchemy import Connection, text from starlette.testclient import TestClient +from core.errors import StudyConflictError from schemas.study import StudyType from tests.users import ApiKey @@ -556,7 +557,10 @@ def test_attach_task_to_study_already_linked_raises( expdb_test=expdb_test, ) assert response.status_code == HTTPStatus.CONFLICT, response.content - assert response.json() == {"detail": "Task 1 is already attached to study 1."} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == StudyConflictError.uri + assert "Task 1 is already attached to study 1" in error["detail"] def test_attach_task_to_study_but_task_not_exist_raises( @@ -572,4 +576,7 @@ def test_attach_task_to_study_but_task_not_exist_raises( expdb_test=expdb_test, ) assert response.status_code == HTTPStatus.CONFLICT - assert response.json() == {"detail": "One or more of the tasks do not exist."} + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == StudyConflictError.uri + assert "do not exist" in error["detail"] diff --git a/tests/routers/openml/task_type_test.py b/tests/routers/openml/task_type_test.py index d14929c..a8e897e 100644 --- a/tests/routers/openml/task_type_test.py +++ b/tests/routers/openml/task_type_test.py @@ -5,6 +5,8 @@ import pytest from starlette.testclient import TestClient +from core.errors import TaskTypeNotFoundError + def test_list_task_type(py_api: TestClient, php_api: httpx.Client) -> None: response = py_api.get("/tasktype/list") @@ -36,5 +38,9 @@ def test_get_task_type(ttype_id: int, py_api: TestClient, php_api: httpx.Client) def test_get_task_type_unknown(py_api: TestClient) -> None: response = py_api.get("/tasktype/1000") - assert response.status_code == HTTPStatus.PRECONDITION_FAILED - assert response.json() == {"detail": {"code": "241", "message": "Unknown task type."}} + assert response.status_code == HTTPStatus.NOT_FOUND + assert response.headers["content-type"] == "application/problem+json" + error = response.json() + assert error["type"] == TaskTypeNotFoundError.uri + assert error["code"] == "241" + assert "Unknown task type" in error["detail"]