From 855fa05e5d84f2e37a46453a272d54cd08bf805d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Feb 2026 10:36:31 +0200
Subject: [PATCH 1/9] Allow more lenient api key through configuration
---
src/database/users.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/database/users.py b/src/database/users.py
index b439be7..220c34a 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -7,7 +7,6 @@
from config import load_configuration
-# Enforces str is 32 hexadecimal characters, does not check validity.
# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
# `normaluser2`, or `abc` (admin).
api_key_pattern = r"^[0-9a-fA-F]{32}$"
From 5c551c7e4a687c98fbea30798bf620e4e5612976 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 13:42:44 +0200
Subject: [PATCH 2/9] Added RFC with some failing linting/type checks
---
src/core/errors.py | 215 ++++++++++++++++++
src/core/formatting.py | 6 -
src/main.py | 3 +
src/routers/mldcat_ap/dataset.py | 10 +-
src/routers/openml/datasets.py | 133 ++++++-----
src/routers/openml/flows.py | 12 +-
src/routers/openml/qualities.py | 12 +-
src/routers/openml/study.py | 52 +++--
src/routers/openml/tasks.py | 14 +-
src/routers/openml/tasktype.py | 13 +-
tests/routers/openml/dataset_tag_test.py | 24 +-
.../openml/datasets_list_datasets_test.py | 26 ++-
tests/routers/openml/datasets_test.py | 24 +-
tests/routers/openml/flows_test.py | 11 +-
.../migration/datasets_migration_test.py | 42 +++-
.../openml/migration/flows_migration_test.py | 7 +-
tests/routers/openml/qualities_test.py | 11 +-
tests/routers/openml/study_test.py | 11 +-
tests/routers/openml/task_type_test.py | 10 +-
19 files changed, 497 insertions(+), 139 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 840cd75..a77e3e8 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -1,7 +1,222 @@
+"""RFC 9457 Problem Details for HTTP APIs.
+
+This module provides RFC 9457 compliant error handling for the OpenML REST API.
+See: https://www.rfc-editor.org/rfc/rfc9457.html
+"""
+
from enum import IntEnum
+from http import HTTPStatus
+from typing import NoReturn
+
+from fastapi import Request
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, ConfigDict, Field
+
+# JSON-serializable extension value type for RFC 9457 problem details
+type ExtensionValue = str | int | float | bool | None | list[str] | list[int]
class DatasetError(IntEnum):
NOT_FOUND = 111
NO_ACCESS = 112
NO_DATA_FILE = 113
+
+
+class ProblemDetail(BaseModel):
+ """RFC 9457 Problem Details model.
+
+ All fields are optional per the specification, but `type` defaults to "about:blank"
+ when not provided. The `status` field is advisory and should match the HTTP status code.
+ """
+
+ model_config = ConfigDict(populate_by_name=True)
+
+ type_: str = Field(
+ default="about:blank",
+ alias="type",
+ serialization_alias="type",
+ description="A URI reference identifying the problem type. Defaults to 'about:blank'.",
+ )
+ title: str | None = Field(
+ default=None,
+ description="A short, human-readable summary of the problem type.",
+ )
+ status: int | None = Field(
+ default=None,
+ description="The HTTP status code. Advisory only, should match the actual status.",
+ )
+ detail: str | None = Field(
+ default=None,
+ description="A human-readable explanation specific to this occurrence of the problem.",
+ )
+ instance: str | None = Field(
+ default=None,
+ description="A URI reference identifying this specific occurrence of the problem.",
+ )
+
+
+class ProblemDetailError(Exception):
+ """Exception that produces RFC 9457 compliant error responses.
+
+ Usage:
+ raise ProblemDetailError(
+ status_code=HTTPStatus.NOT_FOUND,
+ detail="Dataset 123 was not found.",
+ title="Dataset Not Found",
+ type_="https://openml.org/problems/dataset-not-found",
+ code="111", # Extension field for legacy error codes
+ )
+ """
+
+ def __init__(
+ self,
+ status_code: HTTPStatus | int,
+ detail: str | None = None,
+ title: str | None = None,
+ type_: str = "about:blank",
+ instance: str | None = None,
+ **extensions: ExtensionValue,
+ ) -> None:
+ self.status_code = int(status_code)
+ self.problem = ProblemDetail(
+ type_=type_,
+ title=title,
+ status=self.status_code,
+ detail=detail,
+ instance=instance,
+ )
+ self.extensions = extensions
+ super().__init__(detail or title or "An error occurred")
+
+
+def problem_detail_exception_handler(
+ request: Request, # noqa: ARG001
+ exc: ProblemDetailError,
+) -> JSONResponse:
+ """FastAPI exception handler for ProblemDetailError.
+
+ Returns a response with:
+ - Content-Type: application/problem+json
+ - RFC 9457 compliant JSON body
+ """
+ content = exc.problem.model_dump(by_alias=True, exclude_none=True)
+ content.update(exc.extensions)
+
+ return JSONResponse(
+ status_code=exc.status_code,
+ content=content,
+ media_type="application/problem+json",
+ )
+
+
+# Problem type URIs for OpenML-specific errors
+# These should be documented at the corresponding URLs
+class ProblemType:
+ """Problem type URIs for common OpenML errors."""
+
+ # Dataset errors
+ DATASET_NOT_FOUND = "https://openml.org/problems/dataset-not-found"
+ DATASET_NO_ACCESS = "https://openml.org/problems/dataset-no-access"
+ DATASET_NO_DATA_FILE = "https://openml.org/problems/dataset-no-data-file"
+ DATASET_NOT_PROCESSED = "https://openml.org/problems/dataset-not-processed"
+ DATASET_PROCESSING_ERROR = "https://openml.org/problems/dataset-processing-error"
+ DATASET_NO_FEATURES = "https://openml.org/problems/dataset-no-features"
+ DATASET_STATUS_TRANSITION = "https://openml.org/problems/dataset-status-transition"
+ DATASET_NOT_OWNED = "https://openml.org/problems/dataset-not-owned"
+ DATASET_ADMIN_ONLY = "https://openml.org/problems/dataset-admin-only"
+
+ # Authentication/Authorization errors
+ AUTHENTICATION_REQUIRED = "https://openml.org/problems/authentication-required"
+ AUTHENTICATION_FAILED = "https://openml.org/problems/authentication-failed"
+ FORBIDDEN = "https://openml.org/problems/forbidden"
+
+ # Tag errors
+ TAG_ALREADY_EXISTS = "https://openml.org/problems/tag-already-exists"
+
+ # Search/List errors
+ NO_RESULTS = "https://openml.org/problems/no-results"
+
+ # Study errors
+ STUDY_NOT_FOUND = "https://openml.org/problems/study-not-found"
+ STUDY_PRIVATE = "https://openml.org/problems/study-private"
+ STUDY_LEGACY = "https://openml.org/problems/study-legacy"
+ STUDY_ALIAS_EXISTS = "https://openml.org/problems/study-alias-exists"
+ STUDY_INVALID_TYPE = "https://openml.org/problems/study-invalid-type"
+ STUDY_NOT_EDITABLE = "https://openml.org/problems/study-not-editable"
+ STUDY_CONFLICT = "https://openml.org/problems/study-conflict"
+
+ # Task errors
+ TASK_NOT_FOUND = "https://openml.org/problems/task-not-found"
+ TASK_TYPE_NOT_FOUND = "https://openml.org/problems/task-type-not-found"
+
+ # Flow errors
+ FLOW_NOT_FOUND = "https://openml.org/problems/flow-not-found"
+
+ # Service errors
+ SERVICE_NOT_FOUND = "https://openml.org/problems/service-not-found"
+
+ # Internal errors
+ INTERNAL_ERROR = "https://openml.org/problems/internal-error"
+
+
+# Human-readable titles for problem types
+PROBLEM_TITLES: dict[str, str] = {
+ ProblemType.DATASET_NOT_FOUND: "Dataset Not Found",
+ ProblemType.DATASET_NO_ACCESS: "Dataset Access Denied",
+ ProblemType.DATASET_NO_DATA_FILE: "Dataset Data File Missing",
+ ProblemType.DATASET_NOT_PROCESSED: "Dataset Not Processed",
+ ProblemType.DATASET_PROCESSING_ERROR: "Dataset Processing Error",
+ ProblemType.DATASET_NO_FEATURES: "Dataset Features Not Available",
+ ProblemType.DATASET_STATUS_TRANSITION: "Invalid Status Transition",
+ ProblemType.DATASET_NOT_OWNED: "Dataset Not Owned",
+ ProblemType.DATASET_ADMIN_ONLY: "Administrator Only",
+ ProblemType.AUTHENTICATION_REQUIRED: "Authentication Required",
+ ProblemType.AUTHENTICATION_FAILED: "Authentication Failed",
+ ProblemType.FORBIDDEN: "Forbidden",
+ ProblemType.TAG_ALREADY_EXISTS: "Tag Already Exists",
+ ProblemType.NO_RESULTS: "No Results Found",
+ ProblemType.STUDY_NOT_FOUND: "Study Not Found",
+ ProblemType.STUDY_PRIVATE: "Study Is Private",
+ ProblemType.STUDY_LEGACY: "Legacy Study Not Supported",
+ ProblemType.STUDY_ALIAS_EXISTS: "Study Alias Already Exists",
+ ProblemType.STUDY_INVALID_TYPE: "Invalid Study Type",
+ ProblemType.STUDY_NOT_EDITABLE: "Study Not Editable",
+ ProblemType.STUDY_CONFLICT: "Study Conflict",
+ ProblemType.TASK_NOT_FOUND: "Task Not Found",
+ ProblemType.TASK_TYPE_NOT_FOUND: "Task Type Not Found",
+ ProblemType.FLOW_NOT_FOUND: "Flow Not Found",
+ ProblemType.SERVICE_NOT_FOUND: "Service Not Found",
+ ProblemType.INTERNAL_ERROR: "Internal Server Error",
+}
+
+
+def raise_problem(
+ status_code: HTTPStatus | int,
+ type_: str,
+ detail: str,
+ *,
+ instance: str | None = None,
+ code: int | str | None = None,
+ **extensions: ExtensionValue,
+) -> NoReturn:
+ """Helper function to raise RFC 9457 compliant errors.
+
+ Args:
+ status_code: HTTP status code for the response.
+ type_: Problem type URI identifying the error class.
+ detail: Human-readable explanation of this specific error occurrence.
+ instance: Optional URI identifying this specific error occurrence.
+ code: Optional legacy OpenML error code (for backwards compatibility).
+ **extensions: Additional extension fields to include in the response.
+ """
+ title = PROBLEM_TITLES.get(type_)
+ if code is not None:
+ extensions["code"] = str(code)
+ raise ProblemDetailError(
+ status_code=status_code,
+ detail=detail,
+ title=title,
+ type_=type_,
+ instance=instance,
+ **extensions,
+ )
diff --git a/src/core/formatting.py b/src/core/formatting.py
index 174261f..f954e81 100644
--- a/src/core/formatting.py
+++ b/src/core/formatting.py
@@ -3,7 +3,6 @@
from sqlalchemy.engine import Row
from config import load_routing_configuration
-from core.errors import DatasetError
from schemas.datasets.openml import DatasetFileFormat
@@ -16,11 +15,6 @@ def _str_to_bool(string: str) -> bool:
raise ValueError(msg)
-def _format_error(*, code: DatasetError, message: str) -> dict[str, str]:
- """Formatter for JSON bodies of OpenML error codes."""
- return {"code": str(code), "message": message}
-
-
def _format_parquet_url(dataset: Row) -> str | None:
if dataset.format.lower() != DatasetFileFormat.ARFF:
return None
diff --git a/src/main.py b/src/main.py
index d8e61b3..e3fe6e0 100644
--- a/src/main.py
+++ b/src/main.py
@@ -4,6 +4,7 @@
from fastapi import FastAPI
from config import load_configuration
+from core.errors import ProblemDetailError, problem_detail_exception_handler
from routers.mldcat_ap.dataset import router as mldcat_ap_router
from routers.openml.datasets import router as datasets_router
from routers.openml.estimation_procedure import router as estimationprocedure_router
@@ -45,6 +46,8 @@ def create_api() -> FastAPI:
fastapi_kwargs = load_configuration()["fastapi"]
app = FastAPI(**fastapi_kwargs)
+ app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type]
+
app.include_router(datasets_router)
app.include_router(qualities_router)
app.include_router(mldcat_ap_router)
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index db34e5c..eaa5652 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -4,12 +4,14 @@
Specific queries could be written to fetch e.g., a single feature or quality.
"""
+from http import HTTPStatus
from typing import Annotated
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends
from sqlalchemy import Connection
import config
+from core.errors import ProblemType, raise_problem
from database.users import User
from routers.dependencies import expdb_connection, fetch_user, userdb_connection
from routers.openml.datasets import get_dataset, get_dataset_features
@@ -121,7 +123,11 @@ def get_mldcat_ap_distribution(
)
def get_dataservice(service_id: int) -> JsonLDGraph:
if service_id != 1:
- raise HTTPException(status_code=404, detail="Service not found.")
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.SERVICE_NOT_FOUND,
+ detail="Service not found.",
+ )
return JsonLDGraph(
context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld",
graph=[
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index dda2511..b2ad65d 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -4,18 +4,17 @@
from http import HTTPStatus
from typing import Annotated, Any, Literal, NamedTuple
-from fastapi import APIRouter, Body, Depends, HTTPException
+from fastapi import APIRouter, Body, Depends
from sqlalchemy import Connection, text
from sqlalchemy.engine import Row
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetError
+from core.errors import DatasetError, ProblemType, raise_problem
from core.formatting import (
_csv_as_list,
_format_dataset_url,
- _format_error,
_format_parquet_url,
)
from database.users import User, UserGroup
@@ -37,10 +36,20 @@ def tag_dataset(
) -> dict[str, dict[str, Any]]:
tags = database.datasets.get_tags_for(data_id, expdb_db)
if tag.casefold() in [t.casefold() for t in tags]:
- raise create_tag_exists_error(data_id, tag)
+ raise_problem(
+ status_code=HTTPStatus.CONFLICT,
+ type_=ProblemType.TAG_ALREADY_EXISTS,
+ detail=f"Entity already tagged by this tag. id={data_id}; tag={tag}",
+ code=473,
+ )
if user is None:
- raise create_authentication_failed_error()
+ raise_problem(
+ status_code=HTTPStatus.UNAUTHORIZED,
+ type_=ProblemType.AUTHENTICATION_FAILED,
+ detail="Authentication failed.",
+ code=103,
+ )
database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db)
return {
@@ -48,24 +57,6 @@ def tag_dataset(
}
-def create_authentication_failed_error() -> HTTPException:
- return HTTPException(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": "103", "message": "Authentication failed"},
- )
-
-
-def create_tag_exists_error(data_id: int, tag: str) -> HTTPException:
- return HTTPException(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail={
- "code": "473",
- "message": "Entity already tagged by this tag.",
- "additional_information": f"id={data_id}; tag={tag}",
- },
- )
-
-
class DatasetStatusFilter(StrEnum):
ACTIVE = DatasetStatus.ACTIVE
DEACTIVATED = DatasetStatus.DEACTIVATED
@@ -203,10 +194,12 @@ def quality_clause(quality: str, range_: str | None) -> str:
row.did: dict(zip(columns, row, strict=True)) for row in rows
}
if not datasets:
- raise HTTPException(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": "372", "message": "No results"},
- ) from None
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.NO_RESULTS,
+ detail="No datasets match the search criteria.",
+ code=372,
+ )
for dataset in datasets.values():
# The old API does not actually provide the checksum but just an empty field
@@ -266,15 +259,23 @@ def _get_dataset_raise_otherwise(
) -> Row:
"""Fetches the dataset from the database if it exists and the user has permissions.
- Raises HTTPException if the dataset does not exist or the user can not access it.
+ Raises ProblemDetailError if the dataset does not exist or the user can not access it.
"""
if not (dataset := database.datasets.get(dataset_id, expdb)):
- error = _format_error(code=DatasetError.NOT_FOUND, message="Unknown dataset")
- raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=error)
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.DATASET_NOT_FOUND,
+ detail="Unknown dataset.",
+ code=DatasetError.NOT_FOUND,
+ )
if not _user_has_access(dataset=dataset, user=user):
- error = _format_error(code=DatasetError.NO_ACCESS, message="No access granted")
- raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail=error)
+ raise_problem(
+ status_code=HTTPStatus.FORBIDDEN,
+ type_=ProblemType.DATASET_NO_ACCESS,
+ detail="No access granted.",
+ code=DatasetError.NO_ACCESS,
+ )
return dataset
@@ -297,21 +298,32 @@ def get_dataset_features(
if not features:
processing_state = database.datasets.get_latest_processing_update(dataset_id, expdb)
if processing_state is None:
- code, msg = (
- 273,
- "Dataset not processed yet. The dataset was not processed yet, features are not yet available. Please wait for a few minutes.", # noqa: E501
+ raise_problem(
+ status_code=HTTPStatus.PRECONDITION_FAILED,
+ type_=ProblemType.DATASET_NOT_PROCESSED,
+ detail=(
+ "Dataset not processed yet. The dataset was not processed yet, "
+ "features are not yet available. Please wait for a few minutes."
+ ),
+ code=273,
)
elif processing_state.error:
- code, msg = 274, "No features found. Additionally, dataset processed with error"
+ raise_problem(
+ status_code=HTTPStatus.PRECONDITION_FAILED,
+ type_=ProblemType.DATASET_PROCESSING_ERROR,
+ detail="No features found. Additionally, dataset processed with error.",
+ code=274,
+ )
else:
- code, msg = (
- 272,
- "No features found. The dataset did not contain any features, or we could not extract them.", # noqa: E501
+ raise_problem(
+ status_code=HTTPStatus.PRECONDITION_FAILED,
+ type_=ProblemType.DATASET_NO_FEATURES,
+ detail=(
+ "No features found. "
+ "The dataset did not contain any features, or we could not extract them."
+ ),
+ code=272,
)
- raise HTTPException(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": code, "message": msg},
- )
return features
@@ -325,30 +337,37 @@ def update_dataset_status(
expdb: Annotated[Connection, Depends(expdb_connection)],
) -> dict[str, str | int]:
if user is None:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.UNAUTHORIZED,
- detail="Updating dataset status required authorization",
+ type_=ProblemType.AUTHENTICATION_REQUIRED,
+ detail="Updating dataset status requires authentication.",
)
dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb)
can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups
if status == DatasetStatus.DEACTIVATED and not can_deactivate:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.FORBIDDEN,
- detail={"code": 693, "message": "Dataset is not owned by you"},
+ type_=ProblemType.DATASET_NOT_OWNED,
+ detail="Dataset is not owned by you.",
+ code=693,
)
if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.FORBIDDEN,
- detail={"code": 696, "message": "Only administrators can activate datasets."},
+ type_=ProblemType.DATASET_ADMIN_ONLY,
+ detail="Only administrators can activate datasets.",
+ code=696,
)
current_status = database.datasets.get_status(dataset_id, expdb)
if current_status and current_status.status == status:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": 694, "message": "Illegal status transition."},
+ type_=ProblemType.DATASET_STATUS_TRANSITION,
+ detail="Illegal status transition.",
+ code=694,
)
# If current status is unknown, it is effectively "in preparation",
@@ -362,9 +381,10 @@ def update_dataset_status(
elif current_status.status == DatasetStatus.DEACTIVATED:
database.datasets.remove_deactivated_status(dataset_id, expdb)
else:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail={"message": f"Unknown status transition: {current_status} -> {status}"},
+ type_=ProblemType.INTERNAL_ERROR,
+ detail=f"Unknown status transition: {current_status} -> {status}",
)
return {"dataset_id": dataset_id, "status": status}
@@ -384,11 +404,12 @@ def get_dataset(
if not (
dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db)
):
- error = _format_error(
+ raise_problem(
+ status_code=HTTPStatus.PRECONDITION_FAILED,
+ type_=ProblemType.DATASET_NO_DATA_FILE,
+ detail="No data file found.",
code=DatasetError.NO_DATA_FILE,
- message="No data file found",
)
- raise HTTPException(status_code=HTTPStatus.PRECONDITION_FAILED, detail=error)
tags = database.datasets.get_tags_for(dataset_id, expdb_db)
description = database.datasets.get_description(dataset_id, expdb_db)
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index cb6df5d..afd7e2a 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -1,11 +1,12 @@
from http import HTTPStatus
from typing import Annotated, Literal
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends
from sqlalchemy import Connection
import database.flows
from core.conversions import _str_to_num
+from core.errors import ProblemType, raise_problem
from routers.dependencies import expdb_connection
from schemas.flows import Flow, Parameter, Subflow
@@ -21,8 +22,9 @@ def flow_exists(
"""Check if a Flow with the name and version exists, if so, return the flow id."""
flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb)
if flow is None:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.FLOW_NOT_FOUND,
detail="Flow not found.",
)
return {"flow_id": flow.id}
@@ -32,7 +34,11 @@ def flow_exists(
def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow:
flow = database.flows.get(flow_id, expdb)
if not flow:
- raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Flow not found")
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.FLOW_NOT_FOUND,
+ detail="Flow not found.",
+ )
parameter_rows = database.flows.get_parameters(flow_id, expdb)
parameters = [
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 54181f8..77bf64b 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -1,13 +1,13 @@
from http import HTTPStatus
from typing import Annotated, Literal
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends
from sqlalchemy import Connection
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetError
+from core.errors import DatasetError, ProblemType, raise_problem
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
from schemas.datasets.openml import Quality
@@ -35,10 +35,12 @@ def get_qualities(
) -> list[Quality]:
dataset = database.datasets.get(dataset_id, expdb)
if not dataset or not _user_has_access(dataset, user):
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": DatasetError.NO_DATA_FILE, "message": "Unknown dataset"},
- ) from None
+ type_=ProblemType.DATASET_NOT_FOUND,
+ detail="Unknown dataset.",
+ code=DatasetError.NO_DATA_FILE,
+ )
return database.qualities.get_for_dataset(dataset_id, expdb)
# The PHP API provided (sometime) helpful error messages
# if not qualities:
diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py
index 6fe1dcc..0ff49e8 100644
--- a/src/routers/openml/study.py
+++ b/src/routers/openml/study.py
@@ -1,11 +1,12 @@
from http import HTTPStatus
from typing import Annotated, Literal
-from fastapi import APIRouter, Body, Depends, HTTPException
+from fastapi import APIRouter, Body, Depends
from pydantic import BaseModel
from sqlalchemy import Connection, Row
import database.studies
+from core.errors import ProblemType, raise_problem
from core.formatting import _str_to_bool
from database.users import User, UserGroup
from routers.dependencies import expdb_connection, fetch_user
@@ -22,19 +23,29 @@ def _get_study_raise_otherwise(id_or_alias: int | str, user: User | None, expdb:
study = database.studies.get_by_alias(id_or_alias, expdb)
if study is None:
- raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Study not found.")
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.STUDY_NOT_FOUND,
+ detail="Study not found.",
+ )
if study.visibility == Visibility.PRIVATE:
if user is None:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.UNAUTHORIZED,
+ type_=ProblemType.AUTHENTICATION_REQUIRED,
detail="Must authenticate for private study.",
)
if study.creator != user.user_id and UserGroup.ADMIN not in user.groups:
- raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Study is private.")
+ raise_problem(
+ status_code=HTTPStatus.FORBIDDEN,
+ type_=ProblemType.STUDY_PRIVATE,
+ detail="Study is private.",
+ )
if _str_to_bool(study.legacy):
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.GONE,
- detail="Legacy studies are no longer supported",
+ type_=ProblemType.STUDY_LEGACY,
+ detail="Legacy studies are no longer supported.",
)
return study
@@ -52,17 +63,23 @@ def attach_to_study(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> AttachDetachResponse:
if user is None:
- raise HTTPException(status_code=HTTPStatus.UNAUTHORIZED, detail="User not found.")
+ raise_problem(
+ status_code=HTTPStatus.UNAUTHORIZED,
+ type_=ProblemType.AUTHENTICATION_REQUIRED,
+ detail="Authentication required.",
+ )
study = _get_study_raise_otherwise(study_id, user, expdb)
# PHP lets *anyone* edit *any* study. We're not going to do that.
if study.creator != user.user_id and UserGroup.ADMIN not in user.groups:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.FORBIDDEN,
+ type_=ProblemType.STUDY_NOT_EDITABLE,
detail="Study can only be edited by its creator.",
)
if study.status != StudyStatus.IN_PREPARATION:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.FORBIDDEN,
+ type_=ProblemType.STUDY_NOT_EDITABLE,
detail="Study can only be edited while in preparation.",
)
@@ -79,10 +96,11 @@ def attach_to_study(
else:
database.studies.attach_runs(run_ids=entity_ids, **attach_kwargs)
except ValueError as e:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.CONFLICT,
+ type_=ProblemType.STUDY_CONFLICT,
detail=str(e),
- ) from None
+ )
return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_)
@@ -93,23 +111,27 @@ def create_study(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> dict[Literal["study_id"], int]:
if user is None:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.UNAUTHORIZED,
+ type_=ProblemType.AUTHENTICATION_REQUIRED,
detail="Creating a study requires authentication.",
)
if study.main_entity_type == StudyType.RUN and study.tasks:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.BAD_REQUEST,
+ type_=ProblemType.STUDY_INVALID_TYPE,
detail="Cannot create a run study with tasks.",
)
if study.main_entity_type == StudyType.TASK and study.runs:
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.BAD_REQUEST,
+ type_=ProblemType.STUDY_INVALID_TYPE,
detail="Cannot create a task study with runs.",
)
if study.alias and database.studies.get_by_alias(study.alias, expdb):
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.CONFLICT,
+ type_=ProblemType.STUDY_ALIAS_EXISTS,
detail="Study alias already exists.",
)
study_id = database.studies.create(study, user, expdb)
diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py
index 8397f1d..8007020 100644
--- a/src/routers/openml/tasks.py
+++ b/src/routers/openml/tasks.py
@@ -4,12 +4,13 @@
from typing import Annotated, cast
import xmltodict
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends
from sqlalchemy import Connection, RowMapping, text
import config
import database.datasets
import database.tasks
+from core.errors import ProblemType, raise_problem
from routers.dependencies import expdb_connection
from schemas.datasets.openml import Task
@@ -155,11 +156,16 @@ def get_task(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> Task:
if not (task := database.tasks.get(task_id, expdb)):
- raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail="Task not found")
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.TASK_NOT_FOUND,
+ detail="Task not found.",
+ )
if not (task_type := database.tasks.get_task_type(task.ttid, expdb)):
- raise HTTPException(
+ raise_problem(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- detail="Task type not found",
+ type_=ProblemType.INTERNAL_ERROR,
+ detail="Task type not found.",
)
task_inputs = {
diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py
index 5213f17..9916499 100644
--- a/src/routers/openml/tasktype.py
+++ b/src/routers/openml/tasktype.py
@@ -2,9 +2,10 @@
from http import HTTPStatus
from typing import Annotated, Any, Literal, cast
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends
from sqlalchemy import Connection, Row
+from core.errors import ProblemType, raise_problem
from database.tasks import get_input_for_task_type, get_task_types
from database.tasks import get_task_type as db_get_task_type
from routers.dependencies import expdb_connection
@@ -45,10 +46,12 @@ def get_task_type(
) -> dict[Literal["task_type"], dict[str, str | None | list[str] | list[dict[str, str]]]]:
task_type_record = db_get_task_type(task_type_id, expdb)
if task_type_record is None:
- raise HTTPException(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- detail={"code": "241", "message": "Unknown task type."},
- ) from None
+ raise_problem(
+ status_code=HTTPStatus.NOT_FOUND,
+ type_=ProblemType.TASK_TYPE_NOT_FOUND,
+ detail="Unknown task type.",
+ code=241,
+ )
task_type = _normalize_task_type(task_type_record)
# Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B')
diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index 5449862..7147eca 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -4,6 +4,7 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
+from core.errors import ProblemType
from database.datasets import get_tags_for
from tests import constants
from tests.users import ApiKey
@@ -20,8 +21,11 @@ def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> No
f"/datasets/tag{apikey}",
json={"data_id": next(iter(constants.PRIVATE_DATASET_ID)), "tag": "test"},
)
- assert response.status_code == HTTPStatus.PRECONDITION_FAILED
- assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"}
+ assert response.status_code == HTTPStatus.UNAUTHORIZED
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.AUTHENTICATION_FAILED
+ assert error["code"] == "103"
@pytest.mark.parametrize(
@@ -58,15 +62,13 @@ def test_dataset_tag_fails_if_tag_exists(py_api: TestClient) -> None:
f"/datasets/tag?api_key={ApiKey.ADMIN}",
json={"data_id": dataset_id, "tag": tag},
)
- assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR
- expected = {
- "detail": {
- "code": "473",
- "message": "Entity already tagged by this tag.",
- "additional_information": f"id={dataset_id}; tag={tag}",
- },
- }
- assert expected == response.json()
+ assert response.status_code == HTTPStatus.CONFLICT
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.TAG_ALREADY_EXISTS
+ assert error["code"] == "473"
+ assert f"id={dataset_id}" in error["detail"]
+ assert f"tag={tag}" in error["detail"]
@pytest.mark.parametrize(
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index e1ff17b..78f2d42 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -8,6 +8,7 @@
from hypothesis import strategies as st
from starlette.testclient import TestClient
+from core.errors import ProblemType
from tests import constants
from tests.users import ApiKey
@@ -15,8 +16,11 @@
def _assert_empty_result(
response: httpx.Response,
) -> None:
- assert response.status_code == HTTPStatus.PRECONDITION_FAILED
- assert response.json()["detail"] == {"code": "372", "message": "No results"}
+ assert response.status_code == HTTPStatus.NOT_FOUND
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.NO_RESULTS
+ assert error["code"] == "372"
def test_list(py_api: TestClient) -> None:
@@ -283,9 +287,21 @@ def test_list_data_identical(
uri += api_key_query
original = php_api.get(uri)
- assert original.status_code == response.status_code, response.json()
- if original.status_code == HTTPStatus.PRECONDITION_FAILED:
- assert original.json()["error"] == response.json()["detail"]
+ # Note: RFC 9457 changed some status codes (PRECONDITION_FAILED -> NOT_FOUND for no results)
+ # and the error response format, so we can't compare error responses directly.
+ php_is_error = original.status_code == HTTPStatus.PRECONDITION_FAILED
+ py_is_error = response.status_code == HTTPStatus.NOT_FOUND
+
+ if php_is_error or py_is_error:
+ # Both should be errors in the same cases
+ assert php_is_error == py_is_error, (
+ f"PHP status={original.status_code}, Python status={response.status_code}"
+ )
+ # Verify Python API returns RFC 9457 format
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.NO_RESULTS
+ assert error["code"] == "372"
return None
new_json = response.json()
# Qualities in new response are typed
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 4ba5ad8..a1395f1 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -1,10 +1,10 @@
from http import HTTPStatus
import pytest
-from fastapi import HTTPException
from sqlalchemy import Connection
from starlette.testclient import TestClient
+from core.errors import ProblemDetailError, ProblemType
from database.users import User
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
@@ -28,7 +28,13 @@ def test_error_unknown_dataset(
response = py_api.get(f"/datasets/{dataset_id}")
assert response.status_code == response_code
- assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"}
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ assert error["title"] == "Dataset Not Found"
+ assert error["status"] == HTTPStatus.NOT_FOUND
+ assert error["detail"] == "Unknown dataset."
+ assert error["code"] == "111"
def test_get_dataset(py_api: TestClient) -> None:
@@ -80,7 +86,7 @@ def test_private_dataset_no_access(
user: User | None,
expdb_test: Connection,
) -> None:
- with pytest.raises(HTTPException) as e:
+ with pytest.raises(ProblemDetailError) as e:
get_dataset(
dataset_id=130,
user=user,
@@ -88,7 +94,8 @@ def test_private_dataset_no_access(
expdb_db=expdb_test,
)
assert e.value.status_code == HTTPStatus.FORBIDDEN
- assert e.value.detail == {"code": "112", "message": "No access granted"} # type: ignore[comparison-overlap]
+ assert e.value.problem.type_ == ProblemType.DATASET_NO_ACCESS
+ assert e.value.extensions.get("code") == "112"
@pytest.mark.parametrize(
@@ -177,10 +184,11 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None:
# In that case, no feature information will ever be available.
response = py_api.get("/datasets/features/55")
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
- assert response.json()["detail"] == {
- "code": 274,
- "message": "No features found. Additionally, dataset processed with error",
- }
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.DATASET_PROCESSING_ERROR
+ assert error["code"] == "274"
+ assert "No features found" in error["detail"]
def test_dataset_features_dataset_does_not_exist(py_api: TestClient) -> None:
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index d5188d0..2627053 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -2,11 +2,11 @@
import deepdiff.diff
import pytest
-from fastapi import HTTPException
from pytest_mock import MockerFixture
from sqlalchemy import Connection
from starlette.testclient import TestClient
+from core.errors import ProblemDetailError, ProblemType
from routers.openml.flows import flow_exists
from tests.conftest import Flow
@@ -53,10 +53,10 @@ def test_flow_exists_processes_found(
def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None:
mocker.patch("database.flows.get_by_name", return_value=None)
- with pytest.raises(HTTPException) as error:
+ with pytest.raises(ProblemDetailError) as error:
flow_exists("foo", "bar", expdb_test)
assert error.value.status_code == HTTPStatus.NOT_FOUND
- assert error.value.detail == "Flow not found."
+ assert error.value.problem.type_ == ProblemType.FLOW_NOT_FOUND
def test_flow_exists(flow: Flow, py_api: TestClient) -> None:
@@ -68,7 +68,10 @@ def test_flow_exists(flow: Flow, py_api: TestClient) -> None:
def test_flow_exists_not_exists(py_api: TestClient) -> None:
response = py_api.get("/flows/exists/foo/bar")
assert response.status_code == HTTPStatus.NOT_FOUND
- assert response.json()["detail"] == "Flow not found."
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.FLOW_NOT_FOUND
+ assert error["detail"] == "Flow not found."
def test_get_flow_no_subflow(py_api: TestClient) -> None:
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 011d8db..1c514f6 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -7,6 +7,7 @@
import tests.constants
from core.conversions import nested_remove_single_element_list
+from core.errors import ProblemType
from tests.users import ApiKey
@@ -28,7 +29,10 @@ def test_dataset_response_is_identical( # noqa: C901, PLR0912
assert original.status_code == new.status_code
if new.status_code != HTTPStatus.OK:
- assert original.json()["error"] == new.json()["detail"]
+ # RFC 9457: Python API now returns problem+json format
+ assert new.headers["content-type"] == "application/problem+json"
+ # Both APIs should return error responses in the same cases
+ assert "error" in original.json()
return
try:
@@ -102,7 +106,11 @@ def test_error_unknown_dataset(
# The new API has "404 Not Found" instead of "412 PRECONDITION_FAILED"
assert response.status_code == HTTPStatus.NOT_FOUND
- assert response.json()["detail"] == {"code": "111", "message": "Unknown dataset"}
+ # RFC 9457: Python API now returns problem+json format
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ assert error["code"] == "111"
@pytest.mark.parametrize(
@@ -118,7 +126,10 @@ def test_private_dataset_no_user_no_access(
# New response is 403: Forbidden instead of 412: PRECONDITION FAILED
assert response.status_code == HTTPStatus.FORBIDDEN
- assert response.json()["detail"] == {"code": "112", "message": "No access granted"}
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.DATASET_NO_ACCESS
+ assert error["code"] == "112"
@pytest.mark.parametrize(
@@ -184,9 +195,21 @@ def test_dataset_tag_response_is_identical(
json={"data_id": dataset_id, "tag": tag},
)
+ # RFC 9457: Tag conflict now returns 409 instead of 500
+ if original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR and already_tagged:
+ assert new.status_code == HTTPStatus.CONFLICT
+ assert new.headers["content-type"] == "application/problem+json"
+ error = new.json()
+ assert error["type"] == ProblemType.TAG_ALREADY_EXISTS
+ assert error["code"] == "473"
+ return
+
assert original.status_code == new.status_code, original.json()
if new.status_code != HTTPStatus.OK:
- assert original.json()["error"] == new.json()["detail"]
+ # RFC 9457: Python API now returns problem+json format
+ assert new.headers["content-type"] == "application/problem+json"
+ # Both APIs should error in the same cases
+ assert "error" in original.json()
return
original = original.json()
@@ -209,9 +232,14 @@ def test_datasets_feature_is_identical(
assert response.status_code == original.status_code
if response.status_code != HTTPStatus.OK:
- error = response.json()["detail"]
- error["code"] = str(error["code"])
- assert error == original.json()["error"]
+ # RFC 9457: Python API now returns problem+json format
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ # Verify Python API returns properly typed RFC 9457 response
+ assert "type" in error
+ assert "status" in error
+ # Both APIs should error in the same cases
+ assert "error" in original.json()
return
python_body = response.json()
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index 674bc43..e3a559d 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -10,6 +10,7 @@
nested_remove_single_element_list,
nested_str_to_num,
)
+from core.errors import ProblemType
from tests.conftest import Flow
@@ -27,7 +28,11 @@ def test_flow_exists_not(
expect_php = {"flow_exists": {"exists": "false", "id": str(-1)}}
assert php_response.json() == expect_php
- assert py_response.json() == {"detail": "Flow not found."}
+ # RFC 9457: Python API now returns problem+json format
+ assert py_response.headers["content-type"] == "application/problem+json"
+ error = py_response.json()
+ assert error["type"] == ProblemType.FLOW_NOT_FOUND
+ assert error["detail"] == "Flow not found."
@pytest.mark.mut
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index eed569e..814607c 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -6,6 +6,8 @@
from sqlalchemy import Connection, text
from starlette.testclient import TestClient
+from core.errors import ProblemType
+
def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None:
expdb_test.execute(
@@ -313,6 +315,9 @@ def test_get_quality_identical_error(
php_response = php_api.get(f"/data/qualities/{data_id}")
python_response = py_api.get(f"/datasets/qualities/{data_id}")
assert python_response.status_code == php_response.status_code
- # The "dataset unknown" error currently has a separate code in PHP depending on
- # where it occurs (e.g., get dataset->113 get quality->361)
- assert python_response.json()["detail"]["message"] == php_response.json()["error"]["message"]
+ # RFC 9457: Python API now returns problem+json format
+ assert python_response.headers["content-type"] == "application/problem+json"
+ error = python_response.json()
+ assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ # Verify the error message matches the PHP API semantically
+ assert "Unknown dataset" in error["detail"]
diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py
index a9a8ed4..b2dd862 100644
--- a/tests/routers/openml/study_test.py
+++ b/tests/routers/openml/study_test.py
@@ -5,6 +5,7 @@
from sqlalchemy import Connection, text
from starlette.testclient import TestClient
+from core.errors import ProblemType
from schemas.study import StudyType
from tests.users import ApiKey
@@ -556,7 +557,10 @@ def test_attach_task_to_study_already_linked_raises(
expdb_test=expdb_test,
)
assert response.status_code == HTTPStatus.CONFLICT, response.content
- assert response.json() == {"detail": "Task 1 is already attached to study 1."}
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.STUDY_CONFLICT
+ assert "Task 1 is already attached to study 1" in error["detail"]
def test_attach_task_to_study_but_task_not_exist_raises(
@@ -572,4 +576,7 @@ def test_attach_task_to_study_but_task_not_exist_raises(
expdb_test=expdb_test,
)
assert response.status_code == HTTPStatus.CONFLICT
- assert response.json() == {"detail": "One or more of the tasks do not exist."}
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.STUDY_CONFLICT
+ assert "do not exist" in error["detail"]
diff --git a/tests/routers/openml/task_type_test.py b/tests/routers/openml/task_type_test.py
index d14929c..95d5c70 100644
--- a/tests/routers/openml/task_type_test.py
+++ b/tests/routers/openml/task_type_test.py
@@ -5,6 +5,8 @@
import pytest
from starlette.testclient import TestClient
+from core.errors import ProblemType
+
def test_list_task_type(py_api: TestClient, php_api: httpx.Client) -> None:
response = py_api.get("/tasktype/list")
@@ -36,5 +38,9 @@ def test_get_task_type(ttype_id: int, py_api: TestClient, php_api: httpx.Client)
def test_get_task_type_unknown(py_api: TestClient) -> None:
response = py_api.get("/tasktype/1000")
- assert response.status_code == HTTPStatus.PRECONDITION_FAILED
- assert response.json() == {"detail": {"code": "241", "message": "Unknown task type."}}
+ assert response.status_code == HTTPStatus.NOT_FOUND
+ assert response.headers["content-type"] == "application/problem+json"
+ error = response.json()
+ assert error["type"] == ProblemType.TASK_TYPE_NOT_FOUND
+ assert error["code"] == "241"
+ assert "Unknown task type" in error["detail"]
From 81b4f60e36ff486eaba561fd8b9dbb8ae2a33d47 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 11 Feb 2026 15:34:06 +0200
Subject: [PATCH 3/9] Make access safe even if the TOML config has no dev section
---
src/database/users.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/database/users.py b/src/database/users.py
index 220c34a..1f66640 100644
--- a/src/database/users.py
+++ b/src/database/users.py
@@ -10,7 +10,7 @@
# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
# `normaluser2`, or `abc` (admin).
api_key_pattern = r"^[0-9a-fA-F]{32}$"
-if load_configuration()["development"].get("allow_test_api_keys"):
+if load_configuration().get("development", {}).get("allow_test_api_keys"):
api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
APIKey = Annotated[
From 50ed2357eb80c7f24b560785f816b49cade23365 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Feb 2026 11:00:19 +0200
Subject: [PATCH 4/9] Simplify model definition
---
src/core/errors.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index a77e3e8..bd8f799 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -10,7 +10,7 @@
from fastapi import Request
from fastapi.responses import JSONResponse
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field
# JSON-serializable extension value type for RFC 9457 problem details
type ExtensionValue = str | int | float | bool | None | list[str] | list[int]
@@ -29,11 +29,8 @@ class ProblemDetail(BaseModel):
when not provided. The `status` field is advisory and should match the HTTP status code.
"""
- model_config = ConfigDict(populate_by_name=True)
-
type_: str = Field(
default="about:blank",
- alias="type",
serialization_alias="type",
description="A URI reference identifying the problem type. Defaults to 'about:blank'.",
)
From dcc5fcdc9aed8da0279c4e6a8edf41c501296e4a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 12 Feb 2026 11:14:57 +0200
Subject: [PATCH 5/9] Update name in docstring
---
src/core/errors.py | 4 ++--
src/routers/openml/datasets.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index bd8f799..149e5ed 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -56,7 +56,7 @@ class ProblemDetailError(Exception):
"""Exception that produces RFC 9457 compliant error responses.
Usage:
- raise ProblemDetailException(
+ raise ProblemDetailError(
status_code=HTTPStatus.NOT_FOUND,
detail="Dataset 123 was not found.",
title="Dataset Not Found",
@@ -90,7 +90,7 @@ def problem_detail_exception_handler(
request: Request, # noqa: ARG001
exc: ProblemDetailError,
) -> JSONResponse:
- """FastAPI exception handler for ProblemDetailException.
+ """FastAPI exception handler for ProblemDetailError.
Returns a response with:
- Content-Type: application/problem+json
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index b2ad65d..b18e987 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -259,7 +259,7 @@ def _get_dataset_raise_otherwise(
) -> Row:
"""Fetches the dataset from the database if it exists and the user has permissions.
- Raises ProblemDetailException if the dataset does not exist or the user can not access it.
+ Raises ProblemDetailError if the dataset does not exist or the user can not access it.
"""
if not (dataset := database.datasets.get(dataset_id, expdb)):
raise_problem(
From b6db69076f71ff30d3c8584ce4ddff1dc70fdce5 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 13 Feb 2026 11:06:40 +0200
Subject: [PATCH 6/9] Rewrite errors to separate classes
---
src/core/errors.py | 625 +++++++++++++++++++-------
src/routers/mldcat_ap/dataset.py | 10 +-
src/routers/openml/datasets.py | 142 +++---
src/routers/openml/flows.py | 17 +-
src/routers/openml/qualities.py | 11 +-
src/routers/openml/study.py | 96 ++--
src/routers/openml/tasks.py | 17 +-
src/routers/openml/tasktype.py | 11 +-
tests/routers/openml/datasets_test.py | 8 +-
tests/routers/openml/flows_test.py | 6 +-
10 files changed, 571 insertions(+), 372 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 149e5ed..8db4965 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -6,14 +6,9 @@
from enum import IntEnum
from http import HTTPStatus
-from typing import NoReturn
from fastapi import Request
from fastapi.responses import JSONResponse
-from pydantic import BaseModel, Field
-
-# JSON-serializable extension value type for RFC 9457 problem details
-type ExtensionValue = str | int | float | bool | None | list[str] | list[int]
class DatasetError(IntEnum):
@@ -22,68 +17,46 @@ class DatasetError(IntEnum):
NO_DATA_FILE = 113
-class ProblemDetail(BaseModel):
- """RFC 9457 Problem Details model.
-
- All fields are optional per the specification, but `type` defaults to "about:blank"
- when not provided. The `status` field is advisory and should match the HTTP status code.
- """
-
- type_: str = Field(
- default="about:blank",
- serialization_alias="type",
- description="A URI reference identifying the problem type. Defaults to 'about:blank'.",
- )
- title: str | None = Field(
- default=None,
- description="A short, human-readable summary of the problem type.",
- )
- status: int | None = Field(
- default=None,
- description="The HTTP status code. Advisory only, should match the actual status.",
- )
- detail: str | None = Field(
- default=None,
- description="A human-readable explanation specific to this occurrence of the problem.",
- )
- instance: str | None = Field(
- default=None,
- description="A URI reference identifying this specific occurrence of the problem.",
- )
+# =============================================================================
+# Base Exception
+# =============================================================================
class ProblemDetailError(Exception):
- """Exception that produces RFC 9457 compliant error responses.
+ """Base exception for RFC 9457 compliant error responses.
- Usage:
- raise ProblemDetailError(
- status_code=HTTPStatus.NOT_FOUND,
- detail="Dataset 123 was not found.",
- title="Dataset Not Found",
- type_="https://openml.org/problems/dataset-not-found",
- code="111", # Extension field for legacy error codes
- )
+ Subclasses should define class attributes:
+ - uri: The problem type URI
+ - title: Human-readable title
+ - status_code: HTTP status code
+
+ The status_code can be overridden per-instance for backwards compatibility.
"""
+ uri: str = "about:blank"
+ title: str = "An error occurred"
+ _default_status_code: HTTPStatus = HTTPStatus.INTERNAL_SERVER_ERROR
+
def __init__(
self,
- status_code: HTTPStatus | int,
- detail: str | None = None,
- title: str | None = None,
- type_: str = "about:blank",
+ detail: str,
+ *,
+ code: int | str | None = None,
instance: str | None = None,
- **extensions: ExtensionValue,
+ status_code: HTTPStatus | None = None,
) -> None:
- self.status_code = int(status_code)
- self.problem = ProblemDetail(
- type_=type_,
- title=title,
- status=self.status_code,
- detail=detail,
- instance=instance,
- )
- self.extensions = extensions
- super().__init__(detail or title or "An error occurred")
+ self.detail = detail
+ self.code = code
+ self.instance = instance
+ self._status_code_override = status_code
+ super().__init__(detail)
+
+ @property
+ def status_code(self) -> HTTPStatus:
+ """Return the status code, preferring instance override over class default."""
+ if self._status_code_override is not None:
+ return self._status_code_override
+ return self._default_status_code
def problem_detail_exception_handler(
@@ -96,124 +69,440 @@ def problem_detail_exception_handler(
- Content-Type: application/problem+json
- RFC 9457 compliant JSON body
"""
- content = exc.problem.model_dump(by_alias=True, exclude_none=True)
- content.update(exc.extensions)
+ content: dict[str, str | int] = {
+ "type": exc.uri,
+ "title": exc.title,
+ "status": int(exc.status_code),
+ "detail": exc.detail,
+ }
+ if exc.code is not None:
+ content["code"] = str(exc.code)
+ if exc.instance is not None:
+ content["instance"] = exc.instance
return JSONResponse(
- status_code=exc.status_code,
+ status_code=int(exc.status_code),
content=content,
media_type="application/problem+json",
)
-# Problem type URIs for OpenML-specific errors
-# These should be documented at the corresponding URLs
-class ProblemType:
- """Problem type URIs for common OpenML errors."""
-
- # Dataset errors
- DATASET_NOT_FOUND = "https://openml.org/problems/dataset-not-found"
- DATASET_NO_ACCESS = "https://openml.org/problems/dataset-no-access"
- DATASET_NO_DATA_FILE = "https://openml.org/problems/dataset-no-data-file"
- DATASET_NOT_PROCESSED = "https://openml.org/problems/dataset-not-processed"
- DATASET_PROCESSING_ERROR = "https://openml.org/problems/dataset-processing-error"
- DATASET_NO_FEATURES = "https://openml.org/problems/dataset-no-features"
- DATASET_STATUS_TRANSITION = "https://openml.org/problems/dataset-status-transition"
- DATASET_NOT_OWNED = "https://openml.org/problems/dataset-not-owned"
- DATASET_ADMIN_ONLY = "https://openml.org/problems/dataset-admin-only"
-
- # Authentication/Authorization errors
- AUTHENTICATION_REQUIRED = "https://openml.org/problems/authentication-required"
- AUTHENTICATION_FAILED = "https://openml.org/problems/authentication-failed"
- FORBIDDEN = "https://openml.org/problems/forbidden"
-
- # Tag errors
- TAG_ALREADY_EXISTS = "https://openml.org/problems/tag-already-exists"
-
- # Search/List errors
- NO_RESULTS = "https://openml.org/problems/no-results"
-
- # Study errors
- STUDY_NOT_FOUND = "https://openml.org/problems/study-not-found"
- STUDY_PRIVATE = "https://openml.org/problems/study-private"
- STUDY_LEGACY = "https://openml.org/problems/study-legacy"
- STUDY_ALIAS_EXISTS = "https://openml.org/problems/study-alias-exists"
- STUDY_INVALID_TYPE = "https://openml.org/problems/study-invalid-type"
- STUDY_NOT_EDITABLE = "https://openml.org/problems/study-not-editable"
- STUDY_CONFLICT = "https://openml.org/problems/study-conflict"
-
- # Task errors
- TASK_NOT_FOUND = "https://openml.org/problems/task-not-found"
- TASK_TYPE_NOT_FOUND = "https://openml.org/problems/task-type-not-found"
-
- # Flow errors
- FLOW_NOT_FOUND = "https://openml.org/problems/flow-not-found"
-
- # Service errors
- SERVICE_NOT_FOUND = "https://openml.org/problems/service-not-found"
-
- # Internal errors
- INTERNAL_ERROR = "https://openml.org/problems/internal-error"
-
-
-# Human-readable titles for problem types
-PROBLEM_TITLES: dict[str, str] = {
- ProblemType.DATASET_NOT_FOUND: "Dataset Not Found",
- ProblemType.DATASET_NO_ACCESS: "Dataset Access Denied",
- ProblemType.DATASET_NO_DATA_FILE: "Dataset Data File Missing",
- ProblemType.DATASET_NOT_PROCESSED: "Dataset Not Processed",
- ProblemType.DATASET_PROCESSING_ERROR: "Dataset Processing Error",
- ProblemType.DATASET_NO_FEATURES: "Dataset Features Not Available",
- ProblemType.DATASET_STATUS_TRANSITION: "Invalid Status Transition",
- ProblemType.DATASET_NOT_OWNED: "Dataset Not Owned",
- ProblemType.DATASET_ADMIN_ONLY: "Administrator Only",
- ProblemType.AUTHENTICATION_REQUIRED: "Authentication Required",
- ProblemType.AUTHENTICATION_FAILED: "Authentication Failed",
- ProblemType.FORBIDDEN: "Forbidden",
- ProblemType.TAG_ALREADY_EXISTS: "Tag Already Exists",
- ProblemType.NO_RESULTS: "No Results Found",
- ProblemType.STUDY_NOT_FOUND: "Study Not Found",
- ProblemType.STUDY_PRIVATE: "Study Is Private",
- ProblemType.STUDY_LEGACY: "Legacy Study Not Supported",
- ProblemType.STUDY_ALIAS_EXISTS: "Study Alias Already Exists",
- ProblemType.STUDY_INVALID_TYPE: "Invalid Study Type",
- ProblemType.STUDY_NOT_EDITABLE: "Study Not Editable",
- ProblemType.STUDY_CONFLICT: "Study Conflict",
- ProblemType.TASK_NOT_FOUND: "Task Not Found",
- ProblemType.TASK_TYPE_NOT_FOUND: "Task Type Not Found",
- ProblemType.FLOW_NOT_FOUND: "Flow Not Found",
- ProblemType.SERVICE_NOT_FOUND: "Service Not Found",
- ProblemType.INTERNAL_ERROR: "Internal Server Error",
+# =============================================================================
+# Dataset Errors
+# =============================================================================
+
+
+class DatasetNotFoundError(ProblemDetailError):
+ """Raised when a dataset cannot be found.
+
+ # Future: detail=f"Dataset {dataset_id} not found."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-not-found"
+ title = "Dataset Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+class DatasetNoAccessError(ProblemDetailError):
+ """Raised when a user does not have access to a dataset.
+
+ # Future: detail=f"Access denied to dataset {dataset_id}."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-no-access"
+ title = "Dataset Access Denied"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+class DatasetNoDataFileError(ProblemDetailError):
+ """Raised when a dataset's data file is missing.
+
+ # Future: detail=f"Data file for dataset {dataset_id} not found."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-no-data-file"
+ title = "Dataset Data File Missing"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+
+
+class DatasetNotProcessedError(ProblemDetailError):
+ """Raised when a dataset has not been processed yet.
+
+ # Future: detail=f"Dataset {dataset_id} has not been processed yet."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-not-processed"
+ title = "Dataset Not Processed"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+
+
+class DatasetProcessingError(ProblemDetailError):
+ """Raised when a dataset had an error during processing.
+
+ # Future: detail=f"Dataset {dataset_id} encountered an error during processing."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-processing-error"
+ title = "Dataset Processing Error"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+
+
+class DatasetNoFeaturesError(ProblemDetailError):
+ """Raised when a dataset has no features available.
+
+ # Future: detail=f"No features found for dataset {dataset_id}."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-no-features"
+ title = "Dataset Features Not Available"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+
+
+class DatasetStatusTransitionError(ProblemDetailError):
+ """Raised when an invalid dataset status transition is attempted.
+
+ # Future: detail=f"Cannot transition dataset {dataset_id} from {from_status} to {to_status}."
+ # Future: validate statuses are valid DatasetStatus values
+ """
+
+ uri = "https://openml.org/problems/dataset-status-transition"
+ title = "Invalid Status Transition"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+
+
+class DatasetNotOwnedError(ProblemDetailError):
+ """Raised when a user tries to modify a dataset they do not own.
+
+ # Future: detail=f"Dataset {dataset_id} is not owned by you."
+ # Future: validate dataset_id is positive int
+ """
+
+ uri = "https://openml.org/problems/dataset-not-owned"
+ title = "Dataset Not Owned"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+class DatasetAdminOnlyError(ProblemDetailError):
+ """Raised when a non-admin tries to perform an admin-only action.
+
+ # Future: detail=f"Only administrators can {action}."
+ # Future: validate action is non-empty string
+ """
+
+ uri = "https://openml.org/problems/dataset-admin-only"
+ title = "Administrator Only"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+# =============================================================================
+# Authentication/Authorization Errors
+# =============================================================================
+
+
+class AuthenticationRequiredError(ProblemDetailError):
+ """Raised when authentication is required but not provided.
+
+ # Future: detail=f"{action} requires authentication."
+ # Future: validate action is non-empty string
+ """
+
+ uri = "https://openml.org/problems/authentication-required"
+ title = "Authentication Required"
+ _default_status_code = HTTPStatus.UNAUTHORIZED
+
+
+class AuthenticationFailedError(ProblemDetailError):
+ """Raised when authentication credentials are invalid.
+
+ # Future: detail="Authentication failed. Invalid or expired credentials."
+ """
+
+ uri = "https://openml.org/problems/authentication-failed"
+ title = "Authentication Failed"
+ _default_status_code = HTTPStatus.UNAUTHORIZED
+
+
+class ForbiddenError(ProblemDetailError):
+ """Raised when a user is authenticated but not authorized.
+
+ # Future: detail=f"You do not have permission to {action}."
+ # Future: validate action is non-empty string
+ """
+
+ uri = "https://openml.org/problems/forbidden"
+ title = "Forbidden"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+# =============================================================================
+# Tag Errors
+# =============================================================================
+
+
+class TagAlreadyExistsError(ProblemDetailError):
+ """Raised when trying to add a tag that already exists.
+
+ # Future: detail=f"Entity {entity_id} is already tagged with '{tag}'."
+ # Future: validate entity_id is positive int, tag is non-empty string
+ """
+
+ uri = "https://openml.org/problems/tag-already-exists"
+ title = "Tag Already Exists"
+ _default_status_code = HTTPStatus.CONFLICT
+
+
+# =============================================================================
+# Search/List Errors
+# =============================================================================
+
+
+class NoResultsError(ProblemDetailError):
+ """Raised when a search returns no results.
+
+ # Future: detail="No results match the search criteria."
+ """
+
+ uri = "https://openml.org/problems/no-results"
+ title = "No Results Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+# =============================================================================
+# Study Errors
+# =============================================================================
+
+
+class StudyNotFoundError(ProblemDetailError):
+ """Raised when a study cannot be found.
+
+ # Future: detail=f"Study {study_id} not found."
+ # Future: validate study_id is positive int or valid alias string
+ """
+
+ uri = "https://openml.org/problems/study-not-found"
+ title = "Study Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+class StudyPrivateError(ProblemDetailError):
+ """Raised when trying to access a private study without permission.
+
+ # Future: detail=f"Study {study_id} is private."
+ # Future: validate study_id is positive int
+ """
+
+ uri = "https://openml.org/problems/study-private"
+ title = "Study Is Private"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+class StudyLegacyError(ProblemDetailError):
+ """Raised when trying to access a legacy study that's no longer supported.
+
+ # Future: detail=f"Study {study_id} is a legacy study and no longer supported."
+ # Future: validate study_id is positive int
+ """
+
+ uri = "https://openml.org/problems/study-legacy"
+ title = "Legacy Study Not Supported"
+ _default_status_code = HTTPStatus.GONE
+
+
+class StudyAliasExistsError(ProblemDetailError):
+ """Raised when trying to create a study with an alias that already exists.
+
+ # Future: detail=f"Study alias '{alias}' already exists."
+ # Future: validate alias is non-empty string
+ """
+
+ uri = "https://openml.org/problems/study-alias-exists"
+ title = "Study Alias Already Exists"
+ _default_status_code = HTTPStatus.CONFLICT
+
+
+class StudyInvalidTypeError(ProblemDetailError):
+ """Raised when study type configuration is invalid.
+
+ # Future: detail=f"Cannot create {study_type} study with {invalid_field}."
+ """
+
+ uri = "https://openml.org/problems/study-invalid-type"
+ title = "Invalid Study Type"
+ _default_status_code = HTTPStatus.BAD_REQUEST
+
+
+class StudyNotEditableError(ProblemDetailError):
+ """Raised when trying to edit a study that cannot be edited.
+
+ # Future: detail=f"Study {study_id} cannot be edited. {reason}"
+ # Future: validate study_id is positive int
+ """
+
+ uri = "https://openml.org/problems/study-not-editable"
+ title = "Study Not Editable"
+ _default_status_code = HTTPStatus.FORBIDDEN
+
+
+class StudyConflictError(ProblemDetailError):
+ """Raised when there's a conflict with study data (e.g., duplicate attachment).
+
+ # Future: detail=f"Conflict: {reason}"
+ """
+
+ uri = "https://openml.org/problems/study-conflict"
+ title = "Study Conflict"
+ _default_status_code = HTTPStatus.CONFLICT
+
+
+# =============================================================================
+# Task Errors
+# =============================================================================
+
+
+class TaskNotFoundError(ProblemDetailError):
+ """Raised when a task cannot be found.
+
+ # Future: detail=f"Task {task_id} not found."
+ # Future: validate task_id is positive int
+ """
+
+ uri = "https://openml.org/problems/task-not-found"
+ title = "Task Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+class TaskTypeNotFoundError(ProblemDetailError):
+ """Raised when a task type cannot be found.
+
+ # Future: detail=f"Task type {task_type_id} not found."
+ # Future: validate task_type_id is positive int
+ """
+
+ uri = "https://openml.org/problems/task-type-not-found"
+ title = "Task Type Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+# =============================================================================
+# Flow Errors
+# =============================================================================
+
+
+class FlowNotFoundError(ProblemDetailError):
+ """Raised when a flow cannot be found.
+
+ # Future: detail=f"Flow {flow_id} not found." or "Flow '{name}' version '{version}' not found."
+ # Future: validate flow_id is positive int
+ """
+
+ uri = "https://openml.org/problems/flow-not-found"
+ title = "Flow Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+# =============================================================================
+# Service Errors
+# =============================================================================
+
+
+class ServiceNotFoundError(ProblemDetailError):
+ """Raised when a service cannot be found.
+
+ # Future: detail=f"Service {service_id} not found."
+ # Future: validate service_id is positive int
+ """
+
+ uri = "https://openml.org/problems/service-not-found"
+ title = "Service Not Found"
+ _default_status_code = HTTPStatus.NOT_FOUND
+
+
+# =============================================================================
+# Internal Errors
+# =============================================================================
+
+
+class InternalError(ProblemDetailError):
+ """Raised for unexpected internal server errors.
+
+ # Future: detail="An unexpected error occurred. Please try again later."
+ """
+
+ uri = "https://openml.org/problems/internal-error"
+ title = "Internal Server Error"
+ _default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR
+
+
+# =============================================================================
+# Backwards Compatibility
+# =============================================================================
+
+# Mapping from old ProblemType strings to new exception classes
+_PROBLEM_TYPE_TO_EXCEPTION: dict[str, type[ProblemDetailError]] = {
+ "https://openml.org/problems/dataset-not-found": DatasetNotFoundError,
+ "https://openml.org/problems/dataset-no-access": DatasetNoAccessError,
+ "https://openml.org/problems/dataset-no-data-file": DatasetNoDataFileError,
+ "https://openml.org/problems/dataset-not-processed": DatasetNotProcessedError,
+ "https://openml.org/problems/dataset-processing-error": DatasetProcessingError,
+ "https://openml.org/problems/dataset-no-features": DatasetNoFeaturesError,
+ "https://openml.org/problems/dataset-status-transition": DatasetStatusTransitionError,
+ "https://openml.org/problems/dataset-not-owned": DatasetNotOwnedError,
+ "https://openml.org/problems/dataset-admin-only": DatasetAdminOnlyError,
+ "https://openml.org/problems/authentication-required": AuthenticationRequiredError,
+ "https://openml.org/problems/authentication-failed": AuthenticationFailedError,
+ "https://openml.org/problems/forbidden": ForbiddenError,
+ "https://openml.org/problems/tag-already-exists": TagAlreadyExistsError,
+ "https://openml.org/problems/no-results": NoResultsError,
+ "https://openml.org/problems/study-not-found": StudyNotFoundError,
+ "https://openml.org/problems/study-private": StudyPrivateError,
+ "https://openml.org/problems/study-legacy": StudyLegacyError,
+ "https://openml.org/problems/study-alias-exists": StudyAliasExistsError,
+ "https://openml.org/problems/study-invalid-type": StudyInvalidTypeError,
+ "https://openml.org/problems/study-not-editable": StudyNotEditableError,
+ "https://openml.org/problems/study-conflict": StudyConflictError,
+ "https://openml.org/problems/task-not-found": TaskNotFoundError,
+ "https://openml.org/problems/task-type-not-found": TaskTypeNotFoundError,
+ "https://openml.org/problems/flow-not-found": FlowNotFoundError,
+ "https://openml.org/problems/service-not-found": ServiceNotFoundError,
+ "https://openml.org/problems/internal-error": InternalError,
}
-def raise_problem(
- status_code: HTTPStatus | int,
- type_: str,
- detail: str,
- *,
- instance: str | None = None,
- code: int | str | None = None,
- **extensions: ExtensionValue,
-) -> NoReturn:
- """Helper function to raise RFC 9457 compliant errors.
-
- Args:
- status_code: HTTP status code for the response.
- type_: Problem type URI identifying the error class.
- detail: Human-readable explanation of this specific error occurrence.
- instance: Optional URI identifying this specific error occurrence.
- code: Optional legacy OpenML error code (for backwards compatibility).
- **extensions: Additional extension fields to include in the response.
- """
- title = PROBLEM_TITLES.get(type_)
- if code is not None:
- extensions["code"] = str(code)
- raise ProblemDetailError(
- status_code=status_code,
- detail=detail,
- title=title,
- type_=type_,
- instance=instance,
- **extensions,
- )
+class ProblemType:
+ """Problem type URIs for common OpenML errors.
+
+ Deprecated: Use the specific exception classes directly instead.
+ """
+
+ DATASET_NOT_FOUND = DatasetNotFoundError.uri
+ DATASET_NO_ACCESS = DatasetNoAccessError.uri
+ DATASET_NO_DATA_FILE = DatasetNoDataFileError.uri
+ DATASET_NOT_PROCESSED = DatasetNotProcessedError.uri
+ DATASET_PROCESSING_ERROR = DatasetProcessingError.uri
+ DATASET_NO_FEATURES = DatasetNoFeaturesError.uri
+ DATASET_STATUS_TRANSITION = DatasetStatusTransitionError.uri
+ DATASET_NOT_OWNED = DatasetNotOwnedError.uri
+ DATASET_ADMIN_ONLY = DatasetAdminOnlyError.uri
+ AUTHENTICATION_REQUIRED = AuthenticationRequiredError.uri
+ AUTHENTICATION_FAILED = AuthenticationFailedError.uri
+ FORBIDDEN = ForbiddenError.uri
+ TAG_ALREADY_EXISTS = TagAlreadyExistsError.uri
+ NO_RESULTS = NoResultsError.uri
+ STUDY_NOT_FOUND = StudyNotFoundError.uri
+ STUDY_PRIVATE = StudyPrivateError.uri
+ STUDY_LEGACY = StudyLegacyError.uri
+ STUDY_ALIAS_EXISTS = StudyAliasExistsError.uri
+ STUDY_INVALID_TYPE = StudyInvalidTypeError.uri
+ STUDY_NOT_EDITABLE = StudyNotEditableError.uri
+ STUDY_CONFLICT = StudyConflictError.uri
+ TASK_NOT_FOUND = TaskNotFoundError.uri
+ TASK_TYPE_NOT_FOUND = TaskTypeNotFoundError.uri
+ FLOW_NOT_FOUND = FlowNotFoundError.uri
+ SERVICE_NOT_FOUND = ServiceNotFoundError.uri
+ INTERNAL_ERROR = InternalError.uri
diff --git a/src/routers/mldcat_ap/dataset.py b/src/routers/mldcat_ap/dataset.py
index eaa5652..00c7610 100644
--- a/src/routers/mldcat_ap/dataset.py
+++ b/src/routers/mldcat_ap/dataset.py
@@ -4,14 +4,13 @@
Specific queries could be written to fetch e.g., a single feature or quality.
"""
-from http import HTTPStatus
from typing import Annotated
from fastapi import APIRouter, Depends
from sqlalchemy import Connection
import config
-from core.errors import ProblemType, raise_problem
+from core.errors import ServiceNotFoundError
from database.users import User
from routers.dependencies import expdb_connection, fetch_user, userdb_connection
from routers.openml.datasets import get_dataset, get_dataset_features
@@ -123,11 +122,8 @@ def get_mldcat_ap_distribution(
)
def get_dataservice(service_id: int) -> JsonLDGraph:
if service_id != 1:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.SERVICE_NOT_FOUND,
- detail="Service not found.",
- )
+ msg = "Service not found."
+ raise ServiceNotFoundError(msg)
return JsonLDGraph(
context="https://semiceu.github.io/MLDCAT-AP/releases/1.0.0/context.jsonld",
graph=[
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index b18e987..fed0bba 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -1,7 +1,6 @@
import re
from datetime import datetime
from enum import StrEnum
-from http import HTTPStatus
from typing import Annotated, Any, Literal, NamedTuple
from fastapi import APIRouter, Body, Depends
@@ -11,7 +10,23 @@
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetError, ProblemType, raise_problem
+from core.errors import (
+ AuthenticationFailedError,
+ AuthenticationRequiredError,
+ DatasetAdminOnlyError,
+ DatasetError,
+ DatasetNoAccessError,
+ DatasetNoDataFileError,
+ DatasetNoFeaturesError,
+ DatasetNotFoundError,
+ DatasetNotOwnedError,
+ DatasetNotProcessedError,
+ DatasetProcessingError,
+ DatasetStatusTransitionError,
+ InternalError,
+ NoResultsError,
+ TagAlreadyExistsError,
+)
from core.formatting import (
_csv_as_list,
_format_dataset_url,
@@ -36,20 +51,12 @@ def tag_dataset(
) -> dict[str, dict[str, Any]]:
tags = database.datasets.get_tags_for(data_id, expdb_db)
if tag.casefold() in [t.casefold() for t in tags]:
- raise_problem(
- status_code=HTTPStatus.CONFLICT,
- type_=ProblemType.TAG_ALREADY_EXISTS,
- detail=f"Entity already tagged by this tag. id={data_id}; tag={tag}",
- code=473,
- )
+ msg = f"Entity already tagged by this tag. id={data_id}; tag={tag}"
+ raise TagAlreadyExistsError(msg, code=473)
if user is None:
- raise_problem(
- status_code=HTTPStatus.UNAUTHORIZED,
- type_=ProblemType.AUTHENTICATION_FAILED,
- detail="Authentication failed.",
- code=103,
- )
+ msg = "Authentication failed."
+ raise AuthenticationFailedError(msg, code=103)
database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db)
return {
@@ -194,12 +201,8 @@ def quality_clause(quality: str, range_: str | None) -> str:
row.did: dict(zip(columns, row, strict=True)) for row in rows
}
if not datasets:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.NO_RESULTS,
- detail="No datasets match the search criteria.",
- code=372,
- )
+ msg = "No datasets match the search criteria."
+ raise NoResultsError(msg, code=372)
for dataset in datasets.values():
# The old API does not actually provide the checksum but just an empty field
@@ -262,20 +265,12 @@ def _get_dataset_raise_otherwise(
Raises ProblemDetailError if the dataset does not exist or the user can not access it.
"""
if not (dataset := database.datasets.get(dataset_id, expdb)):
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.DATASET_NOT_FOUND,
- detail="Unknown dataset.",
- code=DatasetError.NOT_FOUND,
- )
+ msg = "Unknown dataset."
+ raise DatasetNotFoundError(msg, code=DatasetError.NOT_FOUND)
if not _user_has_access(dataset=dataset, user=user):
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.DATASET_NO_ACCESS,
- detail="No access granted.",
- code=DatasetError.NO_ACCESS,
- )
+ msg = "No access granted."
+ raise DatasetNoAccessError(msg, code=DatasetError.NO_ACCESS)
return dataset
@@ -298,32 +293,19 @@ def get_dataset_features(
if not features:
processing_state = database.datasets.get_latest_processing_update(dataset_id, expdb)
if processing_state is None:
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_NOT_PROCESSED,
- detail=(
- "Dataset not processed yet. The dataset was not processed yet, "
- "features are not yet available. Please wait for a few minutes."
- ),
- code=273,
- )
- elif processing_state.error:
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_PROCESSING_ERROR,
- detail="No features found. Additionally, dataset processed with error.",
- code=274,
- )
- else:
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_NO_FEATURES,
- detail=(
- "No features found. "
- "The dataset did not contain any features, or we could not extract them."
- ),
- code=272,
+ msg = (
+ "Dataset not processed yet. The dataset was not processed yet, "
+ "features are not yet available. Please wait for a few minutes."
)
+ raise DatasetNotProcessedError(msg, code=273)
+ if processing_state.error:
+ msg = "No features found. Additionally, dataset processed with error."
+ raise DatasetProcessingError(msg, code=274)
+ msg = (
+ "No features found. "
+ "The dataset did not contain any features, or we could not extract them."
+ )
+ raise DatasetNoFeaturesError(msg, code=272)
return features
@@ -337,38 +319,23 @@ def update_dataset_status(
expdb: Annotated[Connection, Depends(expdb_connection)],
) -> dict[str, str | int]:
if user is None:
- raise_problem(
- status_code=HTTPStatus.UNAUTHORIZED,
- type_=ProblemType.AUTHENTICATION_REQUIRED,
- detail="Updating dataset status requires authentication.",
- )
+ msg = "Updating dataset status requires authentication."
+ raise AuthenticationRequiredError(msg)
dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb)
can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups
if status == DatasetStatus.DEACTIVATED and not can_deactivate:
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.DATASET_NOT_OWNED,
- detail="Dataset is not owned by you.",
- code=693,
- )
+ msg = "Dataset is not owned by you."
+ raise DatasetNotOwnedError(msg, code=693)
if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups:
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.DATASET_ADMIN_ONLY,
- detail="Only administrators can activate datasets.",
- code=696,
- )
+ msg = "Only administrators can activate datasets."
+ raise DatasetAdminOnlyError(msg, code=696)
current_status = database.datasets.get_status(dataset_id, expdb)
if current_status and current_status.status == status:
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_STATUS_TRANSITION,
- detail="Illegal status transition.",
- code=694,
- )
+ msg = "Illegal status transition."
+ raise DatasetStatusTransitionError(msg, code=694)
# If current status is unknown, it is effectively "in preparation",
# So the following transitions are allowed (first 3 transitions are first clause)
@@ -381,11 +348,8 @@ def update_dataset_status(
elif current_status.status == DatasetStatus.DEACTIVATED:
database.datasets.remove_deactivated_status(dataset_id, expdb)
else:
- raise_problem(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- type_=ProblemType.INTERNAL_ERROR,
- detail=f"Unknown status transition: {current_status} -> {status}",
- )
+ msg = f"Unknown status transition: {current_status} -> {status}"
+ raise InternalError(msg)
return {"dataset_id": dataset_id, "status": status}
@@ -404,12 +368,8 @@ def get_dataset(
if not (
dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db)
):
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_NO_DATA_FILE,
- detail="No data file found.",
- code=DatasetError.NO_DATA_FILE,
- )
+ msg = "No data file found."
+ raise DatasetNoDataFileError(msg, code=DatasetError.NO_DATA_FILE)
tags = database.datasets.get_tags_for(dataset_id, expdb_db)
description = database.datasets.get_description(dataset_id, expdb_db)
diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py
index afd7e2a..64dd083 100644
--- a/src/routers/openml/flows.py
+++ b/src/routers/openml/flows.py
@@ -1,4 +1,3 @@
-from http import HTTPStatus
from typing import Annotated, Literal
from fastapi import APIRouter, Depends
@@ -6,7 +5,7 @@
import database.flows
from core.conversions import _str_to_num
-from core.errors import ProblemType, raise_problem
+from core.errors import FlowNotFoundError
from routers.dependencies import expdb_connection
from schemas.flows import Flow, Parameter, Subflow
@@ -22,11 +21,8 @@ def flow_exists(
"""Check if a Flow with the name and version exists, if so, return the flow id."""
flow = database.flows.get_by_name(name=name, external_version=external_version, expdb=expdb)
if flow is None:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.FLOW_NOT_FOUND,
- detail="Flow not found.",
- )
+ msg = "Flow not found."
+ raise FlowNotFoundError(msg)
return {"flow_id": flow.id}
@@ -34,11 +30,8 @@ def flow_exists(
def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow:
flow = database.flows.get(flow_id, expdb)
if not flow:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.FLOW_NOT_FOUND,
- detail="Flow not found.",
- )
+ msg = "Flow not found."
+ raise FlowNotFoundError(msg)
parameter_rows = database.flows.get_parameters(flow_id, expdb)
parameters = [
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 77bf64b..a90a793 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -7,7 +7,7 @@
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetError, ProblemType, raise_problem
+from core.errors import DatasetError, DatasetNotFoundError
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
from schemas.datasets.openml import Quality
@@ -35,11 +35,12 @@ def get_qualities(
) -> list[Quality]:
dataset = database.datasets.get(dataset_id, expdb)
if not dataset or not _user_has_access(dataset, user):
- raise_problem(
- status_code=HTTPStatus.PRECONDITION_FAILED,
- type_=ProblemType.DATASET_NOT_FOUND,
- detail="Unknown dataset.",
+ # Backwards compatibility: PHP API returns 412 PRECONDITION_FAILED
+ msg = "Unknown dataset."
+ raise DatasetNotFoundError(
+ msg,
code=DatasetError.NO_DATA_FILE,
+ status_code=HTTPStatus.PRECONDITION_FAILED,
)
return database.qualities.get_for_dataset(dataset_id, expdb)
# The PHP API provided (sometime) helpful error messages
diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py
index 0ff49e8..9e9a6c9 100644
--- a/src/routers/openml/study.py
+++ b/src/routers/openml/study.py
@@ -1,4 +1,3 @@
-from http import HTTPStatus
from typing import Annotated, Literal
from fastapi import APIRouter, Body, Depends
@@ -6,7 +5,16 @@
from sqlalchemy import Connection, Row
import database.studies
-from core.errors import ProblemType, raise_problem
+from core.errors import (
+ AuthenticationRequiredError,
+ StudyAliasExistsError,
+ StudyConflictError,
+ StudyInvalidTypeError,
+ StudyLegacyError,
+ StudyNotEditableError,
+ StudyNotFoundError,
+ StudyPrivateError,
+)
from core.formatting import _str_to_bool
from database.users import User, UserGroup
from routers.dependencies import expdb_connection, fetch_user
@@ -23,30 +31,18 @@ def _get_study_raise_otherwise(id_or_alias: int | str, user: User | None, expdb:
study = database.studies.get_by_alias(id_or_alias, expdb)
if study is None:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.STUDY_NOT_FOUND,
- detail="Study not found.",
- )
+ msg = "Study not found."
+ raise StudyNotFoundError(msg)
if study.visibility == Visibility.PRIVATE:
if user is None:
- raise_problem(
- status_code=HTTPStatus.UNAUTHORIZED,
- type_=ProblemType.AUTHENTICATION_REQUIRED,
- detail="Must authenticate for private study.",
- )
+ msg = "Must authenticate for private study."
+ raise AuthenticationRequiredError(msg)
if study.creator != user.user_id and UserGroup.ADMIN not in user.groups:
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.STUDY_PRIVATE,
- detail="Study is private.",
- )
+ msg = "Study is private."
+ raise StudyPrivateError(msg)
if _str_to_bool(study.legacy):
- raise_problem(
- status_code=HTTPStatus.GONE,
- type_=ProblemType.STUDY_LEGACY,
- detail="Legacy studies are no longer supported.",
- )
+ msg = "Legacy studies are no longer supported."
+ raise StudyLegacyError(msg)
return study
@@ -63,25 +59,16 @@ def attach_to_study(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> AttachDetachResponse:
if user is None:
- raise_problem(
- status_code=HTTPStatus.UNAUTHORIZED,
- type_=ProblemType.AUTHENTICATION_REQUIRED,
- detail="Authentication required.",
- )
+ msg = "Authentication required."
+ raise AuthenticationRequiredError(msg)
study = _get_study_raise_otherwise(study_id, user, expdb)
# PHP lets *anyone* edit *any* study. We're not going to do that.
if study.creator != user.user_id and UserGroup.ADMIN not in user.groups:
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.STUDY_NOT_EDITABLE,
- detail="Study can only be edited by its creator.",
- )
+ msg = "Study can only be edited by its creator."
+ raise StudyNotEditableError(msg)
if study.status != StudyStatus.IN_PREPARATION:
- raise_problem(
- status_code=HTTPStatus.FORBIDDEN,
- type_=ProblemType.STUDY_NOT_EDITABLE,
- detail="Study can only be edited while in preparation.",
- )
+ msg = "Study can only be edited while in preparation."
+ raise StudyNotEditableError(msg)
# We let the database handle the constraints on whether
# the entity is already attached or if it even exists.
@@ -96,11 +83,8 @@ def attach_to_study(
else:
database.studies.attach_runs(run_ids=entity_ids, **attach_kwargs)
except ValueError as e:
- raise_problem(
- status_code=HTTPStatus.CONFLICT,
- type_=ProblemType.STUDY_CONFLICT,
- detail=str(e),
- )
+ msg = str(e)
+ raise StudyConflictError(msg) from e
return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_)
@@ -111,29 +95,17 @@ def create_study(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> dict[Literal["study_id"], int]:
if user is None:
- raise_problem(
- status_code=HTTPStatus.UNAUTHORIZED,
- type_=ProblemType.AUTHENTICATION_REQUIRED,
- detail="Creating a study requires authentication.",
- )
+ msg = "Creating a study requires authentication."
+ raise AuthenticationRequiredError(msg)
if study.main_entity_type == StudyType.RUN and study.tasks:
- raise_problem(
- status_code=HTTPStatus.BAD_REQUEST,
- type_=ProblemType.STUDY_INVALID_TYPE,
- detail="Cannot create a run study with tasks.",
- )
+ msg = "Cannot create a run study with tasks."
+ raise StudyInvalidTypeError(msg)
if study.main_entity_type == StudyType.TASK and study.runs:
- raise_problem(
- status_code=HTTPStatus.BAD_REQUEST,
- type_=ProblemType.STUDY_INVALID_TYPE,
- detail="Cannot create a task study with runs.",
- )
+ msg = "Cannot create a task study with runs."
+ raise StudyInvalidTypeError(msg)
if study.alias and database.studies.get_by_alias(study.alias, expdb):
- raise_problem(
- status_code=HTTPStatus.CONFLICT,
- type_=ProblemType.STUDY_ALIAS_EXISTS,
- detail="Study alias already exists.",
- )
+ msg = "Study alias already exists."
+ raise StudyAliasExistsError(msg)
study_id = database.studies.create(study, user, expdb)
if study.main_entity_type == StudyType.TASK:
for task_id in study.tasks:
diff --git a/src/routers/openml/tasks.py b/src/routers/openml/tasks.py
index 8007020..52999b1 100644
--- a/src/routers/openml/tasks.py
+++ b/src/routers/openml/tasks.py
@@ -1,6 +1,5 @@
import json
import re
-from http import HTTPStatus
from typing import Annotated, cast
import xmltodict
@@ -10,7 +9,7 @@
import config
import database.datasets
import database.tasks
-from core.errors import ProblemType, raise_problem
+from core.errors import InternalError, TaskNotFoundError
from routers.dependencies import expdb_connection
from schemas.datasets.openml import Task
@@ -156,17 +155,11 @@ def get_task(
expdb: Annotated[Connection, Depends(expdb_connection)] = None,
) -> Task:
if not (task := database.tasks.get(task_id, expdb)):
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.TASK_NOT_FOUND,
- detail="Task not found.",
- )
+ msg = "Task not found."
+ raise TaskNotFoundError(msg)
if not (task_type := database.tasks.get_task_type(task.ttid, expdb)):
- raise_problem(
- status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
- type_=ProblemType.INTERNAL_ERROR,
- detail="Task type not found.",
- )
+ msg = "Task type not found."
+ raise InternalError(msg)
task_inputs = {
row.input: int(row.value) if row.value.isdigit() else row.value
diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py
index 9916499..18903c7 100644
--- a/src/routers/openml/tasktype.py
+++ b/src/routers/openml/tasktype.py
@@ -1,11 +1,10 @@
import json
-from http import HTTPStatus
from typing import Annotated, Any, Literal, cast
from fastapi import APIRouter, Depends
from sqlalchemy import Connection, Row
-from core.errors import ProblemType, raise_problem
+from core.errors import TaskTypeNotFoundError
from database.tasks import get_input_for_task_type, get_task_types
from database.tasks import get_task_type as db_get_task_type
from routers.dependencies import expdb_connection
@@ -46,12 +45,8 @@ def get_task_type(
) -> dict[Literal["task_type"], dict[str, str | None | list[str] | list[dict[str, str]]]]:
task_type_record = db_get_task_type(task_type_id, expdb)
if task_type_record is None:
- raise_problem(
- status_code=HTTPStatus.NOT_FOUND,
- type_=ProblemType.TASK_TYPE_NOT_FOUND,
- detail="Unknown task type.",
- code=241,
- )
+ msg = "Unknown task type."
+ raise TaskTypeNotFoundError(msg, code=241)
task_type = _normalize_task_type(task_type_record)
# Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B')
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index a1395f1..6d83780 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -4,7 +4,7 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
-from core.errors import ProblemDetailError, ProblemType
+from core.errors import DatasetError, DatasetNoAccessError, ProblemType
from database.users import User
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
@@ -86,7 +86,7 @@ def test_private_dataset_no_access(
user: User | None,
expdb_test: Connection,
) -> None:
- with pytest.raises(ProblemDetailError) as e:
+ with pytest.raises(DatasetNoAccessError) as e:
get_dataset(
dataset_id=130,
user=user,
@@ -94,8 +94,8 @@ def test_private_dataset_no_access(
expdb_db=expdb_test,
)
assert e.value.status_code == HTTPStatus.FORBIDDEN
- assert e.value.problem.type_ == ProblemType.DATASET_NO_ACCESS
- assert e.value.extensions.get("code") == "112"
+ assert e.value.uri == ProblemType.DATASET_NO_ACCESS
+ assert e.value.code == DatasetError.NO_ACCESS
@pytest.mark.parametrize(
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index 2627053..2f70705 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -6,7 +6,7 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
-from core.errors import ProblemDetailError, ProblemType
+from core.errors import FlowNotFoundError, ProblemType
from routers.openml.flows import flow_exists
from tests.conftest import Flow
@@ -53,10 +53,10 @@ def test_flow_exists_processes_found(
def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: Connection) -> None:
mocker.patch("database.flows.get_by_name", return_value=None)
- with pytest.raises(ProblemDetailError) as error:
+ with pytest.raises(FlowNotFoundError) as error:
flow_exists("foo", "bar", expdb_test)
assert error.value.status_code == HTTPStatus.NOT_FOUND
- assert error.value.problem.type_ == ProblemType.FLOW_NOT_FOUND
+ assert error.value.uri == ProblemType.FLOW_NOT_FOUND
def test_flow_exists(flow: Flow, py_api: TestClient) -> None:
From 5e12e748e631a655e960df0e8814bf86df6ccc7f Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 13 Feb 2026 11:17:50 +0200
Subject: [PATCH 7/9] Remove unused dictionary
---
src/core/errors.py | 30 ------------------------------
1 file changed, 30 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 8db4965..c6f0d3e 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -443,36 +443,6 @@ class InternalError(ProblemDetailError):
# Backwards Compatibility
# =============================================================================
-# Mapping from old ProblemType strings to new exception classes
-_PROBLEM_TYPE_TO_EXCEPTION: dict[str, type[ProblemDetailError]] = {
- "https://openml.org/problems/dataset-not-found": DatasetNotFoundError,
- "https://openml.org/problems/dataset-no-access": DatasetNoAccessError,
- "https://openml.org/problems/dataset-no-data-file": DatasetNoDataFileError,
- "https://openml.org/problems/dataset-not-processed": DatasetNotProcessedError,
- "https://openml.org/problems/dataset-processing-error": DatasetProcessingError,
- "https://openml.org/problems/dataset-no-features": DatasetNoFeaturesError,
- "https://openml.org/problems/dataset-status-transition": DatasetStatusTransitionError,
- "https://openml.org/problems/dataset-not-owned": DatasetNotOwnedError,
- "https://openml.org/problems/dataset-admin-only": DatasetAdminOnlyError,
- "https://openml.org/problems/authentication-required": AuthenticationRequiredError,
- "https://openml.org/problems/authentication-failed": AuthenticationFailedError,
- "https://openml.org/problems/forbidden": ForbiddenError,
- "https://openml.org/problems/tag-already-exists": TagAlreadyExistsError,
- "https://openml.org/problems/no-results": NoResultsError,
- "https://openml.org/problems/study-not-found": StudyNotFoundError,
- "https://openml.org/problems/study-private": StudyPrivateError,
- "https://openml.org/problems/study-legacy": StudyLegacyError,
- "https://openml.org/problems/study-alias-exists": StudyAliasExistsError,
- "https://openml.org/problems/study-invalid-type": StudyInvalidTypeError,
- "https://openml.org/problems/study-not-editable": StudyNotEditableError,
- "https://openml.org/problems/study-conflict": StudyConflictError,
- "https://openml.org/problems/task-not-found": TaskNotFoundError,
- "https://openml.org/problems/task-type-not-found": TaskTypeNotFoundError,
- "https://openml.org/problems/flow-not-found": FlowNotFoundError,
- "https://openml.org/problems/service-not-found": ServiceNotFoundError,
- "https://openml.org/problems/internal-error": InternalError,
-}
-
class ProblemType:
"""Problem type URIs for common OpenML errors.
From 4a0d5cb01e1d97af7835903ae755cdd91e26b551 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 13 Feb 2026 11:26:48 +0200
Subject: [PATCH 8/9] Remove the ProblemType class as it was confusing and only
for tests
---
src/core/errors.py | 39 -------------------
tests/routers/openml/dataset_tag_test.py | 6 +--
.../openml/datasets_list_datasets_test.py | 6 +--
tests/routers/openml/datasets_test.py | 13 +++++--
tests/routers/openml/flows_test.py | 6 +--
.../migration/datasets_migration_test.py | 12 ++++--
.../openml/migration/flows_migration_test.py | 4 +-
tests/routers/openml/qualities_test.py | 4 +-
tests/routers/openml/study_test.py | 6 +--
tests/routers/openml/task_type_test.py | 4 +-
10 files changed, 35 insertions(+), 65 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index c6f0d3e..bc45db7 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -437,42 +437,3 @@ class InternalError(ProblemDetailError):
uri = "https://openml.org/problems/internal-error"
title = "Internal Server Error"
_default_status_code = HTTPStatus.INTERNAL_SERVER_ERROR
-
-
-# =============================================================================
-# Backwards Compatibility
-# =============================================================================
-
-
-class ProblemType:
- """Problem type URIs for common OpenML errors.
-
- Deprecated: Use the specific exception classes directly instead.
- """
-
- DATASET_NOT_FOUND = DatasetNotFoundError.uri
- DATASET_NO_ACCESS = DatasetNoAccessError.uri
- DATASET_NO_DATA_FILE = DatasetNoDataFileError.uri
- DATASET_NOT_PROCESSED = DatasetNotProcessedError.uri
- DATASET_PROCESSING_ERROR = DatasetProcessingError.uri
- DATASET_NO_FEATURES = DatasetNoFeaturesError.uri
- DATASET_STATUS_TRANSITION = DatasetStatusTransitionError.uri
- DATASET_NOT_OWNED = DatasetNotOwnedError.uri
- DATASET_ADMIN_ONLY = DatasetAdminOnlyError.uri
- AUTHENTICATION_REQUIRED = AuthenticationRequiredError.uri
- AUTHENTICATION_FAILED = AuthenticationFailedError.uri
- FORBIDDEN = ForbiddenError.uri
- TAG_ALREADY_EXISTS = TagAlreadyExistsError.uri
- NO_RESULTS = NoResultsError.uri
- STUDY_NOT_FOUND = StudyNotFoundError.uri
- STUDY_PRIVATE = StudyPrivateError.uri
- STUDY_LEGACY = StudyLegacyError.uri
- STUDY_ALIAS_EXISTS = StudyAliasExistsError.uri
- STUDY_INVALID_TYPE = StudyInvalidTypeError.uri
- STUDY_NOT_EDITABLE = StudyNotEditableError.uri
- STUDY_CONFLICT = StudyConflictError.uri
- TASK_NOT_FOUND = TaskNotFoundError.uri
- TASK_TYPE_NOT_FOUND = TaskTypeNotFoundError.uri
- FLOW_NOT_FOUND = FlowNotFoundError.uri
- SERVICE_NOT_FOUND = ServiceNotFoundError.uri
- INTERNAL_ERROR = InternalError.uri
diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py
index 7147eca..d69a265 100644
--- a/tests/routers/openml/dataset_tag_test.py
+++ b/tests/routers/openml/dataset_tag_test.py
@@ -4,7 +4,7 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
-from core.errors import ProblemType
+from core.errors import AuthenticationFailedError, TagAlreadyExistsError
from database.datasets import get_tags_for
from tests import constants
from tests.users import ApiKey
@@ -24,7 +24,7 @@ def test_dataset_tag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> No
assert response.status_code == HTTPStatus.UNAUTHORIZED
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.AUTHENTICATION_FAILED
+ assert error["type"] == AuthenticationFailedError.uri
assert error["code"] == "103"
@@ -65,7 +65,7 @@ def test_dataset_tag_fails_if_tag_exists(py_api: TestClient) -> None:
assert response.status_code == HTTPStatus.CONFLICT
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.TAG_ALREADY_EXISTS
+ assert error["type"] == TagAlreadyExistsError.uri
assert error["code"] == "473"
assert f"id={dataset_id}" in error["detail"]
assert f"tag={tag}" in error["detail"]
diff --git a/tests/routers/openml/datasets_list_datasets_test.py b/tests/routers/openml/datasets_list_datasets_test.py
index 78f2d42..eaaef29 100644
--- a/tests/routers/openml/datasets_list_datasets_test.py
+++ b/tests/routers/openml/datasets_list_datasets_test.py
@@ -8,7 +8,7 @@
from hypothesis import strategies as st
from starlette.testclient import TestClient
-from core.errors import ProblemType
+from core.errors import NoResultsError
from tests import constants
from tests.users import ApiKey
@@ -19,7 +19,7 @@ def _assert_empty_result(
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.NO_RESULTS
+ assert error["type"] == NoResultsError.uri
assert error["code"] == "372"
@@ -300,7 +300,7 @@ def test_list_data_identical(
# Verify Python API returns RFC 9457 format
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.NO_RESULTS
+ assert error["type"] == NoResultsError.uri
assert error["code"] == "372"
return None
new_json = response.json()
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 6d83780..01c17b9 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -4,7 +4,12 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
-from core.errors import DatasetError, DatasetNoAccessError, ProblemType
+from core.errors import (
+ DatasetError,
+ DatasetNoAccessError,
+ DatasetNotFoundError,
+ DatasetProcessingError,
+)
from database.users import User
from routers.openml.datasets import get_dataset
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
@@ -30,7 +35,7 @@ def test_error_unknown_dataset(
assert response.status_code == response_code
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ assert error["type"] == DatasetNotFoundError.uri
assert error["title"] == "Dataset Not Found"
assert error["status"] == HTTPStatus.NOT_FOUND
assert error["detail"] == "Unknown dataset."
@@ -94,7 +99,7 @@ def test_private_dataset_no_access(
expdb_db=expdb_test,
)
assert e.value.status_code == HTTPStatus.FORBIDDEN
- assert e.value.uri == ProblemType.DATASET_NO_ACCESS
+ assert e.value.uri == DatasetNoAccessError.uri
assert e.value.code == DatasetError.NO_ACCESS
@@ -186,7 +191,7 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None:
assert response.status_code == HTTPStatus.PRECONDITION_FAILED
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.DATASET_PROCESSING_ERROR
+ assert error["type"] == DatasetProcessingError.uri
assert error["code"] == "274"
assert "No features found" in error["detail"]
diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py
index 2f70705..2c82ea3 100644
--- a/tests/routers/openml/flows_test.py
+++ b/tests/routers/openml/flows_test.py
@@ -6,7 +6,7 @@
from sqlalchemy import Connection
from starlette.testclient import TestClient
-from core.errors import FlowNotFoundError, ProblemType
+from core.errors import FlowNotFoundError
from routers.openml.flows import flow_exists
from tests.conftest import Flow
@@ -56,7 +56,7 @@ def test_flow_exists_handles_flow_not_found(mocker: MockerFixture, expdb_test: C
with pytest.raises(FlowNotFoundError) as error:
flow_exists("foo", "bar", expdb_test)
assert error.value.status_code == HTTPStatus.NOT_FOUND
- assert error.value.uri == ProblemType.FLOW_NOT_FOUND
+ assert error.value.uri == FlowNotFoundError.uri
def test_flow_exists(flow: Flow, py_api: TestClient) -> None:
@@ -70,7 +70,7 @@ def test_flow_exists_not_exists(py_api: TestClient) -> None:
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.FLOW_NOT_FOUND
+ assert error["type"] == FlowNotFoundError.uri
assert error["detail"] == "Flow not found."
diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py
index 1c514f6..99fb590 100644
--- a/tests/routers/openml/migration/datasets_migration_test.py
+++ b/tests/routers/openml/migration/datasets_migration_test.py
@@ -7,7 +7,11 @@
import tests.constants
from core.conversions import nested_remove_single_element_list
-from core.errors import ProblemType
+from core.errors import (
+ DatasetNoAccessError,
+ DatasetNotFoundError,
+ TagAlreadyExistsError,
+)
from tests.users import ApiKey
@@ -109,7 +113,7 @@ def test_error_unknown_dataset(
# RFC 9457: Python API now returns problem+json format
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ assert error["type"] == DatasetNotFoundError.uri
assert error["code"] == "111"
@@ -128,7 +132,7 @@ def test_private_dataset_no_user_no_access(
assert response.status_code == HTTPStatus.FORBIDDEN
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.DATASET_NO_ACCESS
+ assert error["type"] == DatasetNoAccessError.uri
assert error["code"] == "112"
@@ -200,7 +204,7 @@ def test_dataset_tag_response_is_identical(
assert new.status_code == HTTPStatus.CONFLICT
assert new.headers["content-type"] == "application/problem+json"
error = new.json()
- assert error["type"] == ProblemType.TAG_ALREADY_EXISTS
+ assert error["type"] == TagAlreadyExistsError.uri
assert error["code"] == "473"
return
diff --git a/tests/routers/openml/migration/flows_migration_test.py b/tests/routers/openml/migration/flows_migration_test.py
index e3a559d..5b82426 100644
--- a/tests/routers/openml/migration/flows_migration_test.py
+++ b/tests/routers/openml/migration/flows_migration_test.py
@@ -10,7 +10,7 @@
nested_remove_single_element_list,
nested_str_to_num,
)
-from core.errors import ProblemType
+from core.errors import FlowNotFoundError
from tests.conftest import Flow
@@ -31,7 +31,7 @@ def test_flow_exists_not(
# RFC 9457: Python API now returns problem+json format
assert py_response.headers["content-type"] == "application/problem+json"
error = py_response.json()
- assert error["type"] == ProblemType.FLOW_NOT_FOUND
+ assert error["type"] == FlowNotFoundError.uri
assert error["detail"] == "Flow not found."
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index 814607c..54cf984 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -6,7 +6,7 @@
from sqlalchemy import Connection, text
from starlette.testclient import TestClient
-from core.errors import ProblemType
+from core.errors import DatasetNotFoundError
def _remove_quality_from_database(quality_name: str, expdb_test: Connection) -> None:
@@ -318,6 +318,6 @@ def test_get_quality_identical_error(
# RFC 9457: Python API now returns problem+json format
assert python_response.headers["content-type"] == "application/problem+json"
error = python_response.json()
- assert error["type"] == ProblemType.DATASET_NOT_FOUND
+ assert error["type"] == DatasetNotFoundError.uri
# Verify the error message matches the PHP API semantically
assert "Unknown dataset" in error["detail"]
diff --git a/tests/routers/openml/study_test.py b/tests/routers/openml/study_test.py
index b2dd862..ed7018f 100644
--- a/tests/routers/openml/study_test.py
+++ b/tests/routers/openml/study_test.py
@@ -5,7 +5,7 @@
from sqlalchemy import Connection, text
from starlette.testclient import TestClient
-from core.errors import ProblemType
+from core.errors import StudyConflictError
from schemas.study import StudyType
from tests.users import ApiKey
@@ -559,7 +559,7 @@ def test_attach_task_to_study_already_linked_raises(
assert response.status_code == HTTPStatus.CONFLICT, response.content
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.STUDY_CONFLICT
+ assert error["type"] == StudyConflictError.uri
assert "Task 1 is already attached to study 1" in error["detail"]
@@ -578,5 +578,5 @@ def test_attach_task_to_study_but_task_not_exist_raises(
assert response.status_code == HTTPStatus.CONFLICT
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.STUDY_CONFLICT
+ assert error["type"] == StudyConflictError.uri
assert "do not exist" in error["detail"]
diff --git a/tests/routers/openml/task_type_test.py b/tests/routers/openml/task_type_test.py
index 95d5c70..a8e897e 100644
--- a/tests/routers/openml/task_type_test.py
+++ b/tests/routers/openml/task_type_test.py
@@ -5,7 +5,7 @@
import pytest
from starlette.testclient import TestClient
-from core.errors import ProblemType
+from core.errors import TaskTypeNotFoundError
def test_list_task_type(py_api: TestClient, php_api: httpx.Client) -> None:
@@ -41,6 +41,6 @@ def test_get_task_type_unknown(py_api: TestClient) -> None:
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.headers["content-type"] == "application/problem+json"
error = response.json()
- assert error["type"] == ProblemType.TASK_TYPE_NOT_FOUND
+ assert error["type"] == TaskTypeNotFoundError.uri
assert error["code"] == "241"
assert "Unknown task type" in error["detail"]
From 97d53785ea628b43edec262d5ea7b4c699051a6c Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 13 Feb 2026 11:44:20 +0200
Subject: [PATCH 9/9] Provide default codes for the different errors based on
PHP codes
---
src/core/errors.py | 187 +++++++-------------------
src/routers/openml/datasets.py | 25 ++--
src/routers/openml/qualities.py | 7 +-
src/routers/openml/tasktype.py | 2 +-
tests/routers/openml/datasets_test.py | 4 +-
5 files changed, 70 insertions(+), 155 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index bc45db7..e697831 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -4,19 +4,11 @@
See: https://www.rfc-editor.org/rfc/rfc9457.html
"""
-from enum import IntEnum
from http import HTTPStatus
from fastapi import Request
from fastapi.responses import JSONResponse
-
-class DatasetError(IntEnum):
- NOT_FOUND = 111
- NO_ACCESS = 112
- NO_DATA_FILE = 113
-
-
# =============================================================================
# Base Exception
# =============================================================================
@@ -28,14 +20,16 @@ class ProblemDetailError(Exception):
Subclasses should define class attributes:
- uri: The problem type URI
- title: Human-readable title
- - status_code: HTTP status code
+ - _default_status_code: HTTP status code
+ - _default_code: Legacy error code (optional)
- The status_code can be overridden per-instance for backwards compatibility.
+ The status_code and code can be overridden per-instance.
"""
uri: str = "about:blank"
title: str = "An error occurred"
_default_status_code: HTTPStatus = HTTPStatus.INTERNAL_SERVER_ERROR
+ _default_code: int | None = None
def __init__(
self,
@@ -46,7 +40,7 @@ def __init__(
status_code: HTTPStatus | None = None,
) -> None:
self.detail = detail
- self.code = code
+ self._code_override = code
self.instance = instance
self._status_code_override = status_code
super().__init__(detail)
@@ -58,6 +52,13 @@ def status_code(self) -> HTTPStatus:
return self._status_code_override
return self._default_status_code
+ @property
+ def code(self) -> int | str | None:
+ """Return the code, preferring instance override over class default."""
+ if self._code_override is not None:
+ return self._code_override
+ return self._default_code
+
def problem_detail_exception_handler(
request: Request, # noqa: ARG001
@@ -93,111 +94,84 @@ def problem_detail_exception_handler(
class DatasetNotFoundError(ProblemDetailError):
- """Raised when a dataset cannot be found.
-
- # Future: detail=f"Dataset {dataset_id} not found."
- # Future: validate dataset_id is positive int
- """
+ """Raised when a dataset cannot be found."""
uri = "https://openml.org/problems/dataset-not-found"
title = "Dataset Not Found"
_default_status_code = HTTPStatus.NOT_FOUND
+ _default_code = 111
class DatasetNoAccessError(ProblemDetailError):
- """Raised when user doesn't have access to a dataset.
-
- # Future: detail=f"Access denied to dataset {dataset_id}."
- # Future: validate dataset_id is positive int
- """
+ """Raised when user doesn't have access to a dataset."""
uri = "https://openml.org/problems/dataset-no-access"
title = "Dataset Access Denied"
_default_status_code = HTTPStatus.FORBIDDEN
+ _default_code = 112
class DatasetNoDataFileError(ProblemDetailError):
- """Raised when a dataset's data file is missing.
-
- # Future: detail=f"Data file for dataset {dataset_id} not found."
- # Future: validate dataset_id is positive int
- """
+ """Raised when a dataset's data file is missing."""
uri = "https://openml.org/problems/dataset-no-data-file"
title = "Dataset Data File Missing"
_default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 113
class DatasetNotProcessedError(ProblemDetailError):
- """Raised when a dataset has not been processed yet.
-
- # Future: detail=f"Dataset {dataset_id} has not been processed yet."
- # Future: validate dataset_id is positive int
- """
+ """Raised when a dataset has not been processed yet."""
uri = "https://openml.org/problems/dataset-not-processed"
title = "Dataset Not Processed"
_default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 273
class DatasetProcessingError(ProblemDetailError):
- """Raised when a dataset had an error during processing.
-
- # Future: detail=f"Dataset {dataset_id} encountered an error during processing."
- # Future: validate dataset_id is positive int
- """
+ """Raised when a dataset had an error during processing."""
uri = "https://openml.org/problems/dataset-processing-error"
title = "Dataset Processing Error"
_default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 274
class DatasetNoFeaturesError(ProblemDetailError):
- """Raised when a dataset has no features available.
-
- # Future: detail=f"No features found for dataset {dataset_id}."
- # Future: validate dataset_id is positive int
- """
+ """Raised when a dataset has no features available."""
uri = "https://openml.org/problems/dataset-no-features"
title = "Dataset Features Not Available"
_default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 272
class DatasetStatusTransitionError(ProblemDetailError):
- """Raised when an invalid dataset status transition is attempted.
-
- # Future: detail=f"Cannot transition dataset {dataset_id} from {from_status} to {to_status}."
- # Future: validate statuses are valid DatasetStatus values
- """
+ """Raised when an invalid dataset status transition is attempted."""
uri = "https://openml.org/problems/dataset-status-transition"
title = "Invalid Status Transition"
_default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 694
class DatasetNotOwnedError(ProblemDetailError):
- """Raised when user tries to modify a dataset they don't own.
-
- # Future: detail=f"Dataset {dataset_id} is not owned by you."
- # Future: validate dataset_id is positive int
- """
+ """Raised when user tries to modify a dataset they don't own."""
uri = "https://openml.org/problems/dataset-not-owned"
title = "Dataset Not Owned"
_default_status_code = HTTPStatus.FORBIDDEN
+ _default_code = 693
class DatasetAdminOnlyError(ProblemDetailError):
- """Raised when a non-admin tries to perform an admin-only action.
-
- # Future: detail=f"Only administrators can {action}."
- # Future: validate action is non-empty string
- """
+ """Raised when a non-admin tries to perform an admin-only action."""
uri = "https://openml.org/problems/dataset-admin-only"
title = "Administrator Only"
_default_status_code = HTTPStatus.FORBIDDEN
+ _default_code = 696
# =============================================================================
@@ -206,11 +180,7 @@ class DatasetAdminOnlyError(ProblemDetailError):
class AuthenticationRequiredError(ProblemDetailError):
- """Raised when authentication is required but not provided.
-
- # Future: detail=f"{action} requires authentication."
- # Future: validate action is non-empty string
- """
+ """Raised when authentication is required but not provided."""
uri = "https://openml.org/problems/authentication-required"
title = "Authentication Required"
@@ -218,22 +188,16 @@ class AuthenticationRequiredError(ProblemDetailError):
class AuthenticationFailedError(ProblemDetailError):
- """Raised when authentication credentials are invalid.
-
- # Future: detail="Authentication failed. Invalid or expired credentials."
- """
+ """Raised when authentication credentials are invalid."""
uri = "https://openml.org/problems/authentication-failed"
title = "Authentication Failed"
_default_status_code = HTTPStatus.UNAUTHORIZED
+ _default_code = 103
class ForbiddenError(ProblemDetailError):
- """Raised when user is authenticated but not authorized.
-
- # Future: detail=f"You do not have permission to {action}."
- # Future: validate action is non-empty string
- """
+ """Raised when user is authenticated but not authorized."""
uri = "https://openml.org/problems/forbidden"
title = "Forbidden"
@@ -246,15 +210,12 @@ class ForbiddenError(ProblemDetailError):
class TagAlreadyExistsError(ProblemDetailError):
- """Raised when trying to add a tag that already exists.
-
- # Future: detail=f"Entity {entity_id} is already tagged with '{tag}'."
- # Future: validate entity_id is positive int, tag is non-empty string
- """
+ """Raised when trying to add a tag that already exists."""
uri = "https://openml.org/problems/tag-already-exists"
title = "Tag Already Exists"
_default_status_code = HTTPStatus.CONFLICT
+ _default_code = 473
# =============================================================================
@@ -263,14 +224,12 @@ class TagAlreadyExistsError(ProblemDetailError):
class NoResultsError(ProblemDetailError):
- """Raised when a search returns no results.
-
- # Future: detail="No results match the search criteria."
- """
+ """Raised when a search returns no results."""
uri = "https://openml.org/problems/no-results"
title = "No Results Found"
_default_status_code = HTTPStatus.NOT_FOUND
+ _default_code = 372
# =============================================================================
@@ -279,11 +238,7 @@ class NoResultsError(ProblemDetailError):
class StudyNotFoundError(ProblemDetailError):
- """Raised when a study cannot be found.
-
- # Future: detail=f"Study {study_id} not found."
- # Future: validate study_id is positive int or valid alias string
- """
+ """Raised when a study cannot be found."""
uri = "https://openml.org/problems/study-not-found"
title = "Study Not Found"
@@ -291,11 +246,7 @@ class StudyNotFoundError(ProblemDetailError):
class StudyPrivateError(ProblemDetailError):
- """Raised when trying to access a private study without permission.
-
- # Future: detail=f"Study {study_id} is private."
- # Future: validate study_id is positive int
- """
+ """Raised when trying to access a private study without permission."""
uri = "https://openml.org/problems/study-private"
title = "Study Is Private"
@@ -303,11 +254,7 @@ class StudyPrivateError(ProblemDetailError):
class StudyLegacyError(ProblemDetailError):
- """Raised when trying to access a legacy study that's no longer supported.
-
- # Future: detail=f"Study {study_id} is a legacy study and no longer supported."
- # Future: validate study_id is positive int
- """
+ """Raised when trying to access a legacy study that's no longer supported."""
uri = "https://openml.org/problems/study-legacy"
title = "Legacy Study Not Supported"
@@ -315,11 +262,7 @@ class StudyLegacyError(ProblemDetailError):
class StudyAliasExistsError(ProblemDetailError):
- """Raised when trying to create a study with an alias that already exists.
-
- # Future: detail=f"Study alias '{alias}' already exists."
- # Future: validate alias is non-empty string
- """
+ """Raised when trying to create a study with an alias that already exists."""
uri = "https://openml.org/problems/study-alias-exists"
title = "Study Alias Already Exists"
@@ -327,10 +270,7 @@ class StudyAliasExistsError(ProblemDetailError):
class StudyInvalidTypeError(ProblemDetailError):
- """Raised when study type configuration is invalid.
-
- # Future: detail=f"Cannot create {study_type} study with {invalid_field}."
- """
+ """Raised when study type configuration is invalid."""
uri = "https://openml.org/problems/study-invalid-type"
title = "Invalid Study Type"
@@ -338,11 +278,7 @@ class StudyInvalidTypeError(ProblemDetailError):
class StudyNotEditableError(ProblemDetailError):
- """Raised when trying to edit a study that cannot be edited.
-
- # Future: detail=f"Study {study_id} cannot be edited. {reason}"
- # Future: validate study_id is positive int
- """
+ """Raised when trying to edit a study that cannot be edited."""
uri = "https://openml.org/problems/study-not-editable"
title = "Study Not Editable"
@@ -350,10 +286,7 @@ class StudyNotEditableError(ProblemDetailError):
class StudyConflictError(ProblemDetailError):
- """Raised when there's a conflict with study data (e.g., duplicate attachment).
-
- # Future: detail=f"Conflict: {reason}"
- """
+ """Raised when there's a conflict with study data (e.g., duplicate attachment)."""
uri = "https://openml.org/problems/study-conflict"
title = "Study Conflict"
@@ -366,11 +299,7 @@ class StudyConflictError(ProblemDetailError):
class TaskNotFoundError(ProblemDetailError):
- """Raised when a task cannot be found.
-
- # Future: detail=f"Task {task_id} not found."
- # Future: validate task_id is positive int
- """
+ """Raised when a task cannot be found."""
uri = "https://openml.org/problems/task-not-found"
title = "Task Not Found"
@@ -378,15 +307,12 @@ class TaskNotFoundError(ProblemDetailError):
class TaskTypeNotFoundError(ProblemDetailError):
- """Raised when a task type cannot be found.
-
- # Future: detail=f"Task type {task_type_id} not found."
- # Future: validate task_type_id is positive int
- """
+ """Raised when a task type cannot be found."""
uri = "https://openml.org/problems/task-type-not-found"
title = "Task Type Not Found"
_default_status_code = HTTPStatus.NOT_FOUND
+ _default_code = 241
# =============================================================================
@@ -395,11 +321,7 @@ class TaskTypeNotFoundError(ProblemDetailError):
class FlowNotFoundError(ProblemDetailError):
- """Raised when a flow cannot be found.
-
- # Future: detail=f"Flow {flow_id} not found." or "Flow '{name}' version '{version}' not found."
- # Future: validate flow_id is positive int
- """
+ """Raised when a flow cannot be found."""
uri = "https://openml.org/problems/flow-not-found"
title = "Flow Not Found"
@@ -412,11 +334,7 @@ class FlowNotFoundError(ProblemDetailError):
class ServiceNotFoundError(ProblemDetailError):
- """Raised when a service cannot be found.
-
- # Future: detail=f"Service {service_id} not found."
- # Future: validate service_id is positive int
- """
+ """Raised when a service cannot be found."""
uri = "https://openml.org/problems/service-not-found"
title = "Service Not Found"
@@ -429,10 +347,7 @@ class ServiceNotFoundError(ProblemDetailError):
class InternalError(ProblemDetailError):
- """Raised for unexpected internal server errors.
-
- # Future: detail="An unexpected error occurred. Please try again later."
- """
+ """Raised for unexpected internal server errors."""
uri = "https://openml.org/problems/internal-error"
title = "Internal Server Error"
diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py
index fed0bba..360b3c4 100644
--- a/src/routers/openml/datasets.py
+++ b/src/routers/openml/datasets.py
@@ -14,7 +14,6 @@
AuthenticationFailedError,
AuthenticationRequiredError,
DatasetAdminOnlyError,
- DatasetError,
DatasetNoAccessError,
DatasetNoDataFileError,
DatasetNoFeaturesError,
@@ -52,11 +51,11 @@ def tag_dataset(
tags = database.datasets.get_tags_for(data_id, expdb_db)
if tag.casefold() in [t.casefold() for t in tags]:
msg = f"Entity already tagged by this tag. id={data_id}; tag={tag}"
- raise TagAlreadyExistsError(msg, code=473)
+ raise TagAlreadyExistsError(msg)
if user is None:
msg = "Authentication failed."
- raise AuthenticationFailedError(msg, code=103)
+ raise AuthenticationFailedError(msg)
database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db)
return {
@@ -202,7 +201,7 @@ def quality_clause(quality: str, range_: str | None) -> str:
}
if not datasets:
msg = "No datasets match the search criteria."
- raise NoResultsError(msg, code=372)
+ raise NoResultsError(msg)
for dataset in datasets.values():
# The old API does not actually provide the checksum but just an empty field
@@ -266,11 +265,11 @@ def _get_dataset_raise_otherwise(
"""
if not (dataset := database.datasets.get(dataset_id, expdb)):
msg = "Unknown dataset."
- raise DatasetNotFoundError(msg, code=DatasetError.NOT_FOUND)
+ raise DatasetNotFoundError(msg)
if not _user_has_access(dataset=dataset, user=user):
msg = "No access granted."
- raise DatasetNoAccessError(msg, code=DatasetError.NO_ACCESS)
+ raise DatasetNoAccessError(msg)
return dataset
@@ -297,15 +296,15 @@ def get_dataset_features(
"Dataset not processed yet. The dataset was not processed yet, "
"features are not yet available. Please wait for a few minutes."
)
- raise DatasetNotProcessedError(msg, code=273)
+ raise DatasetNotProcessedError(msg)
if processing_state.error:
msg = "No features found. Additionally, dataset processed with error."
- raise DatasetProcessingError(msg, code=274)
+ raise DatasetProcessingError(msg)
msg = (
"No features found. "
"The dataset did not contain any features, or we could not extract them."
)
- raise DatasetNoFeaturesError(msg, code=272)
+ raise DatasetNoFeaturesError(msg)
return features
@@ -327,15 +326,15 @@ def update_dataset_status(
can_deactivate = dataset.uploader == user.user_id or UserGroup.ADMIN in user.groups
if status == DatasetStatus.DEACTIVATED and not can_deactivate:
msg = "Dataset is not owned by you."
- raise DatasetNotOwnedError(msg, code=693)
+ raise DatasetNotOwnedError(msg)
if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups:
msg = "Only administrators can activate datasets."
- raise DatasetAdminOnlyError(msg, code=696)
+ raise DatasetAdminOnlyError(msg)
current_status = database.datasets.get_status(dataset_id, expdb)
if current_status and current_status.status == status:
msg = "Illegal status transition."
- raise DatasetStatusTransitionError(msg, code=694)
+ raise DatasetStatusTransitionError(msg)
# If current status is unknown, it is effectively "in preparation",
# So the following transitions are allowed (first 3 transitions are first clause)
@@ -369,7 +368,7 @@ def get_dataset(
dataset_file := database.datasets.get_file(file_id=dataset.file_id, connection=user_db)
):
msg = "No data file found."
- raise DatasetNoDataFileError(msg, code=DatasetError.NO_DATA_FILE)
+ raise DatasetNoDataFileError(msg)
tags = database.datasets.get_tags_for(dataset_id, expdb_db)
description = database.datasets.get_description(dataset_id, expdb_db)
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index a90a793..c369b14 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -7,7 +7,7 @@
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetError, DatasetNotFoundError
+from core.errors import DatasetNotFoundError
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
from schemas.datasets.openml import Quality
@@ -35,11 +35,12 @@ def get_qualities(
) -> list[Quality]:
dataset = database.datasets.get(dataset_id, expdb)
if not dataset or not _user_has_access(dataset, user):
- # Backwards compatibility: PHP API returns 412 PRECONDITION_FAILED
+ # Backwards compatibility: PHP API returns 412 with code 113
msg = "Unknown dataset."
+ no_data_file = 113
raise DatasetNotFoundError(
msg,
- code=DatasetError.NO_DATA_FILE,
+ code=no_data_file,
status_code=HTTPStatus.PRECONDITION_FAILED,
)
return database.qualities.get_for_dataset(dataset_id, expdb)
diff --git a/src/routers/openml/tasktype.py b/src/routers/openml/tasktype.py
index 18903c7..453f224 100644
--- a/src/routers/openml/tasktype.py
+++ b/src/routers/openml/tasktype.py
@@ -46,7 +46,7 @@ def get_task_type(
task_type_record = db_get_task_type(task_type_id, expdb)
if task_type_record is None:
msg = "Unknown task type."
- raise TaskTypeNotFoundError(msg, code=241)
+ raise TaskTypeNotFoundError(msg)
task_type = _normalize_task_type(task_type_record)
# Some names are quoted, or have typos in their comma-separation (e.g. 'A ,B')
diff --git a/tests/routers/openml/datasets_test.py b/tests/routers/openml/datasets_test.py
index 01c17b9..3b342fc 100644
--- a/tests/routers/openml/datasets_test.py
+++ b/tests/routers/openml/datasets_test.py
@@ -5,7 +5,6 @@
from starlette.testclient import TestClient
from core.errors import (
- DatasetError,
DatasetNoAccessError,
DatasetNotFoundError,
DatasetProcessingError,
@@ -100,7 +99,8 @@ def test_private_dataset_no_access(
)
assert e.value.status_code == HTTPStatus.FORBIDDEN
assert e.value.uri == DatasetNoAccessError.uri
- assert e.value.code == DatasetError.NO_ACCESS
+ no_access = 112
+ assert e.value.code == no_access
@pytest.mark.parametrize(