Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e89ed3f
Added __repr__
JATAYU000 Dec 29, 2025
e7006d8
Merge branch 'main' into repr_openmlsplit
fkiraly Dec 31, 2025
e948f2e
[BUG] Temporarily fix issue #1586 by marking some failed tests as non…
EmanAbdelhaleem Jan 1, 2026
4a3aae6
[BUG] Fix Sklearn Models detection by safely importing openml-sklearn…
EmanAbdelhaleem Jan 1, 2026
8bbed43
refactor: updated OpenMLEvaluation to use dataclass decorator (#1559)
rohansen856 Jan 2, 2026
fa589ac
[MNT] Update Python version support and CI to include Python 3.14 (#1…
DDiyash Jan 2, 2026
5dfa47d
Added ReprMixin in utils
JATAYU000 Jan 3, 2026
a76333e
[MNT] add pytest marker to tests requiring test server (#1599)
geetu040 Jan 7, 2026
d755d4c
Updated OpenML classes which require repr
JATAYU000 Jan 8, 2026
8450715
Merge branch 'main' into repr_openmlsplit
fkiraly Jan 9, 2026
47bfec3
Merge branch 'main' into repr_openmlsplit
JATAYU000 Jan 22, 2026
d7c6406
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 22, 2026
b295d38
Class requires __hash__
JATAYU000 Jan 22, 2026
ff648f8
Merge branch 'main' into repr_openmlsplit
JATAYU000 Jan 26, 2026
c76f884
set __hash__ None
JATAYU000 Feb 6, 2026
26575dd
Revert "set __hash__ None"
JATAYU000 Feb 12, 2026
b5a09d9
Merge branch 'main' into pr/1567
fkiraly Feb 16, 2026
d053fad
Merge branch 'openml:main' into repr_openmlsplit
JATAYU000 Feb 16, 2026
c7ce60a
Fixes failures
JATAYU000 Feb 16, 2026
745d3bc
Merge branch 'main' into repr_openmlsplit
JATAYU000 Feb 17, 2026
1b831f2
Set hash none for split
JATAYU000 Feb 17, 2026
922df03
Merge branch 'main' into repr_openmlsplit
JATAYU000 Feb 17, 2026
fc20ecc
chore: rerun CI
JATAYU000 Feb 18, 2026
670f83d
Merge branch 'main' into repr_openmlsplit
JATAYU000 Feb 18, 2026
b2b81c6
dataclass overrides repr
JATAYU000 Feb 18, 2026
f33beac
Merge branch 'repr_openmlsplit'
JATAYU000 Feb 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 3 additions & 35 deletions openml/base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
# License: BSD 3-Clause
from __future__ import annotations

import re
import webbrowser
from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from collections.abc import Sequence

import xmltodict

import openml._api_calls
import openml.config
from openml.utils import ReprMixin

from .utils import _get_rest_api_type_alias, _tag_openml_base


class OpenMLBase(ABC):
class OpenMLBase(ReprMixin, ABC):
"""Base object for functionality that is shared across entities."""

def __repr__(self) -> str:
    """Build the string representation from the object's repr body fields."""
    return self._apply_repr_template(self._get_repr_body_fields())

@property
@abstractmethod
def id(self) -> int | None:
Expand Down Expand Up @@ -60,34 +56,6 @@ def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | N
"""
# Should be implemented in the subclasses.

def _apply_repr_template(
    self,
    body_fields: Iterable[tuple[str, str | int | list[str] | None]],
) -> str:
    """Generate the header and format the body for the string representation.

    Parameters
    ----------
    body_fields : Iterable[tuple[str, str | int | list[str] | None]]
        (name, value) pairs to display in the body of the ``__repr__``.
        A value of ``None`` is rendered as the text ``None``.

    Returns
    -------
    str
        An underlined header derived from the class name, followed by one
        ``name: value`` line per field. Field names are padded on the right
        with dots to the width of the longest name. If ``body_fields`` is
        empty, only the header is returned.
    """
    # We add spaces between capitals, e.g. ClassificationTask -> Classification Task
    name_with_spaces = re.sub(
        r"(\w)([A-Z])",
        r"\1 \2",
        self.__class__.__name__[len("OpenML") :],
    )
    header_text = f"OpenML {name_with_spaces}"
    header = f"{header_text}\n{'=' * len(header_text)}\n"

    _body_fields: list[tuple[str, str | int | list[str]]] = [
        (k, "None" if v is None else v) for k, v in body_fields
    ]
    # ``default=0`` keeps ``repr()`` from raising ValueError when there are no fields.
    longest_field_name_length = max((len(name) for name, _ in _body_fields), default=0)
    field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
    body = "\n".join(field_line_format.format(name, value) for name, value in _body_fields)
    return header + body

@abstractmethod
def _to_dict(self) -> dict[str, dict]:
"""Creates a dictionary representation of self.
Expand Down
32 changes: 29 additions & 3 deletions openml/datasets/data_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
if TYPE_CHECKING:
from IPython.lib import pretty

from openml.utils import ReprMixin

class OpenMLDataFeature: # noqa: PLW1641

class OpenMLDataFeature(ReprMixin):
"""
Data Feature (a.k.a. Attribute) object.

Expand Down Expand Up @@ -74,11 +76,35 @@ def __init__( # noqa: PLR0913
self.number_missing_values = number_missing_values
self.ontologies = ontologies

def __repr__(self) -> str:
    """Render the feature as ``[index - name (data_type)]``."""
    index, name, data_type = self.index, self.name, self.data_type
    return f"[{index} - {name} ({data_type})]"
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
    """Return the (label, value) pairs shown in the body of ``__repr__``.

    The pairs are the feature's index, name and data type, in that order.
    """
    labelled_values: list[tuple[str, int | str | None]] = [
        ("Index", self.index),
        ("Name", self.name),
        ("Data Type", self.data_type),
    ]
    return labelled_values

def __eq__(self, other: Any) -> bool:
    """Return True when ``other`` is an ``OpenMLDataFeature`` with identical attributes."""
    if not isinstance(other, OpenMLDataFeature):
        return False
    # Equality is attribute-wise: the two instance dictionaries must match.
    return self.__dict__ == other.__dict__

def __hash__(self) -> int:
    """Hash the feature from its index, name, data type, values, missing count and ontologies."""
    # Sequences are converted to tuples so they are hashable; ``None`` is kept as is.
    nominal_values = None if self.nominal_values is None else tuple(self.nominal_values)
    ontologies = None if self.ontologies is None else tuple(self.ontologies)
    key = (
        self.index,
        self.name,
        self.data_type,
        nominal_values,
        self.number_missing_values,
        ontologies,
    )
    return hash(key)

def _repr_pretty_(self, pp: pretty.PrettyPrinter, cycle: bool) -> None:  # noqa: ARG002
    """Pretty-printing hook: emit ``str(self)`` through the given printer."""
    rendered = str(self)
    pp.text(rendered)
44 changes: 15 additions & 29 deletions openml/setups/setup.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# License: BSD 3-Clause
from __future__ import annotations

from collections.abc import Sequence
from dataclasses import asdict, dataclass
from typing import Any

import openml.config
import openml.flows
from openml.utils import ReprMixin


@dataclass
class OpenMLSetup:
@dataclass(repr=False)
class OpenMLSetup(ReprMixin):
"""Setup object (a.k.a. Configuration).

Parameters
Expand Down Expand Up @@ -45,31 +47,22 @@ def _to_dict(self) -> dict[str, Any]:
else None,
}

def __repr__(self) -> str:
header = "OpenML Setup"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"Setup ID": self.setup_id,
"Flow ID": self.flow_id,
"Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
"# of Parameters": (
len(self.parameters) if self.parameters is not None else float("nan")
),
"# of Parameters": (len(self.parameters) if self.parameters is not None else "nan"),
}

# determines the order in which the information will be printed
order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]


@dataclass
class OpenMLParameter:
@dataclass(repr=False)
class OpenMLParameter(ReprMixin):
"""Parameter object (used in setup).

Parameters
Expand Down Expand Up @@ -113,11 +106,9 @@ def _to_dict(self) -> dict[str, Any]:
result["id"] = result.pop("input_id")
return result

def __repr__(self) -> str:
header = "OpenML Parameter"
header = f"{header}\n{'=' * len(header)}\n"

fields = {
def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
"""Collect all information to display in the __repr__ body."""
fields: dict[str, int | str | None] = {
"ID": self.id,
"Flow ID": self.flow_id,
# "Flow Name": self.flow_name,
Expand Down Expand Up @@ -146,9 +137,4 @@ def __repr__(self) -> str:
parameter_default,
parameter_value,
]
_fields = [(key, fields[key]) for key in order if key in fields]

longest_field_name_length = max(len(name) for name, _ in _fields)
field_line_format = f"{{:.<{longest_field_name_length}}}: {{}}"
body = "\n".join(field_line_format.format(name, value) for name, value in _fields)
return header + body
return [(key, fields[key]) for key in order if key in fields]
23 changes: 22 additions & 1 deletion openml/tasks/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@

import pickle
from collections import OrderedDict
from collections.abc import Sequence
from pathlib import Path
from typing import Any
from typing_extensions import NamedTuple

import arff # type: ignore
import numpy as np

from openml.utils import ReprMixin


class Split(NamedTuple):
"""A single split of a dataset."""
Expand All @@ -18,7 +21,7 @@ class Split(NamedTuple):
test: np.ndarray


class OpenMLSplit: # noqa: PLW1641
class OpenMLSplit(ReprMixin):
"""OpenML Split object.

This class manages train-test splits for a dataset across multiple
Expand Down Expand Up @@ -63,6 +66,22 @@ def __init__(
self.folds = len(self.split[0])
self.samples = len(self.split[0][0])

def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str] | None]]:
    """Return the (label, value) pairs shown in the body of ``__repr__``.

    Descriptions longer than 80 characters are cut to their first 77
    characters followed by ``...``.
    """
    description = self.description
    if len(description) > 80:
        description = description[:77] + "..."

    # Listed in display order.
    return [
        ("Name", self.name),
        ("Description", description),
        ("Repeats", self.repeats),
        ("Folds", self.folds),
        ("Samples", self.samples),
    ]

def __eq__(self, other: Any) -> bool:
if (
(not isinstance(self, type(other)))
Expand Down Expand Up @@ -90,6 +109,8 @@ def __eq__(self, other: Any) -> bool:
return False
return True

__hash__ = None # type: ignore

@classmethod
def _from_arff_file(cls, filename: Path) -> OpenMLSplit: # noqa: C901, PLR0912
repetitions = None
Expand Down