openml · Omswastik-11 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -376,7 +376,8 @@ def initialize_model_from_run(run_id: int, *, strict_version: bool = True) -> An
     run = get_run(run_id)
     # TODO(eddiebergman): I imagine this is None if it's not published,
     # might need to raise an explicit error for that
-    assert run.setup_id is not None
+    if run.setup_id is None:
+        raise ValueError(f"Run {run_id} has no associated setup_id. Cannot initialize model.")
     return initialize_model(setup_id=run.setup_id, strict_version=strict_version)
 
 
@@ -416,7 +417,8 @@ def initialize_model_from_trace(
     run = get_run(run_id)
     # TODO(eddiebergman): I imagine this is None if it's not published,
     # might need to raise an explicit error for that
-    assert run.flow_id is not None
+    if run.flow_id is None:
+        raise ValueError(f"Run {run_id} has no associated flow_id. Cannot initialize model.")
 
     flow = get_flow(run.flow_id)
     run_trace = get_run_trace(run_id)
@@ -576,8 +578,10 @@ def _calculate_local_measure(  # type: ignore
             _user_defined_measures_fold[openml_name] = sklearn_fn(_test_y, _pred_y)
 
         if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
-            assert test_y is not None
-            assert proba_y is not None
+            if test_y is None:
+                raise ValueError("test_y cannot be None for classification tasks.")
+            if proba_y is None:
+                raise ValueError("proba_y cannot be None for classification tasks.")
 
             for i, tst_idx in enumerate(test_indices):
                 if task.class_labels is not None:
@@ -622,7 +626,8 @@ def _calculate_local_measure(  # type: ignore
                 )
 
         elif isinstance(task, OpenMLRegressionTask):
-            assert test_y is not None
+            if test_y is None:
+                raise ValueError("test_y cannot be None for regression tasks.")
             for i, _ in enumerate(test_indices):
                 truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
                 arff_line = format_prediction(
@@ -743,7 +748,8 @@ def _run_task_get_arffcontent_parallel_helper(  # noqa: PLR0913
 
     if isinstance(task, OpenMLSupervisedTask):
         x, y = task.get_X_and_y()
-        assert isinstance(y, (pd.Series, pd.DataFrame))
+        if not isinstance(y, (pd.Series, pd.DataFrame)):
+            raise TypeError(f"y must be a pandas Series or DataFrame, got {type(y).__name__}")
         train_x = x.iloc[train_indices]
         train_y = y.iloc[train_indices]
         test_x = x.iloc[test_indices]
@@ -1213,7 +1219,11 @@ def __list_runs(api_call: str) -> pd.DataFrame:
             f'"http://openml.org/openml": {runs_dict}',
         )
 
-    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
+    if not isinstance(runs_dict["oml:runs"]["oml:run"], list):
+        raise TypeError(  # name the type of the value that was actually checked
+            f"Expected runs_dict['oml:runs']['oml:run'] to be a list, "
+            f"got {type(runs_dict['oml:runs']['oml:run']).__name__}"
+        )
 
     runs = {
         int(r["oml:run_id"]): {

diff --git a/openml/runs/run.py b/openml/runs/run.py
@@ -389,6 +389,48 @@ def to_filesystem(
         if self.trace is not None:
             self.trace._to_filesystem(directory)
 
+    def _get_arff_attributes_for_task(self, task: OpenMLTask) -> list[tuple[str, Any]]:
+        """Build the ARFF attribute declarations for a task's predictions file.
+
+        Parameters
+        ----------
+        task : OpenMLTask
+            The task for which to generate attributes.
+
+        Returns
+        -------
+        list[tuple[str, Any]]
+            Attribute tuples ``(name, type)``; nominal types are the class-label list.
+
+        Raises
+        ------
+        ValueError
+            If a classification or learning curve task has no class labels.
+        NotImplementedError
+            If the task is not one of the task classes handled below.
+        """
+        index_columns: list[tuple[str, Any]] = [("repeat", "NUMERIC"), ("fold", "NUMERIC")]
+        if isinstance(task, (OpenMLLearningCurveTask, OpenMLClassificationTask)):
+            class_labels = task.class_labels
+            if class_labels is None:
+                raise ValueError("The task has no class labels")
+            index_columns += [("sample", "NUMERIC"), ("row_id", "NUMERIC")]  # "sample": legacy
+            targets = [("prediction", class_labels), ("correct", class_labels)]
+            confidences = [("confidence." + label, "NUMERIC") for label in class_labels]
+            return index_columns + targets + confidences
+
+        index_columns.append(("row_id", "NUMERIC"))
+        if isinstance(task, OpenMLRegressionTask):
+            return [*index_columns, ("prediction", "NUMERIC"), ("truth", "NUMERIC")]
+        if isinstance(task, OpenMLClusteringTask):
+            return [*index_columns, ("cluster", "NUMERIC")]
+        raise NotImplementedError(
+            f"Task type '{task.task_type}' is not yet supported. "
+            "Supported task types: Classification, Regression, Clustering, Learning Curve. "
+            f"Task ID: {task.task_id}. "
+            "Please check the OpenML documentation for supported task types."
+        )
+
     def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         """Generates the arff dictionary for uploading predictions to the
         server.
@@ -406,7 +448,8 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         if self.data_content is None:
             raise ValueError("Run has not been executed.")
         if self.flow is None:
-            assert self.flow_id is not None, "Run has no associated flow id!"
+            if self.flow_id is None:
+                raise ValueError("Run has no associated flow id!")
             self.flow = get_flow(self.flow_id)
 
         if self.description_text is None:
@@ -417,74 +460,7 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
         arff_dict["data"] = self.data_content
         arff_dict["description"] = self.description_text
         arff_dict["relation"] = f"openml_task_{task.task_id}_predictions"
-
-        if isinstance(task, OpenMLLearningCurveTask):
-            class_labels = task.class_labels
-            instance_specifications = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("sample", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-            ]
-
-            arff_dict["attributes"] = instance_specifications
-            if class_labels is not None:
-                arff_dict["attributes"] = (
-                    arff_dict["attributes"]
-                    + [("prediction", class_labels), ("correct", class_labels)]
-                    + [
-                        ("confidence." + class_labels[i], "NUMERIC")
-                        for i in range(len(class_labels))
-                    ]
-                )
-            else:
-                raise ValueError("The task has no class labels")
-
-        elif isinstance(task, OpenMLClassificationTask):
-            class_labels = task.class_labels
-            instance_specifications = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("sample", "NUMERIC"),  # Legacy
-                ("row_id", "NUMERIC"),
-            ]
-
-            arff_dict["attributes"] = instance_specifications
-            if class_labels is not None:
-                prediction_confidences = [
-                    ("confidence." + class_labels[i], "NUMERIC") for i in range(len(class_labels))
-                ]
-                prediction_and_true = [("prediction", class_labels), ("correct", class_labels)]
-                arff_dict["attributes"] = (
-                    arff_dict["attributes"] + prediction_and_true + prediction_confidences
-                )
-            else:
-                raise ValueError("The task has no class labels")
-
-        elif isinstance(task, OpenMLRegressionTask):
-            arff_dict["attributes"] = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-                ("prediction", "NUMERIC"),
-                ("truth", "NUMERIC"),
-            ]
-
-        elif isinstance(task, OpenMLClusteringTask):
-            arff_dict["attributes"] = [
-                ("repeat", "NUMERIC"),
-                ("fold", "NUMERIC"),
-                ("row_id", "NUMERIC"),
-                ("cluster", "NUMERIC"),
-            ]
-
-        else:
-            raise NotImplementedError(
-                f"Task type '{task.task_type}' is not yet supported. "
-                f"Supported task types: Classification, Regression, Clustering, Learning Curve. "
-                f"Task ID: {task.task_id}. "
-                f"Please check the OpenML documentation for supported task types."
-            )
+        arff_dict["attributes"] = self._get_arff_attributes_for_task(task)
 
         return arff_dict
 
@@ -641,7 +617,10 @@ def _get_file_elements(self) -> dict:
 
         if self.parameter_settings is None:
             if self.flow is None:
-                assert self.flow_id is not None  # for mypy
+                if self.flow_id is None:
+                    raise ValueError(
+                        "Run has no associated flow_id and cannot obtain parameter values."
+                    )
                 self.flow = openml.flows.get_flow(self.flow_id)
             self.parameter_settings = self.flow.extension.obtain_parameter_values(
                 self.flow,

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
@@ -94,7 +94,8 @@ def get_parameters(self) -> dict[str, Any]:
                 for param, value in self.setup_string.items()
             }
 
-        assert self.parameters is not None
+        if self.parameters is None:
+            raise ValueError("Parameters must be set before calling get_parameters().")
         return {param[len(PREFIX) :]: value for param, value in self.parameters.items()}
 
 
@@ -490,13 +491,21 @@ def merge_traces(cls, traces: list[OpenMLRunTrace]) -> OpenMLRunTrace:
             for iteration in trace:
                 key = (iteration.repeat, iteration.fold, iteration.iteration)
 
-                assert iteration.parameters is not None
+                if iteration.parameters is None:
+                    raise ValueError(
+                        f"Iteration parameters cannot be None for repeat {iteration.repeat}, "
+                        f"fold {iteration.fold}, iteration {iteration.iteration}"
+                    )
                 param_keys = iteration.parameters.keys()
 
                 if previous_iteration is not None:
                     trace_itr = merged_trace[previous_iteration]
 
-                    assert trace_itr.parameters is not None
+                    if trace_itr.parameters is None:
+                        raise ValueError(
+                            f"Trace iteration parameters cannot be None "
+                            f"for iteration {previous_iteration}"
+                        )
                     trace_itr_keys = trace_itr.parameters.keys()
 
                     if list(param_keys) != list(trace_itr_keys):