From ab2e5b478ae880d04f6ab02c49e96e26d0eabac5 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 20 Jan 2026 16:14:21 +0000 Subject: [PATCH 1/7] Remove the 'python' feature from vortex-error Signed-off-by: Adam Gutglick --- vortex-error/Cargo.toml | 1 - vortex-error/src/lib.rs | 3 -- vortex-error/src/python.rs | 24 --------- vortex-python/Cargo.toml | 2 +- vortex-python/src/arrays/builtins/struct_.rs | 3 +- vortex-python/src/arrays/compressed.rs | 3 +- vortex-python/src/arrays/from_arrow.rs | 16 +++--- vortex-python/src/arrays/into_array.rs | 11 +++- vortex-python/src/arrays/mod.rs | 57 +++++++++++--------- vortex-python/src/arrays/py/array.rs | 4 +- vortex-python/src/arrays/py/mod.rs | 5 +- vortex-python/src/arrays/py/python.rs | 3 +- vortex-python/src/arrays/py/vtable.rs | 8 ++- vortex-python/src/compress.rs | 3 +- vortex-python/src/dataset.rs | 20 +++---- vortex-python/src/dtype/factory.rs | 5 +- vortex-python/src/dtype/mod.rs | 9 ++-- vortex-python/src/error.rs | 40 ++++++++++++++ vortex-python/src/expr/mod.rs | 3 +- vortex-python/src/file.rs | 19 ++++--- vortex-python/src/io.rs | 12 +++-- vortex-python/src/iter/mod.rs | 9 ++-- vortex-python/src/iter/python.rs | 3 +- vortex-python/src/lib.rs | 1 + vortex-python/src/registry.rs | 3 +- vortex-python/src/scalar/factory.rs | 6 ++- vortex-python/src/scalar/struct_.rs | 8 +-- vortex-python/src/scan.rs | 15 +++--- vortex-python/src/serde/mod.rs | 27 +++++----- vortex-python/src/serde/parts.rs | 16 ++++-- vortex/Cargo.toml | 1 - 31 files changed, 205 insertions(+), 135 deletions(-) delete mode 100644 vortex-error/src/python.rs create mode 100644 vortex-python/src/error.rs diff --git a/vortex-error/Cargo.toml b/vortex-error/Cargo.toml index 859183b1bdd..88d9850bfbc 100644 --- a/vortex-error/Cargo.toml +++ b/vortex-error/Cargo.toml @@ -18,7 +18,6 @@ all-features = true [features] flatbuffers = ["dep:flatbuffers"] -python = ["dep:pyo3"] serde = ["dep:serde_json"] [dependencies] diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index bf1079f5e66..61a1f28df40 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -6,9 +6,6 @@ //! This crate defines error & result types for Vortex. //! It also contains a variety of useful macros for error handling. -#[cfg(feature = "python")] -pub mod python; - use std::backtrace::Backtrace; use std::borrow::Cow; use std::convert::Infallible; diff --git a/vortex-error/src/python.rs b/vortex-error/src/python.rs deleted file mode 100644 index 6d3dad0af5c..00000000000 --- a/vortex-error/src/python.rs +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Python bindings for Vortex errors. - -use std::backtrace::Backtrace; - -use pyo3::PyErr; -#[cfg(feature = "python")] -use pyo3::exceptions::PyValueError; - -use crate::VortexError; - -impl From for PyErr { - fn from(value: VortexError) -> Self { - PyValueError::new_err(value.to_string()) - } -} - -impl From for VortexError { - fn from(value: PyErr) -> Self { - VortexError::InvalidArgument(value.to_string().into(), Box::new(Backtrace::capture())) - } -} diff --git a/vortex-python/Cargo.toml b/vortex-python/Cargo.toml index 5f9a85d0835..47986dc7fb1 100644 --- a/vortex-python/Cargo.toml +++ b/vortex-python/Cargo.toml @@ -43,7 +43,7 @@ tokio = { workspace = true, features = ["fs", "rt-multi-thread"] } # This feature makes the underlying tracing logs to be emitted as `log` events tracing = { workspace = true, features = ["std", "log"] } url = { workspace = true } -vortex = { workspace = true, features = ["object_store", "python", "tokio"] } +vortex = { workspace = true, features = ["object_store", "tokio"] } [dev-dependencies] vortex-array = { workspace = true, features = ["_test-harness"] } diff --git a/vortex-python/src/arrays/builtins/struct_.rs b/vortex-python/src/arrays/builtins/struct_.rs index 53547a25132..0098948eb3d 100644 --- a/vortex-python/src/arrays/builtins/struct_.rs +++ b/vortex-python/src/arrays/builtins/struct_.rs @@ -12,6 +12,7 @@ use crate::arrays::PyArrayRef; use crate::arrays::native::AsArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; +use crate::error::PyVortexError; /// Concrete class for arrays with `vortex.struct` encoding. #[pyclass(name = "StructArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -24,7 +25,7 @@ impl EncodingSubclass for PyStructArray { #[pymethods] impl PyStructArray { /// Returns the given field of the struct array. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyResult { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result { let field = self_.as_array_ref().field_by_name(name)?.clone(); Ok(PyArrayRef::from(field)) } diff --git a/vortex-python/src/arrays/compressed.rs b/vortex-python/src/arrays/compressed.rs index c294d7ee5c0..ba68a56b834 100644 --- a/vortex-python/src/arrays/compressed.rs +++ b/vortex-python/src/arrays/compressed.rs @@ -19,6 +19,7 @@ use crate::PyVortex; use crate::arrays::PyArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; +use crate::error::PyVortexError; /// Concrete class for arrays with `vortex.alp` encoding. #[pyclass(name = "AlpArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -87,7 +88,7 @@ impl EncodingSubclass for PyZigZagArray { #[pymethods] impl PyZigZagArray { #[staticmethod] - pub fn encode(array: PyArrayRef) -> PyResult { + pub fn encode(array: PyArrayRef) -> Result { Ok(PyVortex( zigzag_encode(array.inner().clone().to_primitive())?.into_array(), )) diff --git a/vortex-python/src/arrays/from_arrow.rs b/vortex-python/src/arrays/from_arrow.rs index 0578cd9f0bc..14a57db5940 100644 --- a/vortex-python/src/arrays/from_arrow.rs +++ b/vortex-python/src/arrays/from_arrow.rs @@ -7,7 +7,6 @@ use arrow_array::make_array; use arrow_data::ArrayData as ArrowArrayData; use arrow_schema::DataType; use arrow_schema::Field; -use itertools::Itertools; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::ArrayRef; @@ -17,13 +16,13 @@ use vortex::array::arrow::FromArrowArray; use vortex::dtype::DType; use vortex::dtype::arrow::FromArrowType; use vortex::error::VortexError; -use vortex::error::VortexResult; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; +use crate::error::PyVortexError; /// Convert an Arrow object to a Vortex array. -pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyResult { +pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> Result { let pa = obj.py().import("pyarrow")?; let pa_array = pa.getattr("Array")?; let chunked_array = pa.getattr("ChunkedArray")?; @@ -56,15 +55,14 @@ pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyResult let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() - .map(|b| b.map_err(VortexError::from)) - .map_ok(|b| ArrayRef::from_arrow(b, false)) - .collect::>>()?; + .map(|b| -> Result<_, PyVortexError> { + Ok(ArrayRef::from_arrow(b.map_err(VortexError::from)?, false)) + }) + .collect::, _>>()?; Ok(PyArrayRef::from( ChunkedArray::try_new(chunks, dtype)?.into_array(), )) } else { - Err(PyValueError::new_err( - "Cannot convert object to Vortex array", - )) + Err(PyValueError::new_err("Cannot convert object to Vortex array").into()) } } diff --git a/vortex-python/src/arrays/into_array.rs b/vortex-python/src/arrays/into_array.rs index 83e42794463..65c32bf1ceb 100644 --- a/vortex-python/src/arrays/into_array.rs +++ b/vortex-python/src/arrays/into_array.rs @@ -9,6 +9,7 @@ use pyo3::Borrowed; use pyo3::FromPyObject; use pyo3::PyAny; use pyo3::PyErr; +use pyo3::exceptions::PyRuntimeError; use pyo3::exceptions::PyTypeError; use pyo3::types::PyAnyMethods; use vortex::array::ArrayRef; @@ -17,6 +18,7 @@ use vortex::array::iter::ArrayIteratorAdapter; use vortex::array::iter::ArrayIteratorExt; use vortex::dtype::DType; use vortex::dtype::arrow::FromArrowType as _; +use vortex::error::VortexError; use vortex::error::VortexResult; use crate::PyVortex; @@ -64,9 +66,14 @@ impl<'py> FromPyObject<'_, 'py> for PyIntoArray { let vortex_iter = arrow_stream .into_iter() .map(|batch_result| -> VortexResult<_> { - Ok(ArrayRef::from_arrow(batch_result?, false)) + Ok(ArrayRef::from_arrow( + batch_result.map_err(VortexError::from)?, + false, + )) }); - let array = ArrayIteratorAdapter::new(dtype, vortex_iter).read_all()?; + let array = ArrayIteratorAdapter::new(dtype, vortex_iter) + .read_all() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; return Ok(PyIntoArray(PyVortex(array))); } diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index 57412dd28b1..b8a248d0dab 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -33,7 +33,6 @@ use vortex::dtype::DType; use vortex::dtype::Nullability; use vortex::dtype::PType; use vortex::dtype::match_each_integer_ptype; -use vortex::error::VortexError; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; @@ -43,6 +42,7 @@ use crate::arrays::py::PyPythonArray; use crate::arrays::py::PythonArray; use crate::arrow::ToPyArrow; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::python_repr::PythonRepr; use crate::scalar::PyScalar; @@ -112,7 +112,7 @@ impl<'py> FromPyObject<'_, 'py> for PyArrayRef { impl<'py> IntoPyObject<'py> for PyArrayRef { type Target = PyAny; type Output = Bound<'py, PyAny>; - type Error = VortexError; + type Error = PyVortexError; fn into_pyobject(self, py: Python<'py>) -> Result { // If the ArrayRef is a PyArrayInstance, extract the Python object. @@ -214,7 +214,7 @@ impl PyArray { /// ------- /// :class:`~vortex.Array` #[staticmethod] - fn from_arrow(obj: Bound<'_, PyAny>) -> PyResult { + fn from_arrow(obj: Bound<'_, PyAny>) -> Result { from_arrow::from_arrow(&obj.as_borrowed()) } @@ -257,7 +257,10 @@ impl PyArray { /// ``` #[staticmethod] #[pyo3(signature = (range, *, dtype = None))] - fn from_range(range: Bound, dtype: Option>) -> PyResult { + fn from_range( + range: Bound, + dtype: Option>, + ) -> Result { let range = range.cast::()?; let start = range.start()?; let stop = range.stop()?; @@ -268,7 +271,8 @@ impl PyArray { let DType::Primitive(ptype, ..) = &dtype else { return Err(PyValueError::new_err( "Cannot construct non-numeric array from a range.", - )); + ) + .into()); }; (*ptype, dtype) } else { @@ -313,7 +317,9 @@ impl PyArray { /// ] /// ``` /// - fn to_arrow_array<'py>(self_: &'py Bound<'py, Self>) -> PyResult> { + fn to_arrow_array<'py>( + self_: &'py Bound<'py, Self>, + ) -> Result, PyVortexError> { // NOTE(ngates): for struct arrays, we could also return a RecordBatchStreamReader. let array = PyArrayRef::extract(self_.as_any().as_borrowed())?.into_inner(); let py = self_.py(); @@ -326,8 +332,10 @@ impl PyArray { let chunks = chunked_array .chunks() .iter() - .map(|chunk| PyResult::Ok(chunk.clone().into_arrow(&arrow_dtype)?)) - .collect::>>()?; + .map(|chunk| -> Result<_, PyVortexError> { + Ok(chunk.clone().into_arrow(&arrow_dtype)?) + }) + .collect::, _>>()?; let pa_data_type = arrow_dtype.clone().to_pyarrow(py)?; let chunks = chunks @@ -339,11 +347,11 @@ impl PyArray { PyDict::from_sequence(&PyList::new(py, vec![("type", pa_data_type)])?.into_any())?; // Combine into a chunked array - PyModule::import(py, "pyarrow")?.call_method( + Ok(PyModule::import(py, "pyarrow")?.call_method( "chunked_array", (PyList::new(py, chunks)?,), Some(&kwargs), - ) + )?) } else { Ok(array .clone() @@ -417,42 +425,42 @@ impl PyArray { } ///Rust docs are *not* copied into Python for __lt__: https://github.com/PyO3/pyo3/issues/4326 - fn __lt__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __lt__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&slf, &*other, Operator::Lt)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __le__: https://github.com/PyO3/pyo3/issues/4326 - fn __le__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __le__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Lte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __eq__: https://github.com/PyO3/pyo3/issues/4326 - fn __eq__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __eq__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Eq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ne__: https://github.com/PyO3/pyo3/issues/4326 - fn __ne__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __ne__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::NotEq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ge__: https://github.com/PyO3/pyo3/issues/4326 - fn __ge__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __ge__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __gt__: https://github.com/PyO3/pyo3/issues/4326 - fn __gt__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __gt__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gt)?; Ok(PyArrayRef::from(inner)) @@ -486,7 +494,7 @@ impl PyArray { /// 5 /// ] /// ``` - fn filter(slf: Bound, mask: PyArrayRef) -> PyResult { + fn filter(slf: Bound, mask: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let mask = (&*mask as &dyn Array).to_bool().to_mask_fill_null_false(); let inner = vortex::compute::filter(&*slf, &mask)?; @@ -563,10 +571,10 @@ impl PyArray { /// OverflowError: can't convert negative int to unsigned /// ``` // TODO(ngates): return a vortex.Scalar - fn scalar_at(slf: Bound, index: usize) -> PyResult> { + fn scalar_at(slf: Bound, index: usize) -> Result, PyVortexError> { let py = slf.py(); let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); - PyScalar::init(py, slf.scalar_at(index)?) + Ok(PyScalar::init(py, slf.scalar_at(index)?)?) } /// Filter, permute, and/or repeat elements by their index. @@ -611,14 +619,15 @@ impl PyArray { /// "a" /// ] /// ``` - fn take(slf: Bound, indices: PyArrayRef) -> PyResult { + fn take(slf: Bound, indices: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); if !indices.dtype().is_int() { return Err(PyValueError::new_err(format!( "indices: expected int or uint array, but found: {}", indices.dtype().python_repr() - ))); + )) + .into()); } let inner = take(&slf, &*indices)?; @@ -668,7 +677,7 @@ impl PyArray { .to_string()) } - fn serialize(slf: &Bound, ctx: &PyArrayContext) -> PyResult>> { + fn serialize(slf: &Bound, ctx: &PyArrayContext) -> Result>, PyVortexError> { // FIXME(ngates): do not copy to vec, use buffer protocol let array = PyArrayRef::extract(slf.as_any().as_borrowed())?; Ok(array @@ -684,7 +693,7 @@ impl PyArray { /// the data needed for pickle to reconstruct the array. fn __reduce__<'py>( slf: &'py Bound<'py, Self>, - ) -> PyResult<(Bound<'py, PyAny>, Bound<'py, PyAny>)> { + ) -> Result<(Bound<'py, PyAny>, Bound<'py, PyAny>), PyVortexError> { let py = slf.py(); let array = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); @@ -711,7 +720,7 @@ impl PyArray { fn __reduce_ex__<'py>( slf: &'py Bound<'py, Self>, protocol: i32, - ) -> PyResult<(Bound<'py, PyAny>, Bound<'py, PyAny>)> { + ) -> Result<(Bound<'py, PyAny>, Bound<'py, PyAny>), PyVortexError> { let py = slf.py(); if protocol < 5 { diff --git a/vortex-python/src/arrays/py/array.rs b/vortex-python/src/arrays/py/array.rs index 3d471ec52de..ab65e3fece1 100644 --- a/vortex-python/src/arrays/py/array.rs +++ b/vortex-python/src/arrays/py/array.rs @@ -11,9 +11,9 @@ use pyo3::prelude::*; use vortex::array::stats::ArrayStats; use vortex::array::vtable::ArrayId; use vortex::dtype::DType; -use vortex::error::VortexError; use crate::arrays::py::PyPythonArray; +use crate::error::PyVortexError; /// Wrapper struct encapsulating a Vortex array implemented using a Python object. /// @@ -48,7 +48,7 @@ impl<'py> FromPyObject<'_, 'py> for PythonArray { impl<'py> IntoPyObject<'py> for PythonArray { type Target = PyAny; type Output = Bound<'py, PyAny>; - type Error = VortexError; + type Error = PyVortexError; fn into_pyobject(self, py: Python<'py>) -> Result { Ok(self.object.bind(py).to_owned()) diff --git a/vortex-python/src/arrays/py/mod.rs b/vortex-python/src/arrays/py/mod.rs index de02a866c2d..0b2f88dd20b 100644 --- a/vortex-python/src/arrays/py/mod.rs +++ b/vortex-python/src/arrays/py/mod.rs @@ -12,11 +12,12 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; pub(crate) use python::*; use vortex::array::vtable::ArrayId; -use vortex::error::VortexResult; pub(crate) use vtable::*; +use crate::error::PyVortexError; + /// Extract the array id from a Python class `id` attribute. -pub fn id_from_obj(cls: &Bound) -> VortexResult { +pub fn id_from_obj(cls: &Bound) -> Result { Ok(ArrayId::new_arc( cls.getattr("id") .map_err(|_| { diff --git a/vortex-python/src/arrays/py/python.rs b/vortex-python/src/arrays/py/python.rs index e5e5c5c1859..c8160255d66 100644 --- a/vortex-python/src/arrays/py/python.rs +++ b/vortex-python/src/arrays/py/python.rs @@ -10,6 +10,7 @@ use vortex::dtype::DType; use crate::arrays::PyArray; use crate::arrays::py::id_from_obj; use crate::dtype::PyDType; +use crate::error::PyVortexError; /// Base class for implementing a Vortex encoding in Python. /// @@ -31,7 +32,7 @@ impl PyPythonArray { cls: &Bound<'_, PyType>, len: usize, dtype: PyDType, - ) -> PyResult> { + ) -> Result, PyVortexError> { let id = id_from_obj(cls)?; Ok(PyClassInitializer::from(PyArray).add_subclass(Self { id, diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 8e65e50993e..d250d477b01 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -65,13 +65,17 @@ impl VTable for PythonVTable { fn metadata(array: &PythonArray) -> VortexResult { Python::attach(|py| { let obj = array.object.bind(py); - if !obj.hasattr(intern!(py, "metadata"))? { + if !obj + .hasattr(intern!(py, "metadata")) + .map_err(|e| vortex_err!("{}", e))? + { // The class does not have a metadata attribute so does not support serialization. return Ok(RawMetadata(vec![])); } let bytes = obj - .call_method("__vx_metadata__", (), None)? + .call_method("__vx_metadata__", (), None) + .map_err(|e| vortex_err!("{}", e))? .cast::() .map_err(|_| vortex_err!("Expected array metadata to be Python bytes"))? .as_bytes() diff --git a/vortex-python/src/compress.rs b/vortex-python/src/compress.rs index 093fdba7512..5d2c3bd93c9 100644 --- a/vortex-python/src/compress.rs +++ b/vortex-python/src/compress.rs @@ -5,6 +5,7 @@ use pyo3::prelude::*; use vortex::compressor::BtrBlocksCompressor; use crate::arrays::PyArrayRef; +use crate::error::PyVortexError; use crate::install_module; pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { @@ -49,7 +50,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// >>> str(vx.compress(a)) /// 'vortex.alp(f64?, len=1000)' #[pyfunction] -pub fn compress(array: PyArrayRef) -> PyResult { +pub fn compress(array: PyArrayRef) -> Result { let compressed = BtrBlocksCompressor::default().compress(array.inner())?; Ok(PyArrayRef::from(compressed)) } diff --git a/vortex-python/src/dataset.rs b/vortex-python/src/dataset.rs index b959d3b5362..b8f769974b8 100644 --- a/vortex-python/src/dataset.rs +++ b/vortex-python/src/dataset.rs @@ -29,6 +29,7 @@ use crate::TOKIO_RUNTIME; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::arrow::ToPyArrow; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::object_store_urls::object_store_from_url; @@ -133,7 +134,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, - ) -> PyResult { + ) -> Result { let array = read_array_from_reader( &self.vxf, projection_from_python(columns)?, @@ -151,7 +152,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> PyResult> { + ) -> Result, PyVortexError> { let mut scan = self_ .vxf .scan()? @@ -167,7 +168,7 @@ impl PyVortexDataset { let reader: Box = Box::new(scan.into_record_batch_reader(schema, &*RUNTIME)?); - reader.into_pyarrow(self_.py()) + Ok(reader.into_pyarrow(self_.py())?) } /// The number of rows matching the filter. @@ -177,13 +178,15 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> PyResult { + ) -> Result { if row_filter.is_none() { let row_count = match row_range { Some(range) => range.1 - range.0, None => self_.vxf.row_count(), }; - return row_count.try_into().map_err(PyValueError::new_err); + return row_count + .try_into() + .map_err(|e| PyValueError::new_err(e).into()); } let mut scan = self_ @@ -200,15 +203,14 @@ impl PyVortexDataset { let n_rows: usize = scan .into_array_iter(&*RUNTIME)? .map_ok(|array| array.len()) - .process_results(|iter| iter.sum()) - .map_err(|err| PyValueError::new_err(format!("vortex error: {}", err)))?; + .process_results(|iter| iter.sum())?; Ok(n_rows) } /// The natural splits of this Dataset. #[pyo3(signature = (*))] - pub fn splits(&self) -> VortexResult> { + pub fn splits(&self) -> Result, PyVortexError> { Ok(self .vxf .splits()? @@ -219,6 +221,6 @@ impl PyVortexDataset { } #[pyfunction] -pub fn dataset_from_url(py: Python, url: &str) -> PyResult { +pub fn dataset_from_url(py: Python, url: &str) -> Result { Ok(py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?) } diff --git a/vortex-python/src/dtype/factory.rs b/vortex-python/src/dtype/factory.rs index 07200ad720a..200ac642c4a 100644 --- a/vortex-python/src/dtype/factory.rs +++ b/vortex-python/src/dtype/factory.rs @@ -21,6 +21,7 @@ use vortex::dtype::PType; use vortex::dtype::StructFields; use crate::dtype::PyDType; +use crate::error::PyVortexError; /// Construct the data type for a column containing only the null value. /// @@ -260,9 +261,9 @@ pub(super) fn dtype_decimal( precision: u8, scale: i8, nullable: bool, -) -> PyResult> { +) -> Result, PyVortexError> { let decimal_type = DType::Decimal(DecimalDType::try_new(precision, scale)?, nullable.into()); - PyDType::init(py, decimal_type) + Ok(PyDType::init(py, decimal_type)?) } /// Construct a UTF-8-encoded string data type. diff --git a/vortex-python/src/dtype/mod.rs b/vortex-python/src/dtype/mod.rs index 405d08a9280..1d3e42f63b0 100644 --- a/vortex-python/src/dtype/mod.rs +++ b/vortex-python/src/dtype/mod.rs @@ -47,6 +47,7 @@ use crate::dtype::null::PyNullDType; use crate::dtype::primitive::PyPrimitiveDType; use crate::dtype::struct_::PyStructDType; use crate::dtype::utf8::PyUtf8DType; +use crate::error::PyVortexError; use crate::install_module; use crate::python_repr::PythonRepr; @@ -153,12 +154,12 @@ impl PyDType { #[pymethods] impl PyDType { - fn to_arrow_type(&self, py: Python) -> PyResult> { - self.0.to_arrow_dtype()?.to_pyarrow(py) + fn to_arrow_type(&self, py: Python) -> Result, PyVortexError> { + Ok(self.0.to_arrow_dtype()?.to_pyarrow(py)?) } - fn to_arrow_schema(&self, py: Python) -> PyResult> { - self.0.to_arrow_schema()?.to_pyarrow(py) + fn to_arrow_schema(&self, py: Python) -> Result, PyVortexError> { + Ok(self.0.to_arrow_schema()?.to_pyarrow(py)?) } fn __str__(&self) -> String { diff --git a/vortex-python/src/error.rs b/vortex-python/src/error.rs new file mode 100644 index 00000000000..ff283472e8f --- /dev/null +++ b/vortex-python/src/error.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use pyo3::CastError; +use pyo3::PyErr; +use pyo3::exceptions::PyRuntimeError; +use vortex::error::VortexError; + +/// Error type to merge [`VortexError`] and [`PyErr`]. +pub enum PyVortexError { + Py(PyErr), + Vortex(VortexError), +} + +impl From for PyVortexError { + fn from(value: PyErr) -> Self { + Self::Py(value) + } +} + +impl From for PyVortexError { + fn from(value: VortexError) -> Self { + Self::Vortex(value) + } +} + +impl<'py, 'a> From> for PyVortexError { + fn from(value: CastError<'py, 'a>) -> Self { + Self::Py(value.into()) + } +} + +impl From for PyErr { + fn from(value: PyVortexError) -> Self { + match value { + PyVortexError::Py(py) => py, + PyVortexError::Vortex(vx) => PyRuntimeError::new_err(vx.to_string()), + } + } +} diff --git a/vortex-python/src/expr/mod.rs b/vortex-python/src/expr/mod.rs index 3d74c005361..cf47f260844 100644 --- a/vortex-python/src/expr/mod.rs +++ b/vortex-python/src/expr/mod.rs @@ -22,6 +22,7 @@ use vortex::expr::not; use crate::arrays::PyArrayRef; use crate::arrays::into_array::PyIntoArray; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::scalar::factory::scalar_helper; @@ -213,7 +214,7 @@ impl PyExpr { /// vortex.open : Open an on-disk Vortex array for scanning with an expression. /// vortex.VortexFile : An on-disk Vortex array ready to scan with an expression. /// vortex.VortexFile.scan : Scan an on-disk Vortex array with an expression. - fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> PyResult { + fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> Result { Ok(PyArrayRef::from(self_.evaluate(array.inner())?)) } } diff --git a/vortex-python/src/file.rs b/vortex-python/src/file.rs index 1b3f1f94eb4..3cefde9f0d0 100644 --- a/vortex-python/src/file.rs +++ b/vortex-python/src/file.rs @@ -31,6 +31,7 @@ use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dataset::PyVortexDataset; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -49,7 +50,11 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { #[pyfunction] #[pyo3(signature = (path, *, without_segment_cache = false))] -pub fn open(py: Python, path: &str, without_segment_cache: bool) -> PyResult { +pub fn open( + py: Python, + path: &str, + without_segment_cache: bool, +) -> Result { let vxf = py.detach(|| { TOKIO_RUNTIME.block_on(async move { let mut options = SESSION.open_options(); @@ -89,7 +94,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> PyResult { + ) -> Result { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -109,7 +114,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> PyResult { + ) -> Result { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -131,7 +136,7 @@ impl PyVortexFile { projection: Option, expr: Option, batch_size: Option, - ) -> PyResult> { + ) -> Result, PyVortexError> { let vxf = slf.get().vxf.clone(); let reader = slf.py().detach(|| { @@ -149,15 +154,15 @@ impl PyVortexFile { })?; let rbr: Box = Box::new(reader); - rbr.into_pyarrow(slf.py()) + Ok(rbr.into_pyarrow(slf.py())?) } - fn to_dataset(slf: Bound) -> PyResult { + fn to_dataset(slf: Bound) -> Result { Ok(PyVortexDataset::try_new(slf.get().vxf.clone())?) } #[pyo3(signature = (*))] - pub fn splits(&self) -> VortexResult> { + pub fn splits(&self) -> Result, PyVortexError> { Ok(self .vxf .splits()? diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index 4b70ff366d6..4f42504cb0b 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -29,6 +29,7 @@ use crate::arrays::PyArray; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; use crate::dataset::PyVortexDataset; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -111,7 +112,7 @@ pub fn read_url<'py>( row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, -) -> PyResult { +) -> Result { let dataset = py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?; dataset.to_array(projection, row_filter, indices, row_range) } @@ -168,7 +169,7 @@ pub fn read_url<'py>( /// :func:`vortex.io.VortexWriteOptions` #[pyfunction] #[pyo3(signature = (iter, path))] -pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> PyResult<()> { +pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> Result<(), PyVortexError> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; @@ -282,7 +283,12 @@ impl PyVortexWriteOptions { /// /// :func:`vortex.io.write` #[pyo3(signature = (iter, path))] - pub fn write_path(&self, py: Python, iter: PyIntoArrayIterator, path: &str) -> PyResult<()> { + pub fn write_path( + &self, + py: Python, + iter: PyIntoArrayIterator, + path: &str, + ) -> Result<(), PyVortexError> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; diff --git a/vortex-python/src/iter/mod.rs b/vortex-python/src/iter/mod.rs index 77db4badf58..23a108114e6 100644 --- a/vortex-python/src/iter/mod.rs +++ b/vortex-python/src/iter/mod.rs @@ -29,6 +29,7 @@ use vortex::dtype::DType; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::iter::python::PythonArrayIterator; @@ -81,7 +82,7 @@ impl PyArrayIterator { } /// Returns the next chunk from the iterator. - fn __next__(&self, py: Python) -> PyResult> { + fn __next__(&self, py: Python) -> Result, PyVortexError> { py.detach(|| { Ok(self .iter @@ -95,7 +96,7 @@ impl PyArrayIterator { /// Read all chunks into a single :class:`vortex.Array`. If there are multiple chunks, /// this will be a :class:`vortex.ChunkedArray`, otherwise it will be a single array. - fn read_all(&self, py: Python) -> PyResult { + fn read_all(&self, py: Python) -> Result { let array = py.detach(|| { if let Some(iter) = self.iter.lock().take() { iter.read_all() @@ -110,7 +111,7 @@ impl PyArrayIterator { /// Convert the :class:`vortex.ArrayIterator` into a :class:`pyarrow.RecordBatchReader`. /// /// Note that this performs the conversion on the current thread. - fn to_arrow(slf: Bound) -> PyResult> { + fn to_arrow(slf: Bound) -> Result, PyVortexError> { let schema = Arc::new(slf.get().dtype().to_arrow_schema()?); let data_type = DataType::Struct(schema.fields().clone()); @@ -132,7 +133,7 @@ impl PyArrayIterator { schema, )); - record_batch_reader.into_pyarrow(slf.py()) + Ok(record_batch_reader.into_pyarrow(slf.py())?) } /// Create a :class:`vortex.ArrayIterator` from an iterator of :class:`vortex.Array`. diff --git a/vortex-python/src/iter/python.rs b/vortex-python/src/iter/python.rs index 194bb2c7a5b..2c946f70367 100644 --- a/vortex-python/src/iter/python.rs +++ b/vortex-python/src/iter/python.rs @@ -9,6 +9,7 @@ use vortex::array::ArrayRef; use vortex::array::iter::ArrayIterator; use vortex::dtype::DType; use vortex::error::VortexResult; +use vortex::error::vortex_err; use crate::arrays::PyArrayRef; @@ -51,7 +52,7 @@ impl Iterator for PythonArrayIterator { Ok(array) } }) - .map_err(|pyerr| pyerr.into()) + .map_err(|pyerr| vortex_err!("{}", pyerr)) }) }) } diff --git a/vortex-python/src/lib.rs b/vortex-python/src/lib.rs index 48f0f59f083..e47814be016 100644 --- a/vortex-python/src/lib.rs +++ b/vortex-python/src/lib.rs @@ -12,6 +12,7 @@ mod arrow; mod compress; mod dataset; pub(crate) mod dtype; +mod error; mod expr; mod file; mod io; diff --git a/vortex-python/src/registry.rs b/vortex-python/src/registry.rs index 3e9edebfddd..d93fb79fa86 100644 --- a/vortex-python/src/registry.rs +++ b/vortex-python/src/registry.rs @@ -10,6 +10,7 @@ use vortex::array::session::ArraySessionExt; use crate::SESSION; use crate::arrays::py::PythonVTable; use crate::arrays::py::id_from_obj; +use crate::error::PyVortexError; use crate::install_module; /// Register serde functions and classes. @@ -27,7 +28,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// /// It's not currently possible to register a layout encoding from Python. #[pyfunction] -pub(crate) fn register(cls: &Bound) -> PyResult<()> { +pub(crate) fn register(cls: &Bound) -> Result<(), PyVortexError> { let id = id_from_obj(cls)?; // TODO(ngates): we would need to register the Python class object in a PyVortexSession // to call back into it during deserialize operations. diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index db7176fc0de..f590a56f0c4 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -21,6 +21,7 @@ use vortex::dtype::StructFields; use vortex::scalar::Scalar; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::scalar::PyScalar; use crate::scalar::bool; @@ -38,7 +39,10 @@ pub fn scalar<'py>( ) } -pub fn scalar_helper(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyResult { +pub fn scalar_helper( + value: &Bound<'_, PyAny>, + dtype: Option<&DType>, +) -> Result { let scalar = scalar_helper_inner(value, dtype)?; // If a dtype was provided, attempt to cast the scalar to that dtype. diff --git a/vortex-python/src/scalar/struct_.rs b/vortex-python/src/scalar/struct_.rs index 64924361753..0197de9d2f9 100644 --- a/vortex-python/src/scalar/struct_.rs +++ b/vortex-python/src/scalar/struct_.rs @@ -5,13 +5,13 @@ use pyo3::IntoPyObject; use pyo3::Py; use pyo3::PyAny; use pyo3::PyRef; -use pyo3::PyResult; use pyo3::pyclass; use pyo3::pymethods; use vortex::error::vortex_err; use vortex::scalar::StructScalar; use crate::PyVortex; +use crate::error::PyVortexError; use crate::scalar::AsScalarRef; use crate::scalar::PyScalar; use crate::scalar::ScalarSubclass; @@ -27,11 +27,13 @@ impl ScalarSubclass for PyStructScalar { #[pymethods] impl PyStructScalar { /// Return the child scalar with the given field name. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyResult> { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result, PyVortexError> { let scalar = self_.as_scalar_ref(); let child = scalar .field(name) .ok_or_else(|| vortex_err!("No field {name}"))?; - PyVortex(&child).into_pyobject(self_.py()).map(|v| v.into()) + Ok(PyVortex(&child) + .into_pyobject(self_.py()) + .map(|v| v.into())?) } } diff --git a/vortex-python/src/scan.rs b/vortex-python/src/scan.rs index 6ea00bda112..7f97b781b1b 100644 --- a/vortex-python/src/scan.rs +++ b/vortex-python/src/scan.rs @@ -8,6 +8,7 @@ use vortex::array::ArrayRef; use vortex::scan::RepeatedScan; use crate::RUNTIME; +use crate::error::PyVortexError; use crate::install_module; use crate::iter::PyArrayIterator; use crate::scalar::PyScalar; @@ -35,7 +36,7 @@ impl PyRepeatedScan { slf: Bound, start: Option, stop: Option, - ) -> PyResult { + ) -> Result { let row_count = slf.get().row_count; let row_range = match (start, stop) { (Some(start), Some(stop)) => Some(start..stop), @@ -49,13 +50,14 @@ impl PyRepeatedScan { ))) } - fn scalar_at(slf: Bound, index: u64) -> PyResult> { + fn scalar_at(slf: Bound, index: u64) -> Result, PyVortexError> { let row_count = slf.get().row_count; if index >= row_count { return Err(PyIndexError::new_err(format!( "Index out of bounds: {} >= {}", index, row_count - ))); + )) + .into()); } for batch in slf @@ -68,12 +70,9 @@ impl PyRepeatedScan { continue; } let scalar = array.scalar_at(0)?; - return PyScalar::init(slf.py(), scalar); + return Ok(PyScalar::init(slf.py(), scalar)?); } - Err(PyIndexError::new_err(format!( - "Index {} not found in the scan", - index - ))) + Err(PyIndexError::new_err(format!("Index {} not found in the scan", index)).into()) } } diff --git a/vortex-python/src/serde/mod.rs b/vortex-python/src/serde/mod.rs index 32d697e6f4b..51017a31ad0 100644 --- a/vortex-python/src/serde/mod.rs +++ b/vortex-python/src/serde/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod parts; use bytes::Bytes; use pyo3::Bound; use pyo3::Python; +use pyo3::buffer::PyBuffer; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::session::ArraySessionExt; @@ -16,6 +17,7 @@ use vortex::ipc::messages::PollRead; use crate::SESSION; use crate::arrays::PyArrayRef; +use crate::error::PyVortexError; use crate::install_module; use crate::serde::context::PyArrayContext; use crate::serde::parts::PyArrayParts; @@ -51,17 +53,20 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// Array /// The decoded Vortex array #[pyfunction] -fn decode_ipc_array(array_bytes: Vec, dtype_bytes: Vec) -> PyResult { +fn decode_ipc_array( + array_bytes: Vec, + dtype_bytes: Vec, +) -> Result { let mut decoder = MessageDecoder::default(); let mut dtype_buf = Bytes::from(dtype_bytes); let dtype = match decoder.read_next(&mut dtype_buf)? { PollRead::Some(DecoderMessage::DType(dtype)) => dtype, PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected DType message")); + return Err(PyValueError::new_err("Expected DType message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete DType message")); + return Err(PyValueError::new_err("Incomplete DType message").into()); } }; @@ -71,10 +76,10 @@ fn decode_ipc_array(array_bytes: Vec, dtype_bytes: Vec) -> PyResult { - return Err(PyValueError::new_err("Expected Array message")); + return Err(PyValueError::new_err("Expected Array message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete Array message")); + return Err(PyValueError::new_err("Incomplete Array message").into()); } }; @@ -102,9 +107,7 @@ fn decode_ipc_array_buffers<'py>( py: Python<'py>, array_buffers: Vec>, dtype_buffers: Vec>, -) -> PyResult { - use pyo3::buffer::PyBuffer; - +) -> Result { let mut decoder = MessageDecoder::default(); // Concatenate dtype buffers @@ -125,10 +128,10 @@ fn decode_ipc_array_buffers<'py>( let dtype = match decoder.read_next(&mut dtype_buf)? { PollRead::Some(DecoderMessage::DType(dtype)) => dtype, PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected DType message")); + return Err(PyValueError::new_err("Expected DType message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete DType message")); + return Err(PyValueError::new_err("Incomplete DType message").into()); } }; @@ -151,10 +154,10 @@ fn decode_ipc_array_buffers<'py>( parts.decode(&dtype, row_count, &ctx, SESSION.arrays().registry())? } PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected Array message")); + return Err(PyValueError::new_err("Expected Array message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete Array message")); + return Err(PyValueError::new_err("Incomplete Array message").into()); } }; diff --git a/vortex-python/src/serde/parts.rs b/vortex-python/src/serde/parts.rs index faf9ab29537..dc2a269ef07 100644 --- a/vortex-python/src/serde/parts.rs +++ b/vortex-python/src/serde/parts.rs @@ -6,7 +6,6 @@ use std::ops::Deref; use pyo3::Bound; use pyo3::PyAny; use pyo3::PyRef; -use pyo3::PyResult; use pyo3::Python; use pyo3::prelude::PyAnyMethods; use pyo3::pyclass; @@ -18,6 +17,7 @@ use vortex::buffer::ByteBuffer; use crate::SESSION; use crate::arrays::PyArrayRef; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::serde::context::PyArrayContext; /// ArrayParts is a parsed representation of a serialized array. @@ -44,7 +44,7 @@ impl From for PyArrayParts { impl PyArrayParts { /// Parse a serialized array into its parts. #[staticmethod] - fn parse(data: &[u8]) -> PyResult { + fn parse(data: &[u8]) -> Result { // TODO(ngates): create a buffer from a slice of bytes? let buffer = ByteBuffer::copy_from(data); Ok(PyArrayParts(ArrayParts::try_from(buffer)?)) @@ -55,7 +55,12 @@ impl PyArrayParts { /// # Returns /// /// The decoded array. - fn decode(&self, ctx: &PyArrayContext, dtype: PyDType, len: usize) -> PyResult { + fn decode( + &self, + ctx: &PyArrayContext, + dtype: PyDType, + len: usize, + ) -> Result { Ok(PyArrayRef::from(self.0.decode( dtype.inner(), len, @@ -79,7 +84,10 @@ impl PyArrayParts { /// Return the buffers of the array, currently as :class:`pyarrow.Buffer`. // TODO(ngates): ideally we'd use the buffer protocol, but that requires the 3.11 ABI. #[getter] - fn buffers<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult>> { + fn buffers<'py>( + slf: PyRef<'py, Self>, + py: Python<'py>, + ) -> Result>, PyVortexError> { if slf.nbuffers() == 0 { return Ok(Vec::new()); } diff --git a/vortex/Cargo.toml b/vortex/Cargo.toml index 31f6daf0932..20fff1917f8 100644 --- a/vortex/Cargo.toml +++ b/vortex/Cargo.toml @@ -72,7 +72,6 @@ default = ["files", "zstd"] files = ["dep:vortex-file"] memmap2 = ["vortex-buffer/memmap2"] object_store = ["vortex-file/object_store", "vortex-io/object_store"] -python = ["vortex-error/python"] tokio = ["vortex-file/tokio"] zstd = ["dep:vortex-zstd", "vortex-file/zstd", "vortex-layout/zstd"] pretty = ["vortex-array/table-display"] From 22a773ab1004f1b2a9a499d18133248471067e28 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 11:19:30 +0000 Subject: [PATCH 2/7] Type alias PyVortexResult Signed-off-by: Adam Gutglick --- vortex-python/src/arrays/builtins/struct_.rs | 4 +-- vortex-python/src/arrays/compressed.rs | 4 +-- vortex-python/src/arrays/from_arrow.rs | 6 ++-- vortex-python/src/arrays/mod.rs | 31 +++++++++----------- vortex-python/src/arrays/py/mod.rs | 4 +-- vortex-python/src/arrays/py/python.rs | 4 +-- vortex-python/src/compress.rs | 4 +-- vortex-python/src/dataset.rs | 12 ++++---- vortex-python/src/dtype/factory.rs | 4 +-- vortex-python/src/dtype/mod.rs | 6 ++-- vortex-python/src/error.rs | 3 ++ vortex-python/src/expr/mod.rs | 4 +-- vortex-python/src/file.rs | 18 +++++------- vortex-python/src/io.rs | 8 ++--- vortex-python/src/iter/mod.rs | 8 ++--- vortex-python/src/registry.rs | 4 +-- vortex-python/src/scalar/factory.rs | 7 ++--- vortex-python/src/scalar/struct_.rs | 4 +-- vortex-python/src/scan.rs | 6 ++-- vortex-python/src/serde/mod.rs | 9 ++---- vortex-python/src/serde/parts.rs | 8 ++--- 21 files changed, 74 insertions(+), 84 deletions(-) diff --git a/vortex-python/src/arrays/builtins/struct_.rs b/vortex-python/src/arrays/builtins/struct_.rs index 0098948eb3d..e5866aec3ad 100644 --- a/vortex-python/src/arrays/builtins/struct_.rs +++ b/vortex-python/src/arrays/builtins/struct_.rs @@ -12,7 +12,7 @@ use crate::arrays::PyArrayRef; use crate::arrays::native::AsArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Concrete class for arrays with `vortex.struct` encoding. #[pyclass(name = "StructArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -25,7 +25,7 @@ impl EncodingSubclass for PyStructArray { #[pymethods] impl PyStructArray { /// Returns the given field of the struct array. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyVortexResult { let field = self_.as_array_ref().field_by_name(name)?.clone(); Ok(PyArrayRef::from(field)) } diff --git a/vortex-python/src/arrays/compressed.rs b/vortex-python/src/arrays/compressed.rs index ba68a56b834..95b4035b8d3 100644 --- a/vortex-python/src/arrays/compressed.rs +++ b/vortex-python/src/arrays/compressed.rs @@ -19,7 +19,7 @@ use crate::PyVortex; use crate::arrays::PyArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Concrete class for arrays with `vortex.alp` encoding. #[pyclass(name = "AlpArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -88,7 +88,7 @@ impl EncodingSubclass for PyZigZagArray { #[pymethods] impl PyZigZagArray { #[staticmethod] - pub fn encode(array: PyArrayRef) -> Result { + pub fn encode(array: PyArrayRef) -> PyVortexResult { Ok(PyVortex( zigzag_encode(array.inner().clone().to_primitive())?.into_array(), )) diff --git a/vortex-python/src/arrays/from_arrow.rs b/vortex-python/src/arrays/from_arrow.rs index 14a57db5940..bee687790e3 100644 --- a/vortex-python/src/arrays/from_arrow.rs +++ b/vortex-python/src/arrays/from_arrow.rs @@ -19,10 +19,10 @@ use vortex::error::VortexError; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Convert an Arrow object to a Vortex array. -pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> Result { +pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyVortexResult { let pa = obj.py().import("pyarrow")?; let pa_array = pa.getattr("Array")?; let chunked_array = pa.getattr("ChunkedArray")?; @@ -55,7 +55,7 @@ pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> Result Result<_, PyVortexError> { + .map(|b| -> PyVortexResult<_> { Ok(ArrayRef::from_arrow(b.map_err(VortexError::from)?, false)) }) .collect::, _>>()?; diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index b8a248d0dab..d885b381d5b 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -43,6 +43,7 @@ use crate::arrays::py::PythonArray; use crate::arrow::ToPyArrow; use crate::dtype::PyDType; use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::python_repr::PythonRepr; use crate::scalar::PyScalar; @@ -214,7 +215,7 @@ impl PyArray { /// ------- /// :class:`~vortex.Array` #[staticmethod] - fn from_arrow(obj: Bound<'_, PyAny>) -> Result { + fn from_arrow(obj: Bound<'_, PyAny>) -> PyVortexResult { from_arrow::from_arrow(&obj.as_borrowed()) } @@ -260,7 +261,7 @@ impl PyArray { fn from_range( range: Bound, dtype: Option>, - ) -> Result { + ) -> PyVortexResult { let range = range.cast::()?; let start = range.start()?; let stop = range.stop()?; @@ -317,9 +318,7 @@ impl PyArray { /// ] /// ``` /// - fn to_arrow_array<'py>( - self_: &'py Bound<'py, Self>, - ) -> Result, PyVortexError> { + fn to_arrow_array<'py>(self_: &'py Bound<'py, Self>) -> PyVortexResult> { // NOTE(ngates): for struct arrays, we could also return a RecordBatchStreamReader. let array = PyArrayRef::extract(self_.as_any().as_borrowed())?.into_inner(); let py = self_.py(); @@ -332,9 +331,7 @@ impl PyArray { let chunks = chunked_array .chunks() .iter() - .map(|chunk| -> Result<_, PyVortexError> { - Ok(chunk.clone().into_arrow(&arrow_dtype)?) - }) + .map(|chunk| -> PyVortexResult<_> { Ok(chunk.clone().into_arrow(&arrow_dtype)?) }) .collect::, _>>()?; let pa_data_type = arrow_dtype.clone().to_pyarrow(py)?; @@ -425,42 +422,42 @@ impl PyArray { } ///Rust docs are *not* copied into Python for __lt__: https://github.com/PyO3/pyo3/issues/4326 - fn __lt__(slf: Bound, other: PyArrayRef) -> Result { + fn __lt__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&slf, &*other, Operator::Lt)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __le__: https://github.com/PyO3/pyo3/issues/4326 - fn __le__(slf: Bound, other: PyArrayRef) -> Result { + fn __le__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Lte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __eq__: https://github.com/PyO3/pyo3/issues/4326 - fn __eq__(slf: Bound, other: PyArrayRef) -> Result { + fn __eq__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Eq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ne__: https://github.com/PyO3/pyo3/issues/4326 - fn __ne__(slf: Bound, other: PyArrayRef) -> Result { + fn __ne__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::NotEq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ge__: https://github.com/PyO3/pyo3/issues/4326 - fn __ge__(slf: Bound, other: PyArrayRef) -> Result { + fn __ge__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __gt__: https://github.com/PyO3/pyo3/issues/4326 - fn __gt__(slf: Bound, other: PyArrayRef) -> Result { + fn __gt__(slf: Bound, other: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gt)?; Ok(PyArrayRef::from(inner)) @@ -494,7 +491,7 @@ impl PyArray { /// 5 /// ] /// ``` - fn filter(slf: Bound, mask: PyArrayRef) -> Result { + fn filter(slf: Bound, mask: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let mask = (&*mask as &dyn Array).to_bool().to_mask_fill_null_false(); let inner = vortex::compute::filter(&*slf, &mask)?; @@ -619,7 +616,7 @@ impl PyArray { /// "a" /// ] /// ``` - fn take(slf: Bound, indices: PyArrayRef) -> Result { + fn take(slf: Bound, indices: PyArrayRef) -> PyVortexResult { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); if !indices.dtype().is_int() { @@ -677,7 +674,7 @@ impl PyArray { .to_string()) } - fn serialize(slf: &Bound, ctx: &PyArrayContext) -> Result>, PyVortexError> { + fn serialize(slf: &Bound, ctx: &PyArrayContext) -> PyVortexResult>> { // FIXME(ngates): do not copy to vec, use buffer protocol let array = PyArrayRef::extract(slf.as_any().as_borrowed())?; Ok(array diff --git a/vortex-python/src/arrays/py/mod.rs b/vortex-python/src/arrays/py/mod.rs index 0b2f88dd20b..f15ffbaed9b 100644 --- a/vortex-python/src/arrays/py/mod.rs +++ b/vortex-python/src/arrays/py/mod.rs @@ -14,10 +14,10 @@ pub(crate) use python::*; use vortex::array::vtable::ArrayId; pub(crate) use vtable::*; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Extract the array id from a Python class `id` attribute. -pub fn id_from_obj(cls: &Bound) -> Result { +pub fn id_from_obj(cls: &Bound) -> PyVortexResult { Ok(ArrayId::new_arc( cls.getattr("id") .map_err(|_| { diff --git a/vortex-python/src/arrays/py/python.rs b/vortex-python/src/arrays/py/python.rs index c8160255d66..922e09305e1 100644 --- a/vortex-python/src/arrays/py/python.rs +++ b/vortex-python/src/arrays/py/python.rs @@ -10,7 +10,7 @@ use vortex::dtype::DType; use crate::arrays::PyArray; use crate::arrays::py::id_from_obj; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Base class for implementing a Vortex encoding in Python. /// @@ -32,7 +32,7 @@ impl PyPythonArray { cls: &Bound<'_, PyType>, len: usize, dtype: PyDType, - ) -> Result, PyVortexError> { + ) -> PyVortexResult> { let id = id_from_obj(cls)?; Ok(PyClassInitializer::from(PyArray).add_subclass(Self { id, diff --git a/vortex-python/src/compress.rs b/vortex-python/src/compress.rs index 5d2c3bd93c9..ef63426acae 100644 --- a/vortex-python/src/compress.rs +++ b/vortex-python/src/compress.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; use vortex::compressor::BtrBlocksCompressor; use crate::arrays::PyArrayRef; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { @@ -50,7 +50,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// >>> str(vx.compress(a)) /// 'vortex.alp(f64?, len=1000)' #[pyfunction] -pub fn compress(array: PyArrayRef) -> Result { +pub fn compress(array: PyArrayRef) -> PyVortexResult { let compressed = BtrBlocksCompressor::default().compress(array.inner())?; Ok(PyArrayRef::from(compressed)) } diff --git a/vortex-python/src/dataset.rs b/vortex-python/src/dataset.rs index b8f769974b8..4608edd6c64 100644 --- a/vortex-python/src/dataset.rs +++ b/vortex-python/src/dataset.rs @@ -29,7 +29,7 @@ use crate::TOKIO_RUNTIME; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::arrow::ToPyArrow; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::expr::PyExpr; use crate::install_module; use crate::object_store_urls::object_store_from_url; @@ -134,7 +134,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, - ) -> Result { + ) -> PyVortexResult { let array = read_array_from_reader( &self.vxf, projection_from_python(columns)?, @@ -152,7 +152,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> Result, PyVortexError> { + ) -> PyVortexResult> { let mut scan = self_ .vxf .scan()? @@ -178,7 +178,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> Result { + ) -> PyVortexResult { if row_filter.is_none() { let row_count = match row_range { Some(range) => range.1 - range.0, @@ -210,7 +210,7 @@ impl PyVortexDataset { /// The natural splits of this Dataset. #[pyo3(signature = (*))] - pub fn splits(&self) -> Result, PyVortexError> { + pub fn splits(&self) -> PyVortexResult> { Ok(self .vxf .splits()? @@ -221,6 +221,6 @@ impl PyVortexDataset { } #[pyfunction] -pub fn dataset_from_url(py: Python, url: &str) -> Result { +pub fn dataset_from_url(py: Python, url: &str) -> PyVortexResult { Ok(py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?) } diff --git a/vortex-python/src/dtype/factory.rs b/vortex-python/src/dtype/factory.rs index 200ac642c4a..882d54a8aba 100644 --- a/vortex-python/src/dtype/factory.rs +++ b/vortex-python/src/dtype/factory.rs @@ -21,7 +21,7 @@ use vortex::dtype::PType; use vortex::dtype::StructFields; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; /// Construct the data type for a column containing only the null value. /// @@ -261,7 +261,7 @@ pub(super) fn dtype_decimal( precision: u8, scale: i8, nullable: bool, -) -> Result, PyVortexError> { +) -> PyVortexResult> { let decimal_type = DType::Decimal(DecimalDType::try_new(precision, scale)?, nullable.into()); Ok(PyDType::init(py, decimal_type)?) } diff --git a/vortex-python/src/dtype/mod.rs b/vortex-python/src/dtype/mod.rs index 1d3e42f63b0..0b2e6b930de 100644 --- a/vortex-python/src/dtype/mod.rs +++ b/vortex-python/src/dtype/mod.rs @@ -47,7 +47,7 @@ use crate::dtype::null::PyNullDType; use crate::dtype::primitive::PyPrimitiveDType; use crate::dtype::struct_::PyStructDType; use crate::dtype::utf8::PyUtf8DType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::python_repr::PythonRepr; @@ -154,11 +154,11 @@ impl PyDType { #[pymethods] impl PyDType { - fn to_arrow_type(&self, py: Python) -> Result, PyVortexError> { + fn to_arrow_type(&self, py: Python) -> PyVortexResult> { Ok(self.0.to_arrow_dtype()?.to_pyarrow(py)?) } - fn to_arrow_schema(&self, py: Python) -> Result, PyVortexError> { + fn to_arrow_schema(&self, py: Python) -> PyVortexResult> { Ok(self.0.to_arrow_schema()?.to_pyarrow(py)?) } diff --git a/vortex-python/src/error.rs b/vortex-python/src/error.rs index ff283472e8f..d419892d047 100644 --- a/vortex-python/src/error.rs +++ b/vortex-python/src/error.rs @@ -12,6 +12,9 @@ pub enum PyVortexError { Vortex(VortexError), } +/// A [`Result`] alias where the error is [`PyVortexError`]. +pub type PyVortexResult = Result; + impl From for PyVortexError { fn from(value: PyErr) -> Self { Self::Py(value) diff --git a/vortex-python/src/expr/mod.rs b/vortex-python/src/expr/mod.rs index cf47f260844..699abe2aef1 100644 --- a/vortex-python/src/expr/mod.rs +++ b/vortex-python/src/expr/mod.rs @@ -22,7 +22,7 @@ use vortex::expr::not; use crate::arrays::PyArrayRef; use crate::arrays::into_array::PyIntoArray; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::scalar::factory::scalar_helper; @@ -214,7 +214,7 @@ impl PyExpr { /// vortex.open : Open an on-disk Vortex array for scanning with an expression. /// vortex.VortexFile : An on-disk Vortex array ready to scan with an expression. /// vortex.VortexFile.scan : Scan an on-disk Vortex array with an expression. - fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> Result { + fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> PyVortexResult { Ok(PyArrayRef::from(self_.evaluate(array.inner())?)) } } diff --git a/vortex-python/src/file.rs b/vortex-python/src/file.rs index 3cefde9f0d0..4ba481bc3e0 100644 --- a/vortex-python/src/file.rs +++ b/vortex-python/src/file.rs @@ -31,7 +31,7 @@ use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dataset::PyVortexDataset; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -50,11 +50,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { #[pyfunction] #[pyo3(signature = (path, *, without_segment_cache = false))] -pub fn open( - py: Python, - path: &str, - without_segment_cache: bool, -) -> Result { +pub fn open(py: Python, path: &str, without_segment_cache: bool) -> PyVortexResult { let vxf = py.detach(|| { TOKIO_RUNTIME.block_on(async move { let mut options = SESSION.open_options(); @@ -94,7 +90,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> Result { + ) -> PyVortexResult { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -114,7 +110,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> Result { + ) -> PyVortexResult { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -136,7 +132,7 @@ impl PyVortexFile { projection: Option, expr: Option, batch_size: Option, - ) -> Result, PyVortexError> { + ) -> PyVortexResult> { let vxf = slf.get().vxf.clone(); let reader = slf.py().detach(|| { @@ -157,12 +153,12 @@ impl PyVortexFile { Ok(rbr.into_pyarrow(slf.py())?) } - fn to_dataset(slf: Bound) -> Result { + fn to_dataset(slf: Bound) -> PyVortexResult { Ok(PyVortexDataset::try_new(slf.get().vxf.clone())?) } #[pyo3(signature = (*))] - pub fn splits(&self) -> Result, PyVortexError> { + pub fn splits(&self) -> PyVortexResult> { Ok(self .vxf .splits()? diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index 4f42504cb0b..8dbadc2db17 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -29,7 +29,7 @@ use crate::arrays::PyArray; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; use crate::dataset::PyVortexDataset; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -112,7 +112,7 @@ pub fn read_url<'py>( row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, -) -> Result { +) -> PyVortexResult { let dataset = py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?; dataset.to_array(projection, row_filter, indices, row_range) } @@ -169,7 +169,7 @@ pub fn read_url<'py>( /// :func:`vortex.io.VortexWriteOptions` #[pyfunction] #[pyo3(signature = (iter, path))] -pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> Result<(), PyVortexError> { +pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> PyVortexResult<()> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; @@ -288,7 +288,7 @@ impl PyVortexWriteOptions { py: Python, iter: PyIntoArrayIterator, path: &str, - ) -> Result<(), PyVortexError> { + ) -> PyVortexResult<()> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; diff --git a/vortex-python/src/iter/mod.rs b/vortex-python/src/iter/mod.rs index 23a108114e6..c878ae33e93 100644 --- a/vortex-python/src/iter/mod.rs +++ b/vortex-python/src/iter/mod.rs @@ -29,7 +29,7 @@ use vortex::dtype::DType; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::iter::python::PythonArrayIterator; @@ -82,7 +82,7 @@ impl PyArrayIterator { } /// Returns the next chunk from the iterator. - fn __next__(&self, py: Python) -> Result, PyVortexError> { + fn __next__(&self, py: Python) -> PyVortexResult> { py.detach(|| { Ok(self .iter @@ -96,7 +96,7 @@ impl PyArrayIterator { /// Read all chunks into a single :class:`vortex.Array`. If there are multiple chunks, /// this will be a :class:`vortex.ChunkedArray`, otherwise it will be a single array. - fn read_all(&self, py: Python) -> Result { + fn read_all(&self, py: Python) -> PyVortexResult { let array = py.detach(|| { if let Some(iter) = self.iter.lock().take() { iter.read_all() @@ -111,7 +111,7 @@ impl PyArrayIterator { /// Convert the :class:`vortex.ArrayIterator` into a :class:`pyarrow.RecordBatchReader`. /// /// Note that this performs the conversion on the current thread. - fn to_arrow(slf: Bound) -> Result, PyVortexError> { + fn to_arrow(slf: Bound) -> PyVortexResult> { let schema = Arc::new(slf.get().dtype().to_arrow_schema()?); let data_type = DataType::Struct(schema.fields().clone()); diff --git a/vortex-python/src/registry.rs b/vortex-python/src/registry.rs index d93fb79fa86..fe5d5933524 100644 --- a/vortex-python/src/registry.rs +++ b/vortex-python/src/registry.rs @@ -10,7 +10,7 @@ use vortex::array::session::ArraySessionExt; use crate::SESSION; use crate::arrays::py::PythonVTable; use crate::arrays::py::id_from_obj; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; /// Register serde functions and classes. @@ -28,7 +28,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// /// It's not currently possible to register a layout encoding from Python. #[pyfunction] -pub(crate) fn register(cls: &Bound) -> Result<(), PyVortexError> { +pub(crate) fn register(cls: &Bound) -> PyVortexResult<()> { let id = id_from_obj(cls)?; // TODO(ngates): we would need to register the Python class object in a PyVortexSession // to call back into it during deserialize operations. diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index f590a56f0c4..cbd65c3c726 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -21,7 +21,7 @@ use vortex::dtype::StructFields; use vortex::scalar::Scalar; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::scalar::PyScalar; use crate::scalar::bool; @@ -39,10 +39,7 @@ pub fn scalar<'py>( ) } -pub fn scalar_helper( - value: &Bound<'_, PyAny>, - dtype: Option<&DType>, -) -> Result { +pub fn scalar_helper(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyVortexResult { let scalar = scalar_helper_inner(value, dtype)?; // If a dtype was provided, attempt to cast the scalar to that dtype. diff --git a/vortex-python/src/scalar/struct_.rs b/vortex-python/src/scalar/struct_.rs index 0197de9d2f9..bdcfd65ecb9 100644 --- a/vortex-python/src/scalar/struct_.rs +++ b/vortex-python/src/scalar/struct_.rs @@ -11,7 +11,7 @@ use vortex::error::vortex_err; use vortex::scalar::StructScalar; use crate::PyVortex; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::scalar::AsScalarRef; use crate::scalar::PyScalar; use crate::scalar::ScalarSubclass; @@ -27,7 +27,7 @@ impl ScalarSubclass for PyStructScalar { #[pymethods] impl PyStructScalar { /// Return the child scalar with the given field name. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result, PyVortexError> { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyVortexResult> { let scalar = self_.as_scalar_ref(); let child = scalar .field(name) diff --git a/vortex-python/src/scan.rs b/vortex-python/src/scan.rs index 7f97b781b1b..a3bb84f74c1 100644 --- a/vortex-python/src/scan.rs +++ b/vortex-python/src/scan.rs @@ -8,7 +8,7 @@ use vortex::array::ArrayRef; use vortex::scan::RepeatedScan; use crate::RUNTIME; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::iter::PyArrayIterator; use crate::scalar::PyScalar; @@ -36,7 +36,7 @@ impl PyRepeatedScan { slf: Bound, start: Option, stop: Option, - ) -> Result { + ) -> PyVortexResult { let row_count = slf.get().row_count; let row_range = match (start, stop) { (Some(start), Some(stop)) => Some(start..stop), @@ -50,7 +50,7 @@ impl PyRepeatedScan { ))) } - fn scalar_at(slf: Bound, index: u64) -> Result, PyVortexError> { + fn scalar_at(slf: Bound, index: u64) -> PyVortexResult> { let row_count = slf.get().row_count; if index >= row_count { return Err(PyIndexError::new_err(format!( diff --git a/vortex-python/src/serde/mod.rs b/vortex-python/src/serde/mod.rs index 51017a31ad0..2b992596845 100644 --- a/vortex-python/src/serde/mod.rs +++ b/vortex-python/src/serde/mod.rs @@ -17,7 +17,7 @@ use vortex::ipc::messages::PollRead; use crate::SESSION; use crate::arrays::PyArrayRef; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::install_module; use crate::serde::context::PyArrayContext; use crate::serde::parts::PyArrayParts; @@ -53,10 +53,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// Array /// The decoded Vortex array #[pyfunction] -fn decode_ipc_array( - array_bytes: Vec, - dtype_bytes: Vec, -) -> Result { +fn decode_ipc_array(array_bytes: Vec, dtype_bytes: Vec) -> PyVortexResult { let mut decoder = MessageDecoder::default(); let mut dtype_buf = Bytes::from(dtype_bytes); @@ -107,7 +104,7 @@ fn decode_ipc_array_buffers<'py>( py: Python<'py>, array_buffers: Vec>, dtype_buffers: Vec>, -) -> Result { +) -> PyVortexResult { let mut decoder = MessageDecoder::default(); // Concatenate dtype buffers diff --git a/vortex-python/src/serde/parts.rs b/vortex-python/src/serde/parts.rs index dc2a269ef07..ce2dc501fe1 100644 --- a/vortex-python/src/serde/parts.rs +++ b/vortex-python/src/serde/parts.rs @@ -17,7 +17,7 @@ use vortex::buffer::ByteBuffer; use crate::SESSION; use crate::arrays::PyArrayRef; use crate::dtype::PyDType; -use crate::error::PyVortexError; +use crate::error::PyVortexResult; use crate::serde::context::PyArrayContext; /// ArrayParts is a parsed representation of a serialized array. @@ -44,7 +44,7 @@ impl From for PyArrayParts { impl PyArrayParts { /// Parse a serialized array into its parts. #[staticmethod] - fn parse(data: &[u8]) -> Result { + fn parse(data: &[u8]) -> PyVortexResult { // TODO(ngates): create a buffer from a slice of bytes? let buffer = ByteBuffer::copy_from(data); Ok(PyArrayParts(ArrayParts::try_from(buffer)?)) @@ -60,7 +60,7 @@ impl PyArrayParts { ctx: &PyArrayContext, dtype: PyDType, len: usize, - ) -> Result { + ) -> PyVortexResult { Ok(PyArrayRef::from(self.0.decode( dtype.inner(), len, @@ -87,7 +87,7 @@ impl PyArrayParts { fn buffers<'py>( slf: PyRef<'py, Self>, py: Python<'py>, - ) -> Result>, PyVortexError> { + ) -> PyVortexResult>> { if slf.nbuffers() == 0 { return Ok(Vec::new()); } From 535e1b0e1d57fdc0b4b920541625c77bd24633dd Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 15:05:18 +0000 Subject: [PATCH 3/7] last fix Signed-off-by: Adam Gutglick --- vortex-python/src/arrays/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index d885b381d5b..99ceae2f9f9 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -568,7 +568,7 @@ impl PyArray { /// OverflowError: can't convert negative int to unsigned /// ``` // TODO(ngates): return a vortex.Scalar - fn scalar_at(slf: Bound, index: usize) -> Result, PyVortexError> { + fn scalar_at(slf: Bound, index: usize) -> PyVortexResult> { let py = slf.py(); let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); Ok(PyScalar::init(py, slf.scalar_at(index)?)?) @@ -690,7 +690,7 @@ impl PyArray { /// the data needed for pickle to reconstruct the array. fn __reduce__<'py>( slf: &'py Bound<'py, Self>, - ) -> Result<(Bound<'py, PyAny>, Bound<'py, PyAny>), PyVortexError> { + ) -> PyVortexResult<(Bound<'py, PyAny>, Bound<'py, PyAny>)> { let py = slf.py(); let array = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); @@ -717,7 +717,7 @@ impl PyArray { fn __reduce_ex__<'py>( slf: &'py Bound<'py, Self>, protocol: i32, - ) -> Result<(Bound<'py, PyAny>, Bound<'py, PyAny>), PyVortexError> { + ) -> PyVortexResult<(Bound<'py, PyAny>, Bound<'py, PyAny>)> { let py = slf.py(); if protocol < 5 { From 23b633c793832d4b44968cb370072db49db7b6c8 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 15:58:47 +0000 Subject: [PATCH 4/7] add some test Signed-off-by: Adam Gutglick --- vortex-python/test/test_array.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vortex-python/test/test_array.py b/vortex-python/test/test_array.py index ddf405ba1a6..08cf6659b59 100644 --- a/vortex-python/test/test_array.py +++ b/vortex-python/test/test_array.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright the Vortex contributors import pyarrow as pa +import pytest import vortex @@ -36,3 +37,9 @@ def test_empty_array(): a = pa.array([], type=pa.uint8()) primitive = vortex.array(a) assert primitive.to_arrow_array().type == pa.uint8() + + +@pytest.mark.xfail(raises=IndexError) +def test_scalar_at_out_of_bounds(): + a = vortex.array([1, 2, 3]) + _s = a.scalar_at(4) From a5bd5eb2f78de022d0e292c87fc1561ab394e005 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 16:12:06 +0000 Subject: [PATCH 5/7] Fix py scalar_at err type Signed-off-by: Adam Gutglick --- vortex-python/src/arrays/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index 99ceae2f9f9..9e0dd62a63c 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -13,6 +13,7 @@ mod range_to_sequence; use arrow_array::Array as ArrowArray; use arrow_array::ArrayRef as ArrowArrayRef; use pyo3::IntoPyObjectExt; +use pyo3::exceptions::PyIndexError; use pyo3::exceptions::PyTypeError; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -571,6 +572,13 @@ impl PyArray { fn scalar_at(slf: Bound, index: usize) -> PyVortexResult> { let py = slf.py(); let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); + if index >= slf.len() { + return Err(PyIndexError::new_err(format!( + "Index {index} out of bounds from 0 to {}", + slf.len() + )) + .into()); + } Ok(PyScalar::init(py, slf.scalar_at(index)?)?) } From 53515ac88c4ad54e131dbbc78f1ba95ecbc40e2f Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 16:18:43 +0000 Subject: [PATCH 6/7] exact match for test Signed-off-by: Adam Gutglick --- vortex-python/test/test_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vortex-python/test/test_array.py b/vortex-python/test/test_array.py index 08cf6659b59..cd7c9f4f4a8 100644 --- a/vortex-python/test/test_array.py +++ b/vortex-python/test/test_array.py @@ -41,5 +41,5 @@ def test_empty_array(): @pytest.mark.xfail(raises=IndexError) def test_scalar_at_out_of_bounds(): - a = vortex.array([1, 2, 3]) - _s = a.scalar_at(4) + a = vortex.array([10, 42, 999, 1992]) + _s = a.scalar_at(10) From 228d8d407048b794cadcaeea935b20524e4e8ded Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 22 Jan 2026 16:23:53 +0000 Subject: [PATCH 7/7] Fix a typo Signed-off-by: Adam Gutglick --- vortex-python/src/arrays/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index 9e0dd62a63c..7445683710c 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -557,7 +557,7 @@ impl PyArray { /// >>> vx.array([10, 42, 999, 1992]).scalar_at(10) /// Traceback (most recent call last): /// ... - /// ValueError: index 10 out of bounds from 0 to 4 + /// IndexError: Index 10 out of bounds from 0 to 4 /// ``` /// /// Unlike Python, negative indices are not supported: