From e45fdb2a89b17fd295f33165d5beb5061875bdc3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 19 Aug 2025 13:14:36 +0200 Subject: [PATCH 01/26] Add pyarrow-stubs --- python/pyarrow/__init__.pyi | 656 ++ python/pyarrow/__lib_pxi/__init__.pyi | 0 python/pyarrow/__lib_pxi/array.pyi | 4274 +++++++++ python/pyarrow/__lib_pxi/benchmark.pyi | 1 + python/pyarrow/__lib_pxi/builder.pyi | 89 + python/pyarrow/__lib_pxi/compat.pyi | 5 + python/pyarrow/__lib_pxi/config.pyi | 41 + python/pyarrow/__lib_pxi/device.pyi | 88 + python/pyarrow/__lib_pxi/error.pyi | 53 + python/pyarrow/__lib_pxi/io.pyi | 1474 ++++ python/pyarrow/__lib_pxi/ipc.pyi | 705 ++ python/pyarrow/__lib_pxi/memory.pyi | 174 + python/pyarrow/__lib_pxi/pandas_shim.pyi | 51 + python/pyarrow/__lib_pxi/scalar.pyi | 1017 +++ python/pyarrow/__lib_pxi/table.pyi | 5617 ++++++++++++ python/pyarrow/__lib_pxi/tensor.pyi | 688 ++ python/pyarrow/__lib_pxi/types.pyi | 4413 ++++++++++ python/pyarrow/_azurefs.pyi | 74 + python/pyarrow/_compute.pyi | 1721 ++++ python/pyarrow/_csv.pyi | 641 ++ python/pyarrow/_cuda.pyi | 556 ++ python/pyarrow/_dataset.pyi | 2301 +++++ python/pyarrow/_dataset_orc.pyi | 6 + python/pyarrow/_dataset_parquet.pyi | 314 + .../pyarrow/_dataset_parquet_encryption.pyi | 85 + python/pyarrow/_feather.pyi | 29 + python/pyarrow/_flight.pyi | 1380 +++ python/pyarrow/_fs.pyi | 1005 +++ python/pyarrow/_gcsfs.pyi | 83 + python/pyarrow/_hdfs.pyi | 75 + python/pyarrow/_json.pyi | 169 + python/pyarrow/_orc.pyi | 56 + python/pyarrow/_parquet.pyi | 445 + python/pyarrow/_parquet_encryption.pyi | 67 + python/pyarrow/_s3fs.pyi | 74 + python/pyarrow/_stubs_typing.pyi | 80 + python/pyarrow/_substrait.pyi | 39 + python/pyarrow/acero.pyi | 85 + python/pyarrow/benchmark.pyi | 3 + python/pyarrow/cffi.pyi | 4 + python/pyarrow/compute.pyi | 7779 +++++++++++++++++ python/pyarrow/csv.pyi | 27 + python/pyarrow/cuda.pyi | 25 + python/pyarrow/dataset.pyi | 229 + python/pyarrow/feather.pyi | 50 + python/pyarrow/flight.pyi | 95 + python/pyarrow/fs.pyi | 77 + python/pyarrow/gandiva.pyi | 65 + python/pyarrow/interchange/__init__.pyi | 0 python/pyarrow/interchange/buffer.pyi | 58 + python/pyarrow/interchange/column.pyi | 252 + python/pyarrow/interchange/dataframe.pyi | 102 + python/pyarrow/interchange/from_dataframe.pyi | 244 + python/pyarrow/ipc.pyi | 123 + python/pyarrow/json.pyi | 3 + python/pyarrow/lib.pyi | 106 + python/pyarrow/orc.pyi | 279 + python/pyarrow/pandas_compat.pyi | 54 + python/pyarrow/parquet/__init__.pyi | 1 + python/pyarrow/parquet/core.pyi | 2061 +++++ python/pyarrow/parquet/encryption.pyi | 15 + python/pyarrow/substrait.pyi | 21 + python/pyarrow/types.pyi | 194 + python/pyarrow/util.pyi | 27 + 64 files changed, 40525 insertions(+) create mode 100644 python/pyarrow/__init__.pyi create mode 100644 python/pyarrow/__lib_pxi/__init__.pyi create mode 100644 python/pyarrow/__lib_pxi/array.pyi create mode 100644 python/pyarrow/__lib_pxi/benchmark.pyi create mode 100644 python/pyarrow/__lib_pxi/builder.pyi create mode 100644 python/pyarrow/__lib_pxi/compat.pyi create mode 100644 python/pyarrow/__lib_pxi/config.pyi create mode 100644 python/pyarrow/__lib_pxi/device.pyi create mode 100644 python/pyarrow/__lib_pxi/error.pyi create mode 100644 python/pyarrow/__lib_pxi/io.pyi create mode 100644 python/pyarrow/__lib_pxi/ipc.pyi create mode 100644 python/pyarrow/__lib_pxi/memory.pyi create mode 100644 python/pyarrow/__lib_pxi/pandas_shim.pyi create mode 100644 python/pyarrow/__lib_pxi/scalar.pyi create mode 100644 python/pyarrow/__lib_pxi/table.pyi create 
mode 100644 python/pyarrow/__lib_pxi/tensor.pyi create mode 100644 python/pyarrow/__lib_pxi/types.pyi create mode 100644 python/pyarrow/_azurefs.pyi create mode 100644 python/pyarrow/_compute.pyi create mode 100644 python/pyarrow/_csv.pyi create mode 100644 python/pyarrow/_cuda.pyi create mode 100644 python/pyarrow/_dataset.pyi create mode 100644 python/pyarrow/_dataset_orc.pyi create mode 100644 python/pyarrow/_dataset_parquet.pyi create mode 100644 python/pyarrow/_dataset_parquet_encryption.pyi create mode 100644 python/pyarrow/_feather.pyi create mode 100644 python/pyarrow/_flight.pyi create mode 100644 python/pyarrow/_fs.pyi create mode 100644 python/pyarrow/_gcsfs.pyi create mode 100644 python/pyarrow/_hdfs.pyi create mode 100644 python/pyarrow/_json.pyi create mode 100644 python/pyarrow/_orc.pyi create mode 100644 python/pyarrow/_parquet.pyi create mode 100644 python/pyarrow/_parquet_encryption.pyi create mode 100644 python/pyarrow/_s3fs.pyi create mode 100644 python/pyarrow/_stubs_typing.pyi create mode 100644 python/pyarrow/_substrait.pyi create mode 100644 python/pyarrow/acero.pyi create mode 100644 python/pyarrow/benchmark.pyi create mode 100644 python/pyarrow/cffi.pyi create mode 100644 python/pyarrow/compute.pyi create mode 100644 python/pyarrow/csv.pyi create mode 100644 python/pyarrow/cuda.pyi create mode 100644 python/pyarrow/dataset.pyi create mode 100644 python/pyarrow/feather.pyi create mode 100644 python/pyarrow/flight.pyi create mode 100644 python/pyarrow/fs.pyi create mode 100644 python/pyarrow/gandiva.pyi create mode 100644 python/pyarrow/interchange/__init__.pyi create mode 100644 python/pyarrow/interchange/buffer.pyi create mode 100644 python/pyarrow/interchange/column.pyi create mode 100644 python/pyarrow/interchange/dataframe.pyi create mode 100644 python/pyarrow/interchange/from_dataframe.pyi create mode 100644 python/pyarrow/ipc.pyi create mode 100644 python/pyarrow/json.pyi create mode 100644 python/pyarrow/lib.pyi create mode 100644 python/pyarrow/orc.pyi create mode 100644 python/pyarrow/pandas_compat.pyi create mode 100644 python/pyarrow/parquet/__init__.pyi create mode 100644 python/pyarrow/parquet/core.pyi create mode 100644 python/pyarrow/parquet/encryption.pyi create mode 100644 python/pyarrow/substrait.pyi create mode 100644 python/pyarrow/types.pyi create mode 100644 python/pyarrow/util.pyi diff --git a/python/pyarrow/__init__.pyi b/python/pyarrow/__init__.pyi new file mode 100644 index 00000000000..8a0d1e870c5 --- /dev/null +++ b/python/pyarrow/__init__.pyi @@ -0,0 +1,656 @@ +# ruff: noqa: F401, I001, E402 +__version__: str + +import pyarrow.lib as _lib + +_gc_enabled: bool + +from pyarrow.lib import ( + BuildInfo, + RuntimeInfo, + set_timezone_db_path, + MonthDayNano, + VersionInfo, + cpp_build_info, + cpp_version, + cpp_version_info, + runtime_info, + cpu_count, + set_cpu_count, + enable_signal_handlers, + io_thread_count, + set_io_thread_count, +) + +def show_versions() -> None: ... +def show_info() -> None: ... +def _module_is_available(module: str) -> bool: ... +def _filesystem_is_available(fs: str) -> bool: ... 
+ +from pyarrow.lib import ( + null, + bool_, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, + time32, + time64, + timestamp, + date32, + date64, + duration, + month_day_nano_interval, + float16, + float32, + float64, + binary, + string, + utf8, + binary_view, + string_view, + large_binary, + large_string, + large_utf8, + decimal32, + decimal64, + decimal128, + decimal256, + list_, + large_list, + list_view, + large_list_view, + map_, + struct, + union, + sparse_union, + dense_union, + dictionary, + run_end_encoded, + json_, + uuid, + fixed_shape_tensor, + bool8, + opaque, + field, + type_for_alias, + DataType, + DictionaryType, + StructType, + ListType, + LargeListType, + FixedSizeListType, + ListViewType, + LargeListViewType, + MapType, + UnionType, + SparseUnionType, + DenseUnionType, + TimestampType, + Time32Type, + Time64Type, + DurationType, + FixedSizeBinaryType, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + BaseExtensionType, + ExtensionType, + RunEndEncodedType, + FixedShapeTensorType, + Bool8Type, + UuidType, + JsonType, + OpaqueType, + PyExtensionType, + UnknownExtensionType, + register_extension_type, + unregister_extension_type, + DictionaryMemo, + KeyValueMetadata, + Field, + Schema, + schema, + unify_schemas, + Array, + Tensor, + array, + chunked_array, + record_batch, + nulls, + repeat, + SparseCOOTensor, + SparseCSRMatrix, + SparseCSCMatrix, + SparseCSFTensor, + infer_type, + from_numpy_dtype, + NullArray, + NumericArray, + IntegerArray, + FloatingPointArray, + BooleanArray, + Int8Array, + UInt8Array, + Int16Array, + UInt16Array, + Int32Array, + UInt32Array, + Int64Array, + UInt64Array, + HalfFloatArray, + FloatArray, + DoubleArray, + ListArray, + LargeListArray, + FixedSizeListArray, + ListViewArray, + LargeListViewArray, + MapArray, + UnionArray, + BinaryArray, + StringArray, + LargeBinaryArray, + LargeStringArray, + BinaryViewArray, + StringViewArray, + FixedSizeBinaryArray, + DictionaryArray, + Date32Array, + Date64Array, + TimestampArray, + Time32Array, + Time64Array, + DurationArray, + MonthDayNanoIntervalArray, + Decimal32Array, + Decimal64Array, + Decimal128Array, + Decimal256Array, + StructArray, + ExtensionArray, + RunEndEncodedArray, + FixedShapeTensorArray, + Bool8Array, + UuidArray, + JsonArray, + OpaqueArray, + scalar, + NA, + _NULL as NULL, + Scalar, + NullScalar, + BooleanScalar, + Int8Scalar, + Int16Scalar, + Int32Scalar, + Int64Scalar, + UInt8Scalar, + UInt16Scalar, + UInt32Scalar, + UInt64Scalar, + HalfFloatScalar, + FloatScalar, + DoubleScalar, + Decimal32Scalar, + Decimal64Scalar, + Decimal128Scalar, + Decimal256Scalar, + ListScalar, + LargeListScalar, + FixedSizeListScalar, + ListViewScalar, + LargeListViewScalar, + Date32Scalar, + Date64Scalar, + Time32Scalar, + Time64Scalar, + TimestampScalar, + DurationScalar, + MonthDayNanoIntervalScalar, + BinaryScalar, + LargeBinaryScalar, + BinaryViewScalar, + StringScalar, + LargeStringScalar, + StringViewScalar, + FixedSizeBinaryScalar, + DictionaryScalar, + MapScalar, + StructScalar, + UnionScalar, + RunEndEncodedScalar, + ExtensionScalar, + Bool8Scalar, + UuidScalar, + JsonScalar, + OpaqueScalar, +) + +# Buffers, allocation +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager + +from pyarrow.lib import ( + Buffer, + ResizableBuffer, + foreign_buffer, + py_buffer, + Codec, + compress, + decompress, + allocate_buffer, +) + +from pyarrow.lib import ( + MemoryPool, + LoggingMemoryPool, + ProxyMemoryPool, + 
total_allocated_bytes, + set_memory_pool, + default_memory_pool, + system_memory_pool, + jemalloc_memory_pool, + mimalloc_memory_pool, + logging_memory_pool, + proxy_memory_pool, + log_memory_allocations, + jemalloc_set_decay_ms, + supported_memory_backends, +) + +# I/O +from pyarrow.lib import ( + NativeFile, + PythonFile, + BufferedInputStream, + BufferedOutputStream, + CacheOptions, + CompressedInputStream, + CompressedOutputStream, + TransformInputStream, + transcoding_input_stream, + FixedSizeBufferWriter, + BufferReader, + BufferOutputStream, + OSFile, + MemoryMappedFile, + memory_map, + create_memory_map, + MockOutputStream, + input_stream, + output_stream, + have_libhdfs, +) + +from pyarrow.lib import ( + ChunkedArray, + RecordBatch, + Table, + table, + concat_arrays, + concat_tables, + TableGroupBy, + RecordBatchReader, +) + +# Exceptions +from pyarrow.lib import ( + ArrowCancelled, + ArrowCapacityError, + ArrowException, + ArrowKeyError, + ArrowIndexError, + ArrowInvalid, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError, + ArrowTypeError, + ArrowSerializationError, +) + +from pyarrow.ipc import serialize_pandas, deserialize_pandas +import pyarrow.ipc as ipc + +import pyarrow.types as types + +# ---------------------------------------------------------------------- +# Deprecations + +from pyarrow.util import _deprecate_api, _deprecate_class + +from pyarrow.ipc import ( + Message, + MessageReader, + MetadataVersion, + RecordBatchFileReader, + RecordBatchFileWriter, + RecordBatchStreamReader, + RecordBatchStreamWriter, +) + +# ---------------------------------------------------------------------- +# Returning absolute path to the pyarrow include directory (if bundled, e.g. in +# wheels) +def get_include() -> str: ... +def _get_pkg_config_executable() -> str: ... +def _has_pkg_config(pkgname: str) -> bool: ... +def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ... +def get_libraries() -> list[str]: ... +def create_library_symlinks() -> None: ... +def get_library_dirs() -> list[str]: ... 
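The build helpers declared just above (get_include, get_libraries, get_library_dirs) return the values a native extension feeds to its compiler and linker flags. A minimal sketch, not part of this patch, of inspecting them; the exact library names depend on the installed wheel:

    import pyarrow as pa

    # Paths and names a setup.py would pass to Extension(include_dirs=...,
    # libraries=..., library_dirs=...) when compiling against the bundled Arrow.
    print(pa.get_include())       # include directory shipped with the wheel
    print(pa.get_libraries())     # library names, e.g. ["arrow", "arrow_python"]
    print(pa.get_library_dirs())  # directories containing those libraries
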
+ +__all__ = [ + "__version__", + "_lib", + "_gc_enabled", + "BuildInfo", + "RuntimeInfo", + "set_timezone_db_path", + "MonthDayNano", + "VersionInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "cpu_count", + "set_cpu_count", + "enable_signal_handlers", + "io_thread_count", + "set_io_thread_count", + "show_versions", + "show_info", + "_module_is_available", + "_filesystem_is_available", + "null", + "bool_", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "time32", + "time64", + "timestamp", + "date32", + "date64", + "duration", + "month_day_nano_interval", + "float16", + "float32", + "float64", + "binary", + "string", + "utf8", + "binary_view", + "string_view", + "large_binary", + "large_string", + "large_utf8", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "struct", + "union", + "sparse_union", + "dense_union", + "dictionary", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "field", + "type_for_alias", + "DataType", + "DictionaryType", + "StructType", + "ListType", + "LargeListType", + "FixedSizeListType", + "ListViewType", + "LargeListViewType", + "MapType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "BaseExtensionType", + "ExtensionType", + "RunEndEncodedType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "PyExtensionType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "DictionaryMemo", + "KeyValueMetadata", + "Field", + "Schema", + "schema", + "unify_schemas", + "Array", + "Tensor", + "array", + "chunked_array", + "record_batch", + "nulls", + "repeat", + "SparseCOOTensor", + "SparseCSRMatrix", + "SparseCSCMatrix", + "SparseCSFTensor", + "infer_type", + "from_numpy_dtype", + "NullArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "BooleanArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "ListArray", + "LargeListArray", + "FixedSizeListArray", + "ListViewArray", + "LargeListViewArray", + "MapArray", + "UnionArray", + "BinaryArray", + "StringArray", + "LargeBinaryArray", + "LargeStringArray", + "BinaryViewArray", + "StringViewArray", + "FixedSizeBinaryArray", + "DictionaryArray", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "StructArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "RunEndEncodedArray", + "FixedShapeTensorArray", + "scalar", + "NA", + "NULL", + "Scalar", + "NullScalar", + "BooleanScalar", + "Int8Scalar", + "Int16Scalar", + "Int32Scalar", + "Int64Scalar", + "UInt8Scalar", + "UInt16Scalar", + "UInt32Scalar", + "UInt64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "ListScalar", + "LargeListScalar", + "FixedSizeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + 
"Time64Scalar", + "TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "BinaryViewScalar", + "StringScalar", + "LargeStringScalar", + "StringViewScalar", + "FixedSizeBinaryScalar", + "DictionaryScalar", + "MapScalar", + "StructScalar", + "UnionScalar", + "RunEndEncodedScalar", + "ExtensionScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "DeviceAllocationType", + "Device", + "MemoryManager", + "default_cpu_memory_manager", + "Buffer", + "ResizableBuffer", + "foreign_buffer", + "py_buffer", + "Codec", + "compress", + "decompress", + "allocate_buffer", + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "total_allocated_bytes", + "set_memory_pool", + "default_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "logging_memory_pool", + "proxy_memory_pool", + "log_memory_allocations", + "jemalloc_set_decay_ms", + "supported_memory_backends", + "NativeFile", + "PythonFile", + "BufferedInputStream", + "BufferedOutputStream", + "CacheOptions", + "CompressedInputStream", + "CompressedOutputStream", + "TransformInputStream", + "transcoding_input_stream", + "FixedSizeBufferWriter", + "BufferReader", + "BufferOutputStream", + "OSFile", + "MemoryMappedFile", + "memory_map", + "create_memory_map", + "MockOutputStream", + "input_stream", + "output_stream", + "have_libhdfs", + "ChunkedArray", + "RecordBatch", + "Table", + "table", + "concat_arrays", + "concat_tables", + "TableGroupBy", + "RecordBatchReader", + "ArrowCancelled", + "ArrowCapacityError", + "ArrowException", + "ArrowKeyError", + "ArrowIndexError", + "ArrowInvalid", + "ArrowIOError", + "ArrowMemoryError", + "ArrowNotImplementedError", + "ArrowTypeError", + "ArrowSerializationError", + "serialize_pandas", + "deserialize_pandas", + "ipc", + "types", + "_deprecate_api", + "_deprecate_class", + "Message", + "MessageReader", + "MetadataVersion", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "get_include", + "_get_pkg_config_executable", + "_has_pkg_config", + "_read_pkg_config_variable", + "get_libraries", + "create_library_symlinks", + "get_library_dirs", +] diff --git a/python/pyarrow/__lib_pxi/__init__.pyi b/python/pyarrow/__lib_pxi/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pyarrow/__lib_pxi/array.pyi b/python/pyarrow/__lib_pxi/array.pyi new file mode 100644 index 00000000000..ec1cda30a88 --- /dev/null +++ b/python/pyarrow/__lib_pxi/array.pyi @@ -0,0 +1,4274 @@ +import datetime as dt +import sys + +from collections.abc import Callable +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + Any, + Generic, + Iterable, + Iterator, + Literal, + TypeVar, + overload, +) + +import numpy as np +import pandas as pd + +from pandas.core.dtypes.base import ExtensionDtype +from pyarrow._compute import CastOptions +from pyarrow._stubs_typing import ( + ArrayLike, + Indices, + Mask, + Order, + SupportArrowArray, + SupportArrowDeviceArray, +) +from pyarrow.lib import ( + Buffer, + Device, + MemoryManager, + MemoryPool, + MonthDayNano, + Tensor, + _Weakrefable, +) +from typing_extensions import deprecated + +from . 
import scalar, types +from .device import DeviceAllocationType +from .scalar import NullableCollection, Scalar +from .types import ( + DataType, + Field, + MapType, + _AsPyType, + _BasicDataType, + _BasicValueT, + _DataTypeT, + _IndexT, + _RunEndType, + _Size, +) + +@overload +def array( + values: NullableCollection[bool], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BooleanArray: ... +@overload +def array( + values: NullableCollection[int], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int64Array: ... +@overload +def array( + values: NullableCollection[float], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DoubleArray: ... +@overload +def array( + values: NullableCollection[Decimal], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Decimal128Array: ... +@overload +def array( + values: NullableCollection[dict[str, Any]], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def array( + values: NullableCollection[dt.date], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date32Array: ... +@overload +def array( + values: NullableCollection[dt.time], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["us"]]: ... +@overload +def array( + values: NullableCollection[dt.timedelta], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["us"]]: ... +@overload +def array( + values: NullableCollection[MonthDayNano], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def array( + values: NullableCollection[str], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def array( + values: NullableCollection[bytes], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def array( + values: NullableCollection[list[Any]], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> ListArray[Any]: ... 
+@overload +def array( + values: NullableCollection[_ScalarT], + type: None = None, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Array[_ScalarT]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["null"] | types.NullType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> NullArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["bool", "boolean"] | types.BoolType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BooleanArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i1", "int8"] | types.Int8Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int8Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i2", "int16"] | types.Int16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int16Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i4", "int32"] | types.Int32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int32Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i8", "int64"] | types.Int64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Int64Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u1", "uint8"] | types.UInt8Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt8Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u2", "uint16"] | types.UInt16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt16Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u4", "uint32"] | types.Uint32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt32Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u8", "uint64"] | types.UInt64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> UInt64Array: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> HalfFloatArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f4", "float", "float32"] | types.Float32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> FloatArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f8", "double", "float64"] | types.Float64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DoubleArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string", "str", "utf8"] | types.StringType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary"] | types.BinaryType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_binary"] | types.LargeBinaryType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary_view"] | types.BinaryViewType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string_view"] | types.StringViewType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date32", "date32[day]"] | types.Date32Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date32Array: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date64", "date64[ms]"] | types.Date64Type, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Date64Array: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time32Array[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time32Array[Literal["ms"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Time64Array[Literal["ns"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["ms"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["s"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["ms"]]: ... 
+@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["us"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> DurationArray[Literal["ns"]]: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def array( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: _DataTypeT, + mask: Mask | None = None, + size: int | None = None, + from_pandas: bool | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, +) -> Array[Scalar[_DataTypeT]]: ... +def array(*args, **kawrgs): + """ + Create pyarrow.Array instance from a Python object. + + Parameters + ---------- + obj : sequence, iterable, ndarray, pandas.Series, Arrow-compatible array + If both type and size are specified may be a single use iterable. If + not strongly-typed, Arrow type will be inferred for resulting array. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_device_array__`` method) + can be passed as well. + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred from + the data. + mask : array[bool], optional + Indicate which values are null (True) or not null (False). + size : int64, optional + Size of the elements. If the input is larger than size bail at this + length. For iterators, if size is larger than the input iterator this + will be treated as a "max size", but will involve an initial allocation + of size followed by a resize to the actual size (so if you know the + exact size specifying it correctly will give you better performance). + from_pandas : bool, default None + Use pandas's semantics for inferring nulls from values in + ndarray-like data. If passed, the mask tasks precedence, but + if a value is unmasked (not-null), but still null according to + pandas semantics, then it is null. Defaults to False if not + passed explicitly by user, or True if a pandas object is + passed in. + safe : bool, default True + Check for overflows or other unsafe conversions. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Returns + ------- + array : pyarrow.Array or pyarrow.ChunkedArray + A ChunkedArray instead of an Array is returned if: + + - the object data overflowed binary storage. + - the object's ``__arrow_array__`` protocol method returned a chunked + array. + + Notes + ----- + Timezone will be preserved in the returned array for timezone-aware data, + else no timezone will be returned for naive timestamps. + Internally, UTC values are stored for timezone-aware data with the + timezone set in the data type. 
+ + Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by + default converted as MonthDayNanoIntervalArray. relativedelta leapdays + are ignored as are all absolute fields on both objects. datetime.timedelta + can also be converted to MonthDayNanoIntervalArray but this requires + passing MonthDayNanoIntervalType explicitly. + + Converting to dictionary array will promote to a wider integer type for + indices if the number of distinct values cannot be represented, even if + the index type was explicitly set. This means that if there are more than + 127 values the returned dictionary array's index type will be at least + pa.int16() even if pa.int8() was passed to the function. Note that an + explicit index type will not be demoted even if it is wider than required. + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> pa.array(pd.Series([1, 2])) + + [ + 1, + 2 + ] + + >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) + + ... + -- dictionary: + [ + "a", + "b" + ] + -- indices: + [ + 0, + 1, + 0 + ] + + >>> import numpy as np + >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) + + [ + 1, + null + ] + + >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) + >>> arr.type.index_type + DataType(int16) + """ + +@overload +def asarray(values: NullableCollection[bool]) -> BooleanArray: ... +@overload +def asarray(values: NullableCollection[int]) -> Int64Array: ... +@overload +def asarray(values: NullableCollection[float]) -> DoubleArray: ... +@overload +def asarray(values: NullableCollection[Decimal]) -> Decimal128Array: ... +@overload +def asarray(values: NullableCollection[dict[str, Any]]) -> StructArray: ... +@overload +def asarray(values: NullableCollection[dt.date]) -> Date32Array: ... +@overload +def asarray(values: NullableCollection[dt.time]) -> Time64Array: ... +@overload +def asarray(values: NullableCollection[dt.timedelta]) -> DurationArray: ... +@overload +def asarray(values: NullableCollection[MonthDayNano]) -> MonthDayNanoIntervalArray: ... +@overload +def asarray(values: NullableCollection[list[Any]]) -> ListArray[Any]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["null"] | types.NullType, +) -> NullArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["bool", "boolean"] | types.BoolType, +) -> BooleanArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i1", "int8"] | types.Int8Type, +) -> Int8Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i2", "int16"] | types.Int16Type, +) -> Int16Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i4", "int32"] | types.Int32Type, +) -> Int32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["i8", "int64"] | types.Int64Type, +) -> Int64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u1", "uint8"] | types.UInt8Type, +) -> UInt8Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u2", "uint16"] | types.UInt16Type, +) -> UInt16Array: ... 
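A small illustration, not part of the stub itself, of the return types the array() overloads above resolve to when the element type of the input is statically known; the runtime types printed at the end mirror the annotations:

    import pyarrow as pa

    ints = pa.array([1, 2, None])                      # stubs: Int64Array (int64 inferred)
    text = pa.array(["a", "b"])                        # stubs: StringArray
    flags = pa.array([True, False], type=pa.bool_())   # explicit type: BooleanArray
    print(ints.type, text.type, flags.type)            # int64 string bool
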
+@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u4", "uint32"] | types.Uint32Type, +) -> UInt32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["u8", "uint64"] | types.UInt64Type, +) -> UInt64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, +) -> HalfFloatArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f4", "float", "float32"] | types.Float32Type, +) -> FloatArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["f8", "double", "float64"] | types.Float64Type, +) -> DoubleArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string", "str", "utf8"] | types.StringType, +) -> StringArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary"] | types.BinaryType, +) -> BinaryArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, +) -> LargeStringArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["large_binary"] | types.LargeBinaryType, +) -> LargeBinaryArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["binary_view"] | types.BinaryViewType, +) -> BinaryViewArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["string_view"] | types.StringViewType, +) -> StringViewArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date32", "date32[day]"] | types.Date32Type, +) -> Date32Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["date64", "date64[ms]"] | types.Date64Type, +) -> Date64Array: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], +) -> Time32Array[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], +) -> Time32Array[Literal["ms"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], +) -> Time64Array[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], +) -> Time64Array[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], +) -> TimestampArray[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], +) -> TimestampArray[Literal["ms"]]: ... 
+@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], +) -> TimestampArray[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], +) -> TimestampArray[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], +) -> DurationArray[Literal["s"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], +) -> DurationArray[Literal["ms"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], +) -> DurationArray[Literal["us"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], +) -> DurationArray[Literal["ns"]]: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, +) -> MonthDayNanoIntervalArray: ... +@overload +def asarray( + values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: _DataTypeT, +) -> Array[Scalar[_DataTypeT]]: ... +def asarray(*args, **kwargs): + """ + Convert to pyarrow.Array, inferring type if not provided. + + Parameters + ---------- + values : array-like + This can be a sequence, numpy.ndarray, pyarrow.Array or + pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be + a ChunkedArray, otherwise the output will be a Array. + type : string or DataType + Explicitly construct the array with this type. Attempt to cast if + indicated type is different. + + Returns + ------- + arr : Array or ChunkedArray + """ + +@overload +def nulls(size: int, memory_pool: MemoryPool | None = None) -> NullArray: ... +@overload +def nulls( + size: int, type: types.NullType | None, memory_pool: MemoryPool | None = None +) -> NullArray: ... +@overload +def nulls( + size: int, type: types.BoolType, memory_pool: MemoryPool | None = None +) -> BooleanArray: ... +@overload +def nulls(size: int, type: types.Int8Type, memory_pool: MemoryPool | None = None) -> Int8Array: ... +@overload +def nulls( + size: int, type: types.Int16Type, memory_pool: MemoryPool | None = None +) -> Int16Array: ... +@overload +def nulls( + size: int, type: types.Int32Type, memory_pool: MemoryPool | None = None +) -> Int32Array: ... +@overload +def nulls( + size: int, type: types.Int64Type, memory_pool: MemoryPool | None = None +) -> Int64Array: ... +@overload +def nulls( + size: int, type: types.UInt8Type, memory_pool: MemoryPool | None = None +) -> UInt8Array: ... +@overload +def nulls( + size: int, type: types.UInt16Type, memory_pool: MemoryPool | None = None +) -> UInt16Array: ... +@overload +def nulls( + size: int, type: types.Uint32Type, memory_pool: MemoryPool | None = None +) -> UInt32Array: ... +@overload +def nulls( + size: int, type: types.UInt64Type, memory_pool: MemoryPool | None = None +) -> UInt64Array: ... +@overload +def nulls( + size: int, type: types.Float16Type, memory_pool: MemoryPool | None = None +) -> HalfFloatArray: ... 
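As a usage note for the asarray() helper documented above (its docstring carries no example): it accepts any array-like and, when a type is given, casts to it. A brief sketch, assuming NumPy is available:

    import numpy as np
    import pyarrow as pa

    a = pa.asarray(np.arange(3))                   # dtype-driven inference (int64 on most platforms)
    b = pa.asarray([1, 2, 3], type=pa.float64())   # cast to the requested type
    print(a.type, b.type)                          # e.g. int64 double
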
+@overload +def nulls( + size: int, type: types.Float32Type, memory_pool: MemoryPool | None = None +) -> FloatArray: ... +@overload +def nulls( + size: int, type: types.Float64Type, memory_pool: MemoryPool | None = None +) -> DoubleArray: ... +@overload +def nulls( + size: int, type: types.Decimal32Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal64Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal128Type, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def nulls( + size: int, type: types.Decimal256Type, memory_pool: MemoryPool | None = None +) -> Decimal256Array: ... +@overload +def nulls( + size: int, type: types.Date32Type, memory_pool: MemoryPool | None = None +) -> Date32Array: ... +@overload +def nulls( + size: int, type: types.Date64Type, memory_pool: MemoryPool | None = None +) -> Date64Array: ... +@overload +def nulls( + size: int, type: types.Time32Type[types._Time32Unit], memory_pool: MemoryPool | None = None +) -> Time32Array[types._Time32Unit]: ... +@overload +def nulls( + size: int, type: types.Time64Type[types._Time64Unit], memory_pool: MemoryPool | None = None +) -> Time64Array[types._Time64Unit]: ... +@overload +def nulls( + size: int, + type: types.TimestampType[types._Unit, types._Tz], + memory_pool: MemoryPool | None = None, +) -> TimestampArray[types._Unit, types._Tz]: ... +@overload +def nulls( + size: int, type: types.DurationType[types._Unit], memory_pool: MemoryPool | None = None +) -> DurationArray[types._Unit]: ... +@overload +def nulls( + size: int, type: types.MonthDayNanoIntervalType, memory_pool: MemoryPool | None = None +) -> MonthDayNanoIntervalArray: ... +@overload +def nulls( + size: int, + type: types.BinaryType, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def nulls( + size: int, + type: types.LargeBinaryType, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def nulls( + size: int, + type: types.FixedSizeBinaryType, + memory_pool: MemoryPool | None = None, +) -> FixedSizeBinaryArray: ... +@overload +def nulls( + size: int, + type: types.StringType, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def nulls( + size: int, + type: types.LargeStringType, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def nulls( + size: int, + type: types.BinaryViewType, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def nulls( + size: int, + type: types.StringViewType, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def nulls( + size: int, + type: types.LargeListType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> LargeListArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.ListViewType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> ListViewArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.LargeListViewType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> LargeListViewArray[_DataTypeT]: ... +@overload +def nulls( + size: int, + type: types.FixedSizeListType[_DataTypeT, _Size], + memory_pool: MemoryPool | None = None, +) -> FixedSizeListArray[_DataTypeT, _Size]: ... +@overload +def nulls( + size: int, + type: types.ListType[_DataTypeT], + memory_pool: MemoryPool | None = None, +) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... 
+@overload +def nulls( + size: int, + type: types.StructType, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def nulls( + size: int, + type: types.MapType[_MapKeyT, _MapItemT], + memory_pool: MemoryPool | None = None, +) -> MapArray[_MapKeyT, _MapItemT]: ... +@overload +def nulls( + size: int, + type: types.DictionaryType[_IndexT, _BasicValueT], + memory_pool: MemoryPool | None = None, +) -> DictionaryArray[_IndexT, _BasicValueT]: ... +@overload +def nulls( + size: int, + type: types.RunEndEncodedType[_RunEndType, _BasicValueT], + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... +@overload +def nulls( + size: int, + type: types.UnionType, + memory_pool: MemoryPool | None = None, +) -> UnionArray: ... +@overload +def nulls( + size: int, + type: types.FixedShapeTensorType[types._ValueT], + memory_pool: MemoryPool | None = None, +) -> FixedShapeTensorArray[Any]: ... +@overload +def nulls( + size: int, + type: types.Bool8Type, + memory_pool: MemoryPool | None = None, +) -> Bool8Array: ... +@overload +def nulls( + size: int, + type: types.UuidType, + memory_pool: MemoryPool | None = None, +) -> UuidArray[Any]: ... +@overload +def nulls( + size: int, + type: types.JsonType, + memory_pool: MemoryPool | None = None, +) -> JsonArray[Any]: ... +@overload +def nulls( + size: int, + type: types.OpaqueType, + memory_pool: MemoryPool | None = None, +) -> OpaqueArray[Any]: ... +@overload +def nulls( + size: int, + type: types.ExtensionType, + memory_pool: MemoryPool | None = None, +) -> ExtensionArray[Any]: ... +def nulls(*args, **kwargs): + """ + Create a strongly-typed Array instance with all elements null. + + Parameters + ---------- + size : int + Array length. + type : pyarrow.DataType, default None + Explicit type for the array. By default use NullType. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.nulls(10) + + 10 nulls + + >>> pa.nulls(3, pa.uint32()) + + [ + null, + null, + null + ] + """ + +@overload +def repeat( + value: None | scalar.NullScalar, size: int, memory_pool: MemoryPool | None = None +) -> NullArray: ... +@overload +def repeat( # type: ignore[overload-overlap] + value: bool | scalar.BooleanScalar, size: int, memory_pool: MemoryPool | None = None +) -> BooleanArray: ... +@overload +def repeat( + value: scalar.Int8Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int8Array: ... +@overload +def repeat( + value: scalar.Int16Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int16Array: ... +@overload +def repeat( + value: scalar.Int32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int32Array: ... +@overload +def repeat( + value: int | scalar.Int64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Int64Array: ... +@overload +def repeat( + value: scalar.UInt8Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt8Array: ... +@overload +def repeat( + value: scalar.UInt16Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt16Array: ... +@overload +def repeat( + value: scalar.UInt32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt32Array: ... +@overload +def repeat( + value: scalar.UInt64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> UInt64Array: ... 
+@overload +def repeat( + value: scalar.HalfFloatScalar, size: int, memory_pool: MemoryPool | None = None +) -> HalfFloatArray: ... +@overload +def repeat( + value: scalar.FloatScalar, size: int, memory_pool: MemoryPool | None = None +) -> FloatArray: ... +@overload +def repeat( + value: float | scalar.DoubleScalar, size: int, memory_pool: MemoryPool | None = None +) -> DoubleArray: ... +@overload +def repeat( + value: Decimal | scalar.Decimal32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal32Array: ... +@overload +def repeat( + value: scalar.Decimal64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal64Array: ... +@overload +def repeat( + value: scalar.Decimal128Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal128Array: ... +@overload +def repeat( + value: scalar.Decimal256Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Decimal256Array: ... +@overload +def repeat( + value: dt.date | scalar.Date32Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Date32Array: ... +@overload +def repeat( + value: scalar.Date64Scalar, size: int, memory_pool: MemoryPool | None = None +) -> Date64Array: ... +@overload +def repeat( + value: scalar.Time32Scalar[types._Time32Unit], size: int, memory_pool: MemoryPool | None = None +) -> Time32Array[types._Time32Unit]: ... +@overload +def repeat( + value: dt.time | scalar.Time64Scalar[types._Time64Unit], + size: int, + memory_pool: MemoryPool | None = None, +) -> Time64Array[types._Time64Unit]: ... +@overload +def repeat( + value: scalar.TimestampScalar[types._Unit, types._Tz], + size: int, + memory_pool: MemoryPool | None = None, +) -> TimestampArray[types._Unit, types._Tz]: ... +@overload +def repeat( + value: dt.timedelta | scalar.DurationScalar[types._Unit], + size: int, + memory_pool: MemoryPool | None = None, +) -> DurationArray[types._Unit]: ... +@overload +def repeat( # pyright: ignore[reportOverlappingOverload] + value: MonthDayNano | scalar.MonthDayNanoIntervalScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalArray: ... +@overload +def repeat( + value: bytes | scalar.BinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> BinaryArray: ... +@overload +def repeat( + value: scalar.LargeBinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryArray: ... +@overload +def repeat( + value: scalar.FixedSizeBinaryScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedSizeBinaryArray: ... +@overload +def repeat( + value: str | scalar.StringScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StringArray: ... +@overload +def repeat( + value: scalar.LargeStringScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeStringArray: ... +@overload +def repeat( + value: scalar.BinaryViewScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> BinaryViewArray: ... +@overload +def repeat( + value: scalar.StringViewScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StringViewArray: ... +@overload +def repeat( + value: list[Any] | tuple[Any] | scalar.ListScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... +@overload +def repeat( + value: scalar.FixedSizeListScalar[_DataTypeT, _Size], + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedSizeListArray[_DataTypeT, _Size]: ... 
+@overload +def repeat( + value: scalar.LargeListScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeListArray[_DataTypeT]: ... +@overload +def repeat( + value: scalar.ListViewScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> ListViewArray[_DataTypeT]: ... +@overload +def repeat( + value: scalar.LargeListViewScalar[_DataTypeT], + size: int, + memory_pool: MemoryPool | None = None, +) -> LargeListViewArray[_DataTypeT]: ... +@overload +def repeat( + value: dict[str, Any] | scalar.StructScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> StructArray: ... +@overload +def repeat( + value: scalar.MapScalar[_MapKeyT, _MapItemT], + size: int, + memory_pool: MemoryPool | None = None, +) -> MapArray[_MapKeyT, _MapItemT]: ... +@overload +def repeat( + value: scalar.DictionaryScalar[_IndexT, _BasicValueT], + size: int, + memory_pool: MemoryPool | None = None, +) -> DictionaryArray[_IndexT, _BasicValueT]: ... +@overload +def repeat( + value: scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT], + size: int, + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... +@overload +def repeat( + value: scalar.UnionScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> UnionArray: ... +@overload +def repeat( + value: scalar.FixedShapeTensorScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> FixedShapeTensorArray[Any]: ... +@overload +def repeat( + value: scalar.Bool8Scalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> Bool8Array: ... +@overload +def repeat( + value: scalar.UuidScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> UuidArray[Any]: ... +@overload +def repeat( + value: scalar.JsonScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> JsonArray[Any]: ... +@overload +def repeat( + value: scalar.OpaqueScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> OpaqueArray[Any]: ... +@overload +def repeat( + value: scalar.ExtensionScalar, + size: int, + memory_pool: MemoryPool | None = None, +) -> ExtensionArray[Any]: ... +def repeat(*args, **kwargs): + """ + Create an Array instance whose slots are the given scalar. + + Parameters + ---------- + value : Scalar-like object + Either a pyarrow.Scalar or any python object coercible to a Scalar. + size : int + Number of times to repeat the scalar in the output Array. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.repeat(10, 3) + + [ + 10, + 10, + 10 + ] + + >>> pa.repeat([1, 2], 2) + + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + + >>> pa.repeat("string", 3) + + [ + "string", + "string", + "string" + ] + + >>> pa.repeat(pa.scalar({"a": 1, "b": [1, 2]}), 2) + + -- is_valid: all not null + -- child 0 type: int64 + [ + 1, + 1 + ] + -- child 1 type: list + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + """ + +def infer_type(values: Iterable[Any], mask: Mask, from_pandas: bool = False) -> DataType: + """ + Attempt to infer Arrow data type that can hold the passed Python + sequence type in an Array object + + Parameters + ---------- + values : array-like + Sequence to infer type from. + mask : ndarray (bool type), optional + Optional exclusion mask where True marks null, False non-null. + from_pandas : bool, default False + Use pandas's NA/null sentinel values for type inference. 
+ + Returns + ------- + type : DataType + """ + +class ArrayStatistics(_Weakrefable): + """ + The class for statistics of an array. + """ + @property + def null_count(self) -> int: + """ + The number of nulls. + """ + @property + def distinct_count(self) -> int: + """ + The number of distinct values. + """ + @property + def min(self) -> Any: + """ + The minimum value. + """ + @property + def is_min_exact(self) -> bool: + """ + Whether the minimum value is an exact value or not. + """ + @property + def max(self) -> Any: + """ + The maximum value. + """ + + @property + def is_max_exact(self) -> bool: + """ + Whether the maximum value is an exact value or not. + """ + +_ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series) + +class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): + def to_pandas( + self, + memory_pool: MemoryPool | None = None, + categories: list | None = None, + strings_to_categorical: bool = False, + zero_copy_only: bool = False, + integer_object_nulls: bool = False, + date_as_object: bool = True, + timestamp_as_object: bool = False, + use_threads: bool = True, + deduplicate_objects: bool = True, + ignore_metadata: bool = False, + safe: bool = True, + split_blocks: bool = False, + self_destruct: bool = False, + maps_as_pydicts: Literal["None", "lossy", "strict"] | None = None, + types_mapper: Callable[[DataType], ExtensionDtype | None] | None = None, + coerce_temporal_nanoseconds: bool = False, + ) -> _ConvertAs: + """ + Convert to a pandas-compatible NumPy array or DataFrame, as appropriate + + Parameters + ---------- + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + categories : list, default empty + List of fields that should be returned as pandas.Categorical. Only + applies to table-like data structures. + strings_to_categorical : bool, default False + Encode string (UTF8) and binary types to pandas.Categorical. + zero_copy_only : bool, default False + Raise an ArrowException if this function call would require copying + the underlying data. + integer_object_nulls : bool, default False + Cast integers with nulls to objects + date_as_object : bool, default True + Cast dates to objects. If False, convert to datetime64 dtype with + the equivalent time unit (if supported). Note: in pandas version + < 2.0, only datetime64[ns] conversion is supported. + timestamp_as_object : bool, default False + Cast non-nanosecond timestamps (np.datetime64) to objects. This is + useful in pandas version 1.x if you have timestamps that don't fit + in the normal date range of nanosecond timestamps (1678 CE-2262 CE). + Non-nanosecond timestamps are supported in pandas version 2.0. + If False, all timestamps are converted to datetime64 dtype. + use_threads : bool, default True + Whether to parallelize the conversion using multiple threads. + deduplicate_objects : bool, default True + Do not create multiple copies Python objects when created, to save + on memory use. Conversion will be slower. + ignore_metadata : bool, default False + If True, do not use the 'pandas' metadata to reconstruct the + DataFrame index, if present + safe : bool, default True + For certain data types, a cast is needed in order to store the + data in a pandas DataFrame or Series (e.g. timestamps are always + stored as nanoseconds in pandas). This option controls whether it + is a safe cast or not. 
+ split_blocks : bool, default False + If True, generate one internal "block" for each column when + creating a pandas.DataFrame from a RecordBatch or Table. While this + can temporarily reduce memory note that various pandas operations + can trigger "consolidation" which may balloon memory use. + self_destruct : bool, default False + EXPERIMENTAL: If True, attempt to deallocate the originating Arrow + memory while converting the Arrow object to pandas. If you use the + object after calling to_pandas with this option it will crash your + program. + + Note that you may not see always memory usage improvements. For + example, if multiple columns share an underlying allocation, + memory can't be freed until all columns are converted. + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + This can change the ordering of (key, value) pairs, and will + deduplicate multiple keys, resulting in a possible loss of data. + + If 'lossy', this key deduplication results in a warning printed + when detected. If 'strict', this instead results in an exception + being raised when detected. + types_mapper : function, default None + A function mapping a pyarrow DataType to a pandas ExtensionDtype. + This can be used to override the default pandas type for conversion + of built-in pyarrow types or in absence of pandas_metadata in the + Table schema. The function receives a pyarrow DataType and is + expected to return a pandas ExtensionDtype or ``None`` if the + default conversion should be used for that type. If you have + a dictionary mapping, you can pass ``dict.get`` as function. + coerce_temporal_nanoseconds : bool, default False + Only applicable to pandas version >= 2.0. + A legacy option to coerce date32, date64, duration, and timestamp + time units to nanoseconds when converting to pandas. This is the + default behavior in pandas version 1.x. Set this option to True if + you'd like to use this coercion when using pandas version >= 2.0 + for backwards compatibility (not recommended otherwise). + + Returns + ------- + pandas.Series or pandas.DataFrame depending on type of object + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + + Convert a Table to pandas DataFrame: + + >>> table = pa.table( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... 
) + >>> table.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + >>> isinstance(table.to_pandas(), pd.DataFrame) + True + + Convert a RecordBatch to pandas DataFrame: + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + >>> batch + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + >>> isinstance(batch.to_pandas(), pd.DataFrame) + True + + Convert a Chunked Array to pandas Series: + + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_pandas() + 0 2 + 1 2 + 2 4 + 3 4 + 4 5 + 5 100 + dtype: int64 + >>> isinstance(n_legs.to_pandas(), pd.Series) + True + """ + +_CastAs = TypeVar("_CastAs", bound=DataType) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) +_ScalarT = TypeVar("_ScalarT", bound=Scalar) + +class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + """ + The base class for all Arrow arrays. + """ + + def diff(self, other: Self) -> str: + """ + Compare contents of this array against another one. + + Return a string containing the result of diffing this array + (on the left side) against the other array (on the right side). + + Parameters + ---------- + other : Array + The other array to compare this array with. + + Returns + ------- + diff : str + A human-readable printout of the differences. + + Examples + -------- + >>> import pyarrow as pa + >>> left = pa.array(["one", "two", "three"]) + >>> right = pa.array(["two", None, "two-and-a-half", "three"]) + >>> print(left.diff(right)) # doctest: +SKIP + + @@ -0, +0 @@ + -"one" + @@ -2, +1 @@ + +null + +"two-and-a-half" + """ + def cast( + self, + target_type: _CastAs, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_CastAs]]: + """ + Cast array values to another data type + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, default None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Returns + ------- + cast : Array + """ + def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: + """ + Return zero-copy "view" of array as another data type. + + The data types must have compatible columnar buffer layouts + + Parameters + ---------- + target_type : DataType + Type to construct view as. + + Returns + ------- + view : Array + """ + def sum(self, **kwargs) -> _Scalar_co: + """ + Sum the values in a numerical array. + + See :func:`pyarrow.compute.sum` for full usage. + + Parameters + ---------- + **kwargs : dict, optional + Options to pass to :func:`pyarrow.compute.sum`. + + Returns + ------- + sum : Scalar + A scalar containing the sum value. + """ + @property + def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ... + def unique(self) -> Self: + """ + Compute distinct elements in array. 
+ + Returns + ------- + unique : Array + An array of the same data type, with deduplicated elements. + """ + def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: + """ + Compute dictionary-encoded representation of array. + + See :func:`pyarrow.compute.dictionary_encode` for full usage. + + Parameters + ---------- + null_encoding : str, default "mask" + How to handle null entries. + + Returns + ------- + encoded : DictionaryArray + A dictionary-encoded version of this array. + """ + def value_count(self) -> StructArray: + """ + Compute counts of unique elements in array. + + Returns + ------- + StructArray + An array of structs + """ + @overload + @staticmethod + def from_pandas( + obj: pd.Series | np.ndarray | ArrayLike, + *, + mask: Mask | None = None, + type: _DataTypeT, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar[_DataTypeT]]: ... + @overload + @staticmethod + def from_pandas( + obj: pd.Series | np.ndarray | ArrayLike, + *, + mask: Mask | None = None, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Array[Scalar]: ... + @staticmethod + def from_pandas(*args, **kwargs): + """ + Convert pandas.Series to an Arrow Array. + + This method uses Pandas semantics about what values indicate + nulls. See pyarrow.array for more general conversion from arrays or + sequences to Arrow arrays. + + Parameters + ---------- + obj : ndarray, pandas.Series, array-like + mask : array (boolean), optional + Indicate which values are null (True) or not null (False). + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred + from the data. + safe : bool, default True + Check for overflows or other unsafe conversions. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Notes + ----- + Localized timestamps will currently be returned as UTC (pandas's native + representation). Timezone-naive data will be implicitly interpreted as + UTC. + + Returns + ------- + array : pyarrow.Array or pyarrow.ChunkedArray + ChunkedArray is returned if object data overflows binary buffer. + """ + @staticmethod + def from_buffers( + type: _DataTypeT, + length: int, + buffers: list[Buffer], + null_count: int = -1, + offset=0, + children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None, + ) -> Array[Scalar[_DataTypeT]]: + """ + Construct an Array from a sequence of buffers. + + The concrete type returned depends on the datatype. + + Parameters + ---------- + type : DataType + The value type of the array. + length : int + The number of values in the array. + buffers : List[Buffer] + The buffers backing this array. + null_count : int, default -1 + The number of null entries in the array. Negative value means that + the null count is not known. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + children : List[Array], default None + Nested type children with length matching type.num_fields. + + Returns + ------- + array : Array + """ + @property + def null_count(self) -> int: ... + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the array. + + In other words, the sum of bytes from all buffer + ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will be counted multiple times. 
+ + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the array. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + """ + def __sizeof__(self) -> int: ... + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_string( + self, + *, + indent: int = 2, + top_level_indent: int = 0, + window: int = 10, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: + """ + Render a "pretty-printed" string representation of the Array. + + Note: for data on a non-CPU device, the full array is copied to CPU + memory. + + Parameters + ---------- + indent : int, default 2 + How much to indent the internal items in the string to + the right, by default ``2``. + top_level_indent : int, default 0 + How much to indent right the entire content of the array, + by default ``0``. + window : int + How many primitive items to preview at the begin and end + of the array when the array is bigger than the window. + The other items will be ellipsed. + container_window : int + How many container items (such as a list in a list array) + to preview at the begin and end of the array when the array + is bigger than the window. + skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. + """ + format = to_string + def equals(self, other: Self) -> bool: ... + def __len__(self) -> int: ... + def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: + """ + Return BooleanArray indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array + """ + def is_nan(self) -> BooleanArray: + """ + Return BooleanArray indicating the NaN values. + + Returns + ------- + array : boolean Array + """ + def is_valid(self) -> BooleanArray: + """ + Return BooleanArray indicating the non-null values. + """ + def fill_null( + self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType + ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: + """ + See :func:`pyarrow.compute.fill_null` for usage. + + Parameters + ---------- + fill_value : any + The replacement value for null entries. + + Returns + ------- + result : Array + A new array with nulls replaced by the given value. + """ + @overload + def __getitem__(self, key: int) -> _Scalar_co: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key): + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or Array (slice) + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this array. + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice. + length : int, default None + Length of slice (default is until end of Array starting from + offset). + + Returns + ------- + sliced : Array + An array with the same datatype, containing the sliced values. 
+ """ + def take(self, indices: Indices) -> Self: + """ + Select values from an array. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the array whose values will be returned. + + Returns + ------- + taken : Array + An array with the same datatype, containing the taken values. + """ + def drop_null(self) -> Self: + """ + Remove missing values from an array. + """ + def filter( + self, + mask: Mask, + *, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + ) -> Self: + """ + Select values from an array. + + See :func:`pyarrow.compute.filter` for full usage. + + Parameters + ---------- + mask : Array or array-like + The boolean mask to filter the array with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled. + + Returns + ------- + filtered : Array + An array of the same type, with only the elements selected by + the boolean mask. + """ + @overload + def index( + self: Array[_ScalarT], + value: _ScalarT, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> scalar.Int64Scalar: ... + @overload + def index( + self: Array[Scalar[_BasicDataType[_AsPyType]]], + value: _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> scalar.Int64Scalar: ... + def index(self, *args, **kwargs): + """ + Find the first index of a value. + + See :func:`pyarrow.compute.index` for full usage. + + Parameters + ---------- + value : Scalar or object + The value to look for in the array. + start : int, optional + The start index where to look for `value`. + end : int, optional + The end index where to look for `value`. + memory_pool : MemoryPool, optional + A memory pool for potential memory allocations. + + Returns + ------- + index : Int64Scalar + The index of the value in the array (-1 if not found). + """ + def sort(self, order: Order = "ascending", **kwargs) -> Self: + """ + Sort the Array + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : Array + """ + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> np.ndarray: + """ + Return a NumPy view or copy of this array. + + By default, tries to return a view of this array. This is only + supported for primitive arrays with the same memory layout as NumPy + (i.e. integers, floating point, ..) and without any nulls. + + For the extension arrays, this method simply delegates to the + underlying storage array. + + Parameters + ---------- + zero_copy_only : bool, default True + If True, an exception will be raised if the conversion to a numpy + array would require copying the underlying data (e.g. in presence + of nulls, or for non-primitive types). + writable : bool, default False + For numpy arrays created with zero copy (view on the Arrow data), + the resulting array is not writable (Arrow data is immutable). + By setting this to True, a copy of the array is made to ensure + it is writable. 
+ + Returns + ------- + array : numpy.ndarray + """ + def to_pylist( + self: Array[Scalar[_BasicDataType[_AsPyType]]], + *, + map_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: + """ + Convert to a list of native Python objects. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Returns + ------- + lst : list + """ + tolist = to_pylist + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + @property + def offset(self) -> int: + """ + A relative position into another array's data. + + The purpose is to enable zero-copy slicing. This value defaults to zero + but must be applied on all operations with the physical storage + buffers. + """ + def buffers(self) -> list[Buffer | None]: + """ + Return a list of Buffer objects pointing to this array's physical + storage. + + To correctly interpret these buffers, you need to also apply the offset + multiplied with the size of the stored data type. + """ + def copy_to(self, destination: MemoryManager | Device) -> Self: + """ + Construct a copy of the array with all buffers on destination + device. + + This method recursively copies the array's buffers and those of its + children onto the destination MemoryManager device and returns the + new Array. + + Parameters + ---------- + destination : pyarrow.MemoryManager or pyarrow.Device + The destination device to copy the array to. + + Returns + ------- + Array + """ + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the array type + is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: + """ + Import Array from a C ArrowArray struct, given its pointer + and the imported array type. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArray struct. + type: DataType or int + Either a DataType object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_array__(self, requested_schema=None) -> Any: + """ + Get a pair of PyCapsules containing a C ArrowArray representation of the object. 
+ + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the array to this data type. + If None, the array will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. + """ + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowDeviceArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the array type + is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: + """ + Import Array from a C ArrowDeviceArray struct, given its pointer + and the imported array type. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + type: DataType or int + Either a DataType object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: + """ + Get a pair of PyCapsules containing a C ArrowDeviceArray representation + of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the array to this data type. + If None, the array will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + kwargs + Currently no additional keyword arguments are supported, but + this method will accept any keyword with a value of ``None`` + for compatibility with future keywords. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, + respectively. + """ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... + def __dlpack__(self, stream: int | None = None) -> Any: + """Export a primitive array as a DLPack capsule. + + Parameters + ---------- + stream : int, optional + A Python integer representing a pointer to a stream. Currently not supported. + Stream is provided by the consumer to the producer to instruct the producer + to ensure that operations can safely be performed on the array. + + Returns + ------- + capsule : PyCapsule + A DLPack capsule for the array, pointing to a DLManagedTensor. + """ + def __dlpack_device__(self) -> tuple[int, int]: + """ + Return the DLPack device tuple this arrays resides on. + + Returns + ------- + tuple : Tuple[int, int] + Tuple with index specifying the type of the device (where + CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the + device which is 0 by default for CPU. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the array resides. 
+ + Returns + ------- + DeviceAllocationType + """ + + @property + def is_cpu(self) -> bool: + """ + Whether the array is CPU-accessible. + """ + @property + def statistics(self) -> ArrayStatistics | None: + """ + Statistics of the array. + """ + +class NullArray(Array[scalar.NullScalar]): ... + +class BooleanArray(Array[scalar.BooleanScalar]): + @property + def false_count(self) -> int: ... + @property + def true_count(self) -> int: ... + +class NumericArray(Array[_ScalarT]): ... +class IntegerArray(NumericArray[_ScalarT]): ... +class FloatingPointArray(NumericArray[_ScalarT]): ... +class Int8Array(IntegerArray[scalar.Int8Scalar]): ... +class UInt8Array(IntegerArray[scalar.UInt8Scalar]): ... +class Int16Array(IntegerArray[scalar.Int16Scalar]): ... +class UInt16Array(IntegerArray[scalar.UInt16Scalar]): ... +class Int32Array(IntegerArray[scalar.Int32Scalar]): ... +class UInt32Array(IntegerArray[scalar.UInt32Scalar]): ... +class Int64Array(IntegerArray[scalar.Int64Scalar]): ... +class UInt64Array(IntegerArray[scalar.UInt64Scalar]): ... +class Date32Array(NumericArray[scalar.Date32Scalar]): ... +class Date64Array(NumericArray[scalar.Date64Scalar]): ... +class TimestampArray(NumericArray[scalar.TimestampScalar[types._Unit, types._Tz]]): ... +class Time32Array(NumericArray[scalar.Time32Scalar[types._Time32Unit]]): ... +class Time64Array(NumericArray[scalar.Time64Scalar[types._Time64Unit]]): ... +class DurationArray(NumericArray[scalar.DurationScalar[types._Unit]]): ... +class MonthDayNanoIntervalArray(Array[scalar.MonthDayNanoIntervalScalar]): ... +class HalfFloatArray(FloatingPointArray[scalar.HalfFloatScalar]): ... +class FloatArray(FloatingPointArray[scalar.FloatScalar]): ... +class DoubleArray(FloatingPointArray[scalar.DoubleScalar]): ... +class FixedSizeBinaryArray(Array[scalar.FixedSizeBinaryScalar]): ... +class Decimal32Array(FixedSizeBinaryArray): ... +class Decimal64Array(FixedSizeBinaryArray): ... +class Decimal128Array(FixedSizeBinaryArray): ... +class Decimal256Array(FixedSizeBinaryArray): ... + +class BaseListArray(Array[_ScalarT]): + def flatten(self, recursive: bool = False) -> Array: ... + def value_parent_indices(self) -> Int64Array: ... + def value_lengths(self) -> Int32Array: ... + +class ListArray(BaseListArray[_ScalarT]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[int], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.Int64Type]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[float], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.Float64Type]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[str], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.StringType]]: ... 
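+    # NOTE (illustrative, not part of the stub API): the from_arrays()
+    # overloads above and below narrow the list element type from the `values`
+    # argument. Assuming these stubs, a type checker should infer, e.g.:
+    #
+    #     import pyarrow as pa
+    #     la = pa.ListArray.from_arrays([0, 2, 4], [1, 2, 3, 4])
+    #     # expected static type: ListArray[ListScalar[Int64Type]]
+    #     # runtime type: list<item: int64>, i.e. [[1, 2], [3, 4]]
+    #
+    # Runtime behaviour follows the docstring below (nulls in `offsets` become
+    # null lists).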
+ @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list[bytes], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[types.BinaryType]]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: list, + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array | list[int], + values: Array | list, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct ListArray from arrays of int32 offsets and values. + + Parameters + ---------- + offsets : Array (int32 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_array : ListArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + >>> # nulls in the offsets array become null lists + >>> offsets = pa.array([0, None, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the ListArray + ignoring the array's offset. + + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. + + Returns + ------- + values : Array + + See Also + -------- + ListArray.flatten : ... + + Examples + -------- + + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, 4, None, 6]]) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + """ + @property + def offsets(self) -> Int32Array: + """ + Return the list offsets as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + offsets : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, 4, 5]]) + >>> array.offsets + + [ + 0, + 2, + 2, + 5 + ] + """ + +class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListArray[_DataTypeT]: ... 
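+    # NOTE (illustrative, not part of the stub API): LargeListArray mirrors
+    # ListArray but uses 64-bit offsets, hence the Int64Array offsets in the
+    # overloads above and below. A minimal runtime sketch assuming these stubs:
+    #
+    #     import pyarrow as pa
+    #     values = pa.array([1, 2, 3, 4])
+    #     offsets = pa.array([0, 2, 4], type=pa.int64())
+    #     lla = pa.LargeListArray.from_arrays(offsets, values)
+    #     # expected static type: LargeListArray[Int64Type]
+    #     # runtime type: large_list<item: int64>, i.e. [[1, 2], [3, 4]]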
+ @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListArray[_DataTypeT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct LargeListArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_array : LargeListArray + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. + + If any of the list elements are null, but are backed by a + non-empty sub-list, those elements will be included in the + output. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's offset. + + Returns + ------- + values : Array + + See Also + -------- + LargeListArray.flatten : ... + + Examples + -------- + + The values include null elements from the sub-lists: + + >>> import pyarrow as pa + >>> array = pa.array( + ... [[1, 2], None, [3, 4, None, 6]], + ... type=pa.large_list(pa.int32()), + ... ) + >>> array.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + + If an array is sliced, the slice still uses the same + underlying data as the original array, just with an + offset. Since values ignores the offset, the values are the + same: + + >>> sliced = array.slice(1, 2) + >>> sliced + + [ + null, + [ + 3, + 4, + null, + 6 + ] + ] + >>> sliced.values + + [ + 1, + 2, + 3, + 4, + null, + 6 + ] + """ + @property + def offsets(self) -> Int64Array: + """ + Return the list offsets as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + offsets : Int64Array + """ + +class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListViewArray[_DataTypeT]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int32Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> ListViewArray[_DataTypeT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct ListViewArray from arrays of int32 offsets, sizes, and values. + + Parameters + ---------- + offsets : Array (int32 type) + sizes : Array (int32 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
+ + Returns + ------- + list_view_array : ListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.ListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the ListViewArray + ignoring the array's offset and sizes. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + Examples + -------- + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + @property + def offsets(self) -> Int32Array: + """ + Return the list offsets as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + offsets : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + @property + def sizes(self) -> Int32Array: + """ + Return the list sizes as an int32 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `ListViewArray.from_arrays` and get back the same + list array if the original one has nulls. + + Returns + ------- + sizes : Int32Array + + Examples + -------- + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + +class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListViewArray[_DataTypeT]: ... + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + values: Array, + *, + type: _DataTypeT, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> LargeListViewArray[_DataTypeT]: ... 
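+    # NOTE (illustrative, not part of the stubs proper): list-view arrays are
+    # addressed by independent offsets *and* sizes, so views may overlap or be
+    # out of order. A runtime sketch matching the from_arrays() docstring below:
+    #
+    #     import pyarrow as pa
+    #     values = pa.array([1, 2, 3, 4])
+    #     offsets = pa.array([0, 1, 2], type=pa.int64())
+    #     sizes = pa.array([2, 2, 2], type=pa.int64())
+    #     arr = pa.LargeListViewArray.from_arrays(offsets, sizes, values)
+    #     # -> [[1, 2], [2, 3], [3, 4]]; the views share the same values buffer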
+ @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct LargeListViewArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + sizes : Array (int64 type) + values : Array (any type) + type : DataType, optional + If not specified, a default ListType with the values' type is + used. + pool : MemoryPool, optional + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + list_view_array : LargeListViewArray + + Examples + -------- + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 1, 2]) + >>> sizes = pa.array([2, 2, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 4 + ] + ] + >>> # use a null mask to represent null values + >>> mask = pa.array([False, True, False]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values, mask=mask) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + >>> # null values can be defined in either offsets or sizes arrays + >>> # WARNING: this will result in a copy of the offsets or sizes arrays + >>> offsets = pa.array([0, None, 2]) + >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) + + [ + [ + 1, + 2 + ], + null, + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> Array: + """ + Return the underlying array of values which backs the LargeListArray + ignoring the array's offset. + + The values array may be out of order and/or contain additional values + that are not found in the logical representation of the array. The only + guarantee is that each non-null value in the ListView Array is contiguous. + + Compare with :meth:`flatten`, which returns only the non-null + values taking into consideration the array's order and offset. + + Returns + ------- + values : Array + + See Also + -------- + LargeListArray.flatten : ... + + Examples + -------- + + The values include null elements from sub-lists: + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 1, + 2 + ], + [], + [ + 2, + null, + 3, + 4 + ] + ] + >>> array.values + + [ + 1, + 2, + null, + 3, + 4 + ] + """ + @property + def offsets(self) -> Int64Array: + """ + Return the list view offsets as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. + + Returns + ------- + offsets : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.offsets + + [ + 0, + 0, + 1 + ] + """ + @property + def sizes(self) -> Int64Array: + """ + Return the list view sizes as an int64 array. + + The returned array will not have a validity bitmap, so you cannot + expect to pass it to `LargeListViewArray.from_arrays` and get back the + same list array if the original one has nulls. 
+ + Returns + ------- + sizes : Int64Array + + Examples + -------- + + >>> import pyarrow as pa + >>> values = [1, 2, None, 3, 4] + >>> offsets = [0, 0, 1] + >>> sizes = [2, 0, 4] + >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) + >>> array.sizes + + [ + 2, + 0, + 4 + ] + """ + +class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _Size]]): + @overload + @classmethod + def from_arrays( + cls, + values: Array[Scalar[_DataTypeT]], + *, + type: None = None, + mask: Mask | None = None, + ) -> FixedSizeListArray[_DataTypeT, None]: ... + @overload + @classmethod + def from_arrays( + cls, + values: Array[Scalar[_DataTypeT]], + limit_size: _Size, + *, + type: None = None, + mask: Mask | None = None, + ) -> FixedSizeListArray[_DataTypeT, _Size]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct FixedSizeListArray from array of values and a list length. + + Parameters + ---------- + values : Array (any type) + list_size : int + The fixed length of the lists. + type : DataType, optional + If not specified, a default ListType with the values' type and + `list_size` length is used. + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + + Returns + ------- + FixedSizeListArray + + Examples + -------- + + Create from a values array and a list size: + + >>> import pyarrow as pa + >>> values = pa.array([1, 2, 3, 4]) + >>> arr = pa.FixedSizeListArray.from_arrays(values, 2) + >>> arr + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + + Or create from a values array, list size and matching type: + + >>> typ = pa.list_(pa.field("values", pa.int64()), 2) + >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) + >>> arr + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + """ + @property + def values(self) -> BaseListArray[scalar.ListScalar[_DataTypeT]]: + """ + Return the underlying array of values which backs the + FixedSizeListArray. + + Note even null elements are included. + + Compare with :meth:`flatten`, which returns only the non-null + sub-list values. + + Returns + ------- + values : Array + + See Also + -------- + FixedSizeListArray.flatten : ... + + Examples + -------- + >>> import pyarrow as pa + >>> array = pa.array([[1, 2], None, [3, None]], type=pa.list_(pa.int32(), 2)) + >>> array.values + + [ + 1, + 2, + null, + null, + 3, + null + ] + + """ + +_MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) +_MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) + +class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): + @overload + @classmethod + def from_arrays( + cls, + offsets: Int64Array, + keys: Array[Scalar[_MapKeyT]], + items: Array[Scalar[_MapItemT]], + *, + type: None = None, + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> MapArray[_MapKeyT, _MapItemT]: ... + @overload + @classmethod + def from_arrays( # pyright: ignore[reportIncompatibleMethodOverride] + cls, + offsets: Int64Array, + values: Array, + *, + type: MapType[_MapKeyT, _MapItemT], + pool: MemoryPool | None = None, + mask: Mask | None = None, + ) -> MapArray[_MapKeyT, _MapItemT]: ... + @classmethod + def from_arrays(cls, *args, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride] + """ + Construct MapArray from arrays of int32 offsets and key, item arrays. 
+ + Parameters + ---------- + offsets : array-like or sequence (int32 type) + keys : array-like or sequence (any type) + items : array-like or sequence (any type) + type : DataType, optional + If not specified, a default MapArray with the keys' and items' type is used. + pool : MemoryPool + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + + Returns + ------- + map_array : MapArray + + Examples + -------- + First, let's understand the structure of our dataset when viewed in a rectangular data model. + The total of 5 respondents answered the question "How much did you like the movie x?". + The value -1 in the integer array means that the value is missing. The boolean array + represents the null bitmask corresponding to the missing values in the integer array. + + >>> import pyarrow as pa + >>> movies_rectangular = np.ma.masked_array( + ... [[10, -1, -1], [8, 4, 5], [-1, 10, 3], [-1, -1, -1], [-1, -1, -1]], + ... [ + ... [False, True, True], + ... [False, False, False], + ... [True, False, False], + ... [True, True, True], + ... [True, True, True], + ... ], + ... ) + + To represent the same data with the MapArray and from_arrays, the data is + formed like this: + + >>> offsets = [ + ... 0, # -- row 1 start + ... 1, # -- row 2 start + ... 4, # -- row 3 start + ... 6, # -- row 4 start + ... 6, # -- row 5 start + ... 6, # -- row 5 end + ... ] + >>> movies = [ + ... "Dark Knight", # ---------------------------------- row 1 + ... "Dark Knight", + ... "Meet the Parents", + ... "Superman", # -- row 2 + ... "Meet the Parents", + ... "Superman", # ----------------- row 3 + ... ] + >>> likings = [ + ... 10, # -------- row 1 + ... 8, + ... 4, + ... 5, # --- row 2 + ... 10, + ... 3, # ------ row 3 + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... + 2 [(Meet the Parents, 10), (Superman, 3)] + 3 [] + 4 [] + dtype: object + + If the data in the empty rows needs to be marked as missing, it's possible + to do so by modifying the offsets argument, so that we specify `None` as + the starting positions of the rows we want marked as missing. The end row + offset still has to refer to the existing value from keys (and values): + + >>> offsets = [ + ... 0, # ----- row 1 start + ... 1, # ----- row 2 start + ... 4, # ----- row 3 start + ... None, # -- row 4 start + ... None, # -- row 5 start + ... 6, # ----- row 5 end + ... ] + >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() + 0 [(Dark Knight, 10)] + 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... + 2 [(Meet the Parents, 10), (Superman, 3)] + 3 None + 4 None + dtype: object + """ + @property + def keys(self) -> Array: + """Flattened array of keys across all maps in array""" + @property + def items(self) -> Array: + """Flattened array of items across all maps in array""" + +class UnionArray(Array[scalar.UnionScalar]): + @deprecated("Use fields() instead") + def child(self, pos: int) -> Field: + """ + DEPRECATED, use field() instead. + + Parameters + ---------- + pos : int + The physical index of the union child field (not its type code). + + Returns + ------- + field : pyarrow.Field + The given child field. + """ + def field(self, pos: int) -> Array: + """ + Return the given child field as an individual array. + + For sparse unions, the returned array has its offset, length, + and null count adjusted. + + For dense unions, the returned array is unchanged. 
+ + Parameters + ---------- + pos : int + The physical index of the union child field (not its type code). + + Returns + ------- + field : Array + The given child field. + """ + @property + def type_codes(self) -> Int8Array: + """Get the type codes array.""" + @property + def offsets(self) -> Int32Array: + """ + Get the value offsets array (dense arrays only). + + Does not account for any slice offset. + """ + @staticmethod + def from_dense( + type: Int8Array, + value_offsets: Int32Array, + children: NullableCollection[Array], + field_names: list[str] | None = None, + type_codes: Int8Array | None = None, + ) -> UnionArray: + """ + Construct dense UnionArray from arrays of int8 types, int32 offsets and + children arrays + + Parameters + ---------- + types : Array (int8 type) + value_offsets : Array (int32 type) + children : list + field_names : list + type_codes : list + + Returns + ------- + union_array : UnionArray + """ + @staticmethod + def from_sparse( + types: Int8Array, + children: NullableCollection[Array], + field_names: list[str] | None = None, + type_codes: Int8Array | None = None, + ) -> UnionArray: + """ + Construct sparse UnionArray from arrays of int8 types and children + arrays + + Parameters + ---------- + types : Array (int8 type) + children : list + field_names : list + type_codes : list + + Returns + ------- + union_array : UnionArray + """ + +class StringArray(Array[scalar.StringScalar]): + @staticmethod + def from_buffers( # type: ignore[override] + length: int, + value_offsets: Buffer, + data: Buffer, + null_bitmap: Buffer | None = None, + null_count: int | None = -1, + offset: int | None = 0, + ) -> StringArray: + """ + Construct a StringArray from value_offsets and data buffers. + If there are nulls in the data, also a null_bitmap and the matching + null_count must be passed. + + Parameters + ---------- + length : int + value_offsets : Buffer + data : Buffer + null_bitmap : Buffer, optional + null_count : int, default 0 + offset : int, default 0 + + Returns + ------- + string_array : StringArray + """ + +class LargeStringArray(Array[scalar.LargeStringScalar]): + @staticmethod + def from_buffers( # type: ignore[override] + length: int, + value_offsets: Buffer, + data: Buffer, + null_bitmap: Buffer | None = None, + null_count: int | None = -1, + offset: int | None = 0, + ) -> StringArray: + """ + Construct a LargeStringArray from value_offsets and data buffers. + If there are nulls in the data, also a null_bitmap and the matching + null_count must be passed. + + Parameters + ---------- + length : int + value_offsets : Buffer + data : Buffer + null_bitmap : Buffer, optional + null_count : int, default 0 + offset : int, default 0 + + Returns + ------- + string_array : StringArray + """ + +class StringViewArray(Array[scalar.StringViewScalar]): ... + +class BinaryArray(Array[scalar.BinaryScalar]): + @property + def total_values_length(self) -> int: + """ + The number of bytes from beginning to end of the data buffer addressed + by the offsets of this BinaryArray. + """ + +class LargeBinaryArray(Array[scalar.LargeBinaryScalar]): + @property + def total_values_length(self) -> int: + """ + The number of bytes from beginning to end of the data buffer addressed + by the offsets of this LargeBinaryArray. + """ + +class BinaryViewArray(Array[scalar.BinaryViewScalar]): ... + +class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): + def dictionary_encode(self) -> Self: ... 
# type: ignore[override] + def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: + """ + Decodes the DictionaryArray to an Array. + """ + @property + def indices(self) -> Array[Scalar[_IndexT]]: ... + @property + def dictionary(self) -> Array[Scalar[_BasicValueT]]: ... + @staticmethod + def from_buffers( # type: ignore[override] + type: _BasicValueT, + length: int, + buffers: list[Buffer], + dictionary: Array | np.ndarray | pd.Series, + null_count: int = -1, + offset: int = 0, + ) -> DictionaryArray[Any, _BasicValueT]: + """ + Construct a DictionaryArray from buffers. + + Parameters + ---------- + type : pyarrow.DataType + length : int + The number of values in the array. + buffers : List[Buffer] + The buffers backing the indices array. + dictionary : pyarrow.Array, ndarray or pandas.Series + The array of values referenced by the indices. + null_count : int, default -1 + The number of null entries in the indices array. Negative value means that + the null count is not known. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + + Returns + ------- + dict_array : DictionaryArray + """ + @staticmethod + def from_arrays( + indices: Indices, + dictionary: Array | np.ndarray | pd.Series, + mask: np.ndarray | pd.Series | BooleanArray | None = None, + ordered: bool = False, + from_pandas: bool = False, + safe: bool = True, + memory_pool: MemoryPool | None = None, + ) -> DictionaryArray: + """ + Construct a DictionaryArray from indices and values. + + Parameters + ---------- + indices : pyarrow.Array, numpy.ndarray or pandas.Series, int type + Non-negative integers referencing the dictionary values by zero + based index. + dictionary : pyarrow.Array, ndarray or pandas.Series + The array of values referenced by the indices. + mask : ndarray or pandas.Series, bool type + True values indicate that indices are actually null. + ordered : bool, default False + Set to True if the category values are ordered. + from_pandas : bool, default False + If True, the indices should be treated as though they originated in + a pandas.Categorical (null encoded as -1). + safe : bool, default True + If True, check that the dictionary indices are in range. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise uses default pool. + + Returns + ------- + dict_array : DictionaryArray + """ + +class StructArray(Array[scalar.StructScalar]): + def field(self, index: int | str) -> Array: + """ + Retrieves the child array belonging to field. + + Parameters + ---------- + index : Union[int, str] + Index / position or name of the field. + + Returns + ------- + result : Array + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: + """ + Return one individual array for each field in the struct. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + result : List[Array] + """ + @staticmethod + def from_arrays( + arrays: Iterable[Array], + names: list[str] | None = None, + fields: list[Field] | None = None, + mask=None, + memory_pool: MemoryPool | None = None, + type: types.StructType | None = None, + ) -> StructArray: + """ + Construct StructArray from collection of arrays representing + each field in the struct. + + Either field names, field instances or a struct type must be passed. 
+ + Parameters + ---------- + arrays : sequence of Array + names : List[str] (optional) + Field names for each struct child. + fields : List[Field] (optional) + Field instances for each struct child. + mask : pyarrow.Array[bool] (optional) + Indicate which values are null (True) or not null (False). + memory_pool : MemoryPool (optional) + For memory allocations, if required, otherwise uses default pool. + type : pyarrow.StructType (optional) + Struct type for name and type of each child. + + Returns + ------- + result : StructArray + """ + def sort(self, order: Order = "ascending", by: str | None = None, **kwargs) -> StructArray: + """ + Sort the StructArray + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + by : str or None, default None + If to sort the array by one of its fields + or by the whole array. + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : StructArray + """ + +class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT]]): + @overload + @staticmethod + def from_arrays( + run_ends: Int16Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int16Type, _BasicValueT]: ... + @overload + @staticmethod + def from_arrays( + run_ends: Int32Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int32Type, _BasicValueT]: ... + @overload + @staticmethod + def from_arrays( + run_ends: Int64Array, + values: Array, + type: DataType | None = None, + ) -> RunEndEncodedArray[types.Int64Type, _BasicValueT]: ... + @staticmethod + def from_arrays(*args, **kwargs): + """ + Construct RunEndEncodedArray from run_ends and values arrays. + + Parameters + ---------- + run_ends : Array (int16, int32, or int64 type) + The run_ends array. + values : Array (any type) + The values array. + type : pyarrow.DataType, optional + The run_end_encoded(run_end_type, value_type) array type. + + Returns + ------- + RunEndEncodedArray + """ + @staticmethod + def from_buffers( # pyright: ignore[reportIncompatibleMethodOverride] + type: DataType, + length: int, + buffers: list[Buffer], + null_count: int = -1, + offset=0, + children: tuple[Array, Array] | None = None, + ) -> RunEndEncodedArray[Any, _BasicValueT]: + """ + Construct a RunEndEncodedArray from all the parameters that make up an + Array. + + RunEndEncodedArrays do not have buffers, only children arrays, but this + implementation is needed to satisfy the Array interface. + + Parameters + ---------- + type : DataType + The run_end_encoded(run_end_type, value_type) type. + length : int + The logical length of the run-end encoded array. Expected to match + the last value of the run_ends array (children[0]) minus the offset. + buffers : List[Buffer] + Empty List or [None]. + null_count : int, default -1 + The number of null entries in the array. Run-end encoded arrays + are specified to not have valid bits and null_count always equals 0. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + children : List[Array] + Nested type children containing the run_ends and values arrays. + + Returns + ------- + RunEndEncodedArray + """ + @property + def run_ends(self) -> Array[scalar.Scalar[_RunEndType]]: + """ + An array holding the logical indexes of each run-end. + + The physical offset to the array is applied. 
+ """ + @property + def values(self) -> Array[scalar.Scalar[_BasicValueT]]: + """ + An array holding the values of each run. + + The physical offset to the array is applied. + """ + def find_physical_offset(self) -> int: + """ + Find the physical offset of this REE array. + + This is the offset of the run that contains the value of the first + logical element of this array considering its offset. + + This function uses binary-search, so it has a O(log N) cost. + """ + def find_physical_length(self) -> int: + """ + Find the physical length of this REE array. + + The physical length of an REE is the number of physical values (and + run-ends) necessary to represent the logical range of values from offset + to length. + + This function uses binary-search, so it has a O(log N) cost. + """ + +_ArrayT = TypeVar("_ArrayT", bound=Array) + +class ExtensionArray(Array[scalar.ExtensionScalar], Generic[_ArrayT]): + @property + def storage(self) -> Any: ... + @staticmethod + def from_storage(typ: types.BaseExtensionType, storage: _ArrayT) -> ExtensionArray[_ArrayT]: + """ + Construct ExtensionArray from type and storage array. + + Parameters + ---------- + typ : DataType + The extension type for the result array. + storage : Array + The underlying storage for the result array. + + Returns + ------- + ext_array : ExtensionArray + """ + +class JsonArray(ExtensionArray[_ArrayT]): + """ + Concrete class for Arrow arrays of JSON data type. + + This does not guarantee that the JSON data actually + is valid JSON. + + Examples + -------- + Define the extension type for JSON array + + >>> import pyarrow as pa + >>> json_type = pa.json_(pa.large_utf8()) + + Create an extension array + + >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] + >>> storage = pa.array(arr, pa.large_utf8()) + >>> pa.ExtensionArray.from_storage(json_type, storage) + + [ + null, + "{ "id":30, "values":["a", "b"] }" + ] + """ + +class UuidArray(ExtensionArray[_ArrayT]): ... + +class FixedShapeTensorArray(ExtensionArray[_ArrayT]): + """ + Concrete class for fixed shape tensor extension arrays. + + Examples + -------- + Define the extension type for tensor array + + >>> import pyarrow as pa + >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) + + Create an extension array + + >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] + >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) + >>> pa.ExtensionArray.from_storage(tensor_type, storage) + + [ + [ + 1, + 2, + 3, + 4 + ], + [ + 10, + 20, + 30, + 40 + ], + [ + 100, + 200, + 300, + 400 + ] + ] + """ + + def to_numpy_ndarray(self) -> np.ndarray: + """ + Convert fixed shape tensor extension array to a multi-dimensional numpy.ndarray. + + The resulting ndarray will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + Ndarray representing tensors in the fixed shape tensor array concatenated + along the first dimension. + """ + def to_tensor(self) -> Tensor: + """ + Convert fixed shape tensor extension array to a pyarrow.Tensor. + + The resulting Tensor will have (ndim + 1) dimensions. + The size of the first dimension will be the length of the fixed shape tensor array + and the rest of the dimensions will match the permuted shape of the fixed + shape tensor. + + The conversion is zero-copy. 
+ + Returns + ------- + pyarrow.Tensor + Tensor representing tensors in the fixed shape tensor array concatenated + along the first dimension. + """ + + @classmethod + def from_numpy_ndarray(cls, obj: np.ndarray) -> Self: + """ + Convert numpy tensors (ndarrays) to a fixed shape tensor extension array. + The first dimension of ndarray will become the length of the fixed + shape tensor array. + If input array data is not contiguous a copy will be made. + + Parameters + ---------- + obj : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], dtype=np.float32) + >>> pa.FixedShapeTensorArray.from_numpy_ndarray(arr) + + [ + [ + 1, + 2, + 3, + 4, + 5, + 6 + ], + [ + 1, + 2, + 3, + 4, + 5, + 6 + ] + ] + """ + +class OpaqueArray(ExtensionArray[_ArrayT]): + """ + Concrete class for opaque extension arrays. + + Examples + -------- + Define the extension type for an opaque array + + >>> import pyarrow as pa + >>> opaque_type = pa.opaque( + ... pa.binary(), + ... type_name="geometry", + ... vendor_name="postgis", + ... ) + + Create an extension array + + >>> arr = [None, b"data"] + >>> storage = pa.array(arr, pa.binary()) + >>> pa.ExtensionArray.from_storage(opaque_type, storage) + + [ + null, + 64617461 + ] + """ + +class Bool8Array(ExtensionArray): + """ + Concrete class for bool8 extension arrays. + + Examples + -------- + Define the extension type for an bool8 array + + >>> import pyarrow as pa + >>> bool8_type = pa.bool8() + + Create an extension array + + >>> arr = [-1, 0, 1, 2, None] + >>> storage = pa.array(arr, pa.int8()) + >>> pa.ExtensionArray.from_storage(bool8_type, storage) + + [ + -1, + 0, + 1, + 2, + null + ] + """ + + def to_numpy(self, zero_copy_only: bool = ..., writable: bool = ...) -> np.ndarray: + """ + Return a NumPy bool view or copy of this array. + + By default, tries to return a view of this array. This is only + supported for arrays without any nulls. + + Parameters + ---------- + zero_copy_only : bool, default True + If True, an exception will be raised if the conversion to a numpy + array would require copying the underlying data (e.g. in presence + of nulls). + writable : bool, default False + For numpy arrays created with zero copy (view on the Arrow data), + the resulting array is not writable (Arrow data is immutable). + By setting this to True, a copy of the array is made to ensure + it is writable. + + Returns + ------- + array : numpy.ndarray + """ + @classmethod + def from_storage(cls, storage: Int8Array) -> Self: # type: ignore[override] + """ + Construct Bool8Array from Int8Array storage. + + Parameters + ---------- + storage : Int8Array + The underlying storage for the result array. + + Returns + ------- + bool8_array : Bool8Array + """ + @classmethod + def from_numpy(cls, obj: np.ndarray) -> Self: + """ + Convert numpy array to a bool8 extension array without making a copy. + The input array must be 1-dimensional, with either bool_ or int8 dtype. + + Parameters + ---------- + obj : numpy.ndarray + + Returns + ------- + bool8_array : Bool8Array + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = np.array([True, False, True], dtype=np.bool_) + >>> pa.Bool8Array.from_numpy(arr) + + [ + 1, + 0, + 1 + ] + """ + +def concat_arrays(arrays: Iterable[_ArrayT], memory_pool: MemoryPool | None = None) -> _ArrayT: + """ + Concatenate the given arrays. + + The contents of the input arrays are copied into the returned array. 
+ + Raises + ------ + ArrowInvalid + If not all of the arrays have the same type. + + Parameters + ---------- + arrays : iterable of pyarrow.Array + Arrays to concatenate, must be identically typed. + memory_pool : MemoryPool, default None + For memory allocations. If None, the default pool is used. + + Examples + -------- + >>> import pyarrow as pa + >>> arr1 = pa.array([2, 4, 5, 100]) + >>> arr2 = pa.array([2, 4]) + >>> pa.concat_arrays([arr1, arr2]) + + [ + 2, + 4, + 5, + 100, + 2, + 4 + ] + + """ + +def _empty_array(type: _DataTypeT) -> Array[scalar.Scalar[_DataTypeT]]: + """ + Create empty array of the given type. + """ + +__all__ = [ + "array", + "asarray", + "nulls", + "repeat", + "infer_type", + "_PandasConvertible", + "Array", + "NullArray", + "BooleanArray", + "NumericArray", + "IntegerArray", + "FloatingPointArray", + "Int8Array", + "UInt8Array", + "Int16Array", + "UInt16Array", + "Int32Array", + "UInt32Array", + "Int64Array", + "UInt64Array", + "Date32Array", + "Date64Array", + "TimestampArray", + "Time32Array", + "Time64Array", + "DurationArray", + "MonthDayNanoIntervalArray", + "HalfFloatArray", + "FloatArray", + "DoubleArray", + "FixedSizeBinaryArray", + "Decimal32Array", + "Decimal64Array", + "Decimal128Array", + "Decimal256Array", + "BaseListArray", + "ListArray", + "LargeListArray", + "ListViewArray", + "LargeListViewArray", + "FixedSizeListArray", + "MapArray", + "UnionArray", + "StringArray", + "LargeStringArray", + "StringViewArray", + "BinaryArray", + "LargeBinaryArray", + "BinaryViewArray", + "DictionaryArray", + "StructArray", + "RunEndEncodedArray", + "ExtensionArray", + "Bool8Array", + "UuidArray", + "JsonArray", + "OpaqueArray", + "FixedShapeTensorArray", + "concat_arrays", + "_empty_array", +] diff --git a/python/pyarrow/__lib_pxi/benchmark.pyi b/python/pyarrow/__lib_pxi/benchmark.pyi new file mode 100644 index 00000000000..66981bf0f51 --- /dev/null +++ b/python/pyarrow/__lib_pxi/benchmark.pyi @@ -0,0 +1 @@ +def benchmark_PandasObjectIsNull(list) -> None: ... # noqa: N802 diff --git a/python/pyarrow/__lib_pxi/builder.pyi b/python/pyarrow/__lib_pxi/builder.pyi new file mode 100644 index 00000000000..4a0e9ca4708 --- /dev/null +++ b/python/pyarrow/__lib_pxi/builder.pyi @@ -0,0 +1,89 @@ +from typing import Iterable + +from pyarrow.lib import MemoryPool, _Weakrefable + +from .array import StringArray, StringViewArray + +class StringBuilder(_Weakrefable): + """ + Builder class for UTF8 strings. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. + + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... 
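+
+# Illustrative usage sketch for the builder documented above (comment only, so
+# it does not affect the stub; everything other than the StringBuilder API is
+# arbitrary example data):
+#
+#     from pyarrow.lib import StringBuilder
+#
+#     builder = StringBuilder()
+#     builder.append("foo")
+#     builder.append(None)                  # appends a null
+#     builder.append_values(["bar", "baz"])
+#     arr = builder.finish()                # StringArray of length 4, one null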
+ +class StringViewBuilder(_Weakrefable): + """ + Builder class for UTF8 string views. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string_view'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. + + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringViewArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + +__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow/__lib_pxi/compat.pyi b/python/pyarrow/__lib_pxi/compat.pyi new file mode 100644 index 00000000000..ae667be453e --- /dev/null +++ b/python/pyarrow/__lib_pxi/compat.pyi @@ -0,0 +1,5 @@ +def encode_file_path(path: str | bytes) -> bytes: ... +def tobytes(o: str | bytes) -> bytes: ... +def frombytes(o: bytes, *, safe: bool = False): ... + +__all__ = ["encode_file_path", "tobytes", "frombytes"] diff --git a/python/pyarrow/__lib_pxi/config.pyi b/python/pyarrow/__lib_pxi/config.pyi new file mode 100644 index 00000000000..166e10c9734 --- /dev/null +++ b/python/pyarrow/__lib_pxi/config.pyi @@ -0,0 +1,41 @@ +from typing import NamedTuple + +class VersionInfo(NamedTuple): + major: int + minor: int + patch: int + +class BuildInfo(NamedTuple): + version: str + version_info: VersionInfo + so_version: str + full_so_version: str + compiler_id: str + compiler_version: str + compiler_flags: str + git_id: str + git_description: str + package_kind: str + build_type: str + +class RuntimeInfo(NamedTuple): + simd_level: str + detected_simd_level: str + +cpp_build_info: BuildInfo +cpp_version: str +cpp_version_info: VersionInfo + +def runtime_info() -> RuntimeInfo: ... +def set_timezone_db_path(path: str) -> None: ... + +__all__ = [ + "VersionInfo", + "BuildInfo", + "RuntimeInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "set_timezone_db_path", +] diff --git a/python/pyarrow/__lib_pxi/device.pyi b/python/pyarrow/__lib_pxi/device.pyi new file mode 100644 index 00000000000..d1b9f39eedd --- /dev/null +++ b/python/pyarrow/__lib_pxi/device.pyi @@ -0,0 +1,88 @@ +import enum + +from pyarrow.lib import _Weakrefable + +class DeviceAllocationType(enum.Flag): + CPU = enum.auto() + CUDA = enum.auto() + CUDA_HOST = enum.auto() + OPENCL = enum.auto() + VULKAN = enum.auto() + METAL = enum.auto() + VPI = enum.auto() + ROCM = enum.auto() + ROCM_HOST = enum.auto() + EXT_DEV = enum.auto() + CUDA_MANAGED = enum.auto() + ONEAPI = enum.auto() + WEBGPU = enum.auto() + HEXAGON = enum.auto() + +class Device(_Weakrefable): + """ + Abstract interface for hardware devices + + This object represents a device with access to some memory spaces. + When handling a Buffer or raw memory address, it allows deciding in which + context the raw memory address should be interpreted + (e.g. 
CPU-accessible memory, or embedded memory on some particular GPU). + """ + + @property + def type_name(self) -> str: + """ + A shorthand for this device's type. + """ + @property + def device_id(self) -> int: + """ + A device ID to identify this device if there are multiple of this type. + + If there is no "device_id" equivalent (such as for the main CPU device on + non-numa systems) returns -1. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this device is the main CPU device. + + This shorthand method is very useful when deciding whether a memory address + is CPU-accessible. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + Return the DeviceAllocationType of this device. + """ + +class MemoryManager(_Weakrefable): + """ + An object that provides memory management primitives. + + A MemoryManager is always tied to a particular Device instance. + It can also have additional parameters (such as a MemoryPool to + allocate CPU memory). + + """ + @property + def device(self) -> Device: + """ + The device this MemoryManager is tied to. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this MemoryManager is tied to the main CPU device. + + This shorthand method is very useful when deciding whether a memory + address is CPU-accessible. + """ + +def default_cpu_memory_manager() -> MemoryManager: + """ + Return the default CPU MemoryManager instance. + + The returned singleton instance uses the default MemoryPool. + """ + +__all__ = ["DeviceAllocationType", "Device", "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow/__lib_pxi/error.pyi b/python/pyarrow/__lib_pxi/error.pyi new file mode 100644 index 00000000000..981ed51e680 --- /dev/null +++ b/python/pyarrow/__lib_pxi/error.pyi @@ -0,0 +1,53 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + +ArrowIOError = IOError + +class StopToken: ... + +def enable_signal_handlers(enable: bool) -> None: ... + +have_signal_refcycle: bool + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... 
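+
+# Illustrative sketch (comment only): because the classes above also inherit
+# from the corresponding builtin exceptions, Arrow errors can be caught either
+# by their Arrow name or by the builtin base class:
+#
+#     import pyarrow as pa
+#
+#     try:
+#         pa.array(["not an int"], type=pa.int64())
+#     except pa.ArrowInvalid:   # an `except ValueError` clause would also match
+#         ...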
+ +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow/__lib_pxi/io.pyi b/python/pyarrow/__lib_pxi/io.pyi new file mode 100644 index 00000000000..d882fd79d57 --- /dev/null +++ b/python/pyarrow/__lib_pxi/io.pyi @@ -0,0 +1,1474 @@ +import sys + +from collections.abc import Callable +from io import IOBase + +from _typeshed import StrPath + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Any, Literal, SupportsIndex, overload + +from pyarrow._stubs_typing import Compression, SupportPyBuffer +from pyarrow.lib import MemoryPool, _Weakrefable + +from .device import Device, DeviceAllocationType, MemoryManager +from .types import KeyValueMetadata + +def have_libhdfs() -> bool: + """ + Return true if HDFS (HadoopFileSystem) library is set up correctly. + """ + +def io_thread_count() -> int: + """ + Return the number of threads to use for I/O operations. + + Many operations, such as scanning a dataset, will implicitly make + use of this pool. The number of threads is set to a fixed value at + startup. It can be modified at runtime by calling + :func:`set_io_thread_count()`. + + See Also + -------- + set_io_thread_count : Modify the size of this pool. + cpu_count : The analogous function for the CPU thread pool. + """ + +def set_io_thread_count(count: int) -> None: + """ + Set the number of threads to use for I/O operations. + + Many operations, such as scanning a dataset, will implicitly make + use of this pool. + + Parameters + ---------- + count : int + The max number of threads that may be used for I/O. + Must be positive. + + See Also + -------- + io_thread_count : Get the size of this pool. + set_cpu_count : The analogous function for the CPU thread pool. + """ + +Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"] + +class NativeFile(_Weakrefable): + """ + The base class for all Arrow streams. + + Streams are either readable, writable, or both. + They optionally support seeking. + + While this class exposes methods to read or write data from Python, the + primary intent of using a Arrow stream is to pass it to other Arrow + facilities that will make use of it, such as Arrow IPC routines. + + Be aware that there are subtle differences with regular Python files, + e.g. destroying a writable Arrow stream without closing it explicitly + will not flush any pending data. + """ + + _default_chunk_size: int + + def __enter__(self) -> Self: ... + def __exit__(self, *args) -> None: ... + @property + def mode(self) -> Mode: + """ + The file mode. Currently instances of NativeFile may support: + + * rb: binary read + * wb: binary write + * rb+: binary read and write + * ab: binary append + """ + def readable(self) -> bool: ... + def seekable(self) -> bool: ... + def isatty(self) -> bool: ... + def fileno(self) -> int: ... + @property + def closed(self) -> bool: ... + def close(self) -> None: ... 
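+
+    # Illustrative sketch (comment only; "data.bin" is an arbitrary example
+    # path): NativeFile subclasses are usually used as context managers so the
+    # stream is flushed and closed on exit:
+    #
+    #     import pyarrow as pa
+    #
+    #     with pa.OSFile("data.bin", "wb") as f:
+    #         f.write(b"hello")
+    #     with pa.OSFile("data.bin", "rb") as f:
+    #         f.read()   # b'hello'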
+ def size(self) -> int: + """ + Return file size + """ + def metadata(self) -> KeyValueMetadata: + """ + Return file metadata + """ + def tell(self) -> int: + """ + Return current stream position + """ + def seek(self, position: int, whence: int = 0) -> int: + """ + Change current file stream position + + Parameters + ---------- + position : int + Byte offset, interpreted relative to value of whence argument + whence : int, default 0 + Point of reference for seek offset + + Notes + ----- + Values of whence: + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + + Returns + ------- + int + The new absolute stream position. + """ + def flush(self) -> None: + """ + Flush the stream, if applicable. + + An error is raised if stream is not writable. + """ + def write(self, data: bytes | SupportPyBuffer) -> int: + """ + Write data to the file. + + Parameters + ---------- + data : bytes-like object or exporter of buffer protocol + + Returns + ------- + int + nbytes: number of bytes written + """ + def read(self, nbytes: int | None = None) -> bytes: + """ + Read and return up to n bytes. + + If *nbytes* is None, then the entire remaining file contents are read. + + Parameters + ---------- + nbytes : int, default None + + Returns + ------- + data : bytes + """ + def get_stream(self, file_offset: int, nbytes: int) -> Self: + """ + Return an input stream that reads a file segment independent of the + state of the file. + + Allows reading portions of a random access file as an input stream + without interfering with each other. + + Parameters + ---------- + file_offset : int + nbytes : int + + Returns + ------- + stream : NativeFile + """ + def read_at(self) -> bytes: + """ + Read indicated number of bytes at offset from the file + + Parameters + ---------- + nbytes : int + offset : int + + Returns + ------- + data : bytes + """ + def read1(self) -> bytes: + """Read and return up to n bytes. + + Unlike read(), if *nbytes* is None then a chunk is read, not the + entire file. + + Parameters + ---------- + nbytes : int, default None + The maximum number of bytes to read. + + Returns + ------- + data : bytes + """ + def readall(self) -> bytes: ... + def readinto(self, b: SupportPyBuffer) -> int: + """ + Read into the supplied buffer + + Parameters + ---------- + b : buffer-like object + A writable buffer object (such as a bytearray). + + Returns + ------- + written : int + number of bytes written + """ + + def readline(self, size: int | None = None) -> bytes: + """Read and return a line of bytes from the file. + + If size is specified, read at most size bytes. + + Line terminator is always b"\\n". + + Parameters + ---------- + size : int + maximum number of bytes read + """ + def readlines(self, hint: int | None = None) -> list[bytes]: + """Read lines of the file + + Parameters + ---------- + hint : int + maximum number of bytes read until we stop + """ + def __iter__(self) -> Self: ... + def __next__(self) -> bytes: ... + def read_buffer(self, nbytes: int | None = None) -> Buffer: + """ + Read from buffer. + + Parameters + ---------- + nbytes : int, optional + maximum number of bytes read + """ + def truncate(self) -> None: ... + def writelines(self, lines: list[bytes]): + """ + Write lines to the file. 
+ + Parameters + ---------- + lines : iterable + Iterable of bytes-like objects or exporters of buffer protocol + """ + def download(self, stream_or_path: StrPath | IOBase, buffer_size: int | None = None) -> None: + """ + Read this file completely to a local path or destination stream. + + This method first seeks to the beginning of the file. + + Parameters + ---------- + stream_or_path : str or file-like object + If a string, a local file path to write to; otherwise, + should be a writable stream. + buffer_size : int, optional + The buffer size to use for data transfers. + """ + def upload(self, stream: IOBase, buffer_size: int | None) -> None: + """ + Write from a source stream to this file. + + Parameters + ---------- + stream : file-like object + Source stream to pipe to this file. + buffer_size : int, optional + The buffer size to use for data transfers. + """ + +# ---------------------------------------------------------------------- +# Python file-like objects + +class PythonFile(NativeFile): + """ + A stream backed by a Python file object. + + This class allows using Python file objects with arbitrary Arrow + functions, including functions written in another language than Python. + + As a downside, there is a non-zero redirection cost in translating + Arrow stream calls to Python method calls. Furthermore, Python's + Global Interpreter Lock may limit parallelism in some situations. + + Examples + -------- + >>> import io + >>> import pyarrow as pa + >>> pa.PythonFile(io.BytesIO()) + + + Create a stream for writing: + + >>> buf = io.BytesIO() + >>> f = pa.PythonFile(buf, mode="w") + >>> f.writable() + True + >>> f.write(b"PythonFile") + 10 + >>> buf.getvalue() + b'PythonFile' + >>> f.close() + >>> f + + + Create a stream for reading: + + >>> buf = io.BytesIO(b"PythonFile") + >>> f = pa.PythonFile(buf, mode="r") + >>> f.mode + 'rb' + >>> f.read() + b'PythonFile' + >>> f + + >>> f.close() + >>> f + + """ + def __init__(self, handle: IOBase, mode: Literal["r", "w"] | None = None) -> None: ... + def truncate(self, pos: int | None = None) -> None: + """ + Parameters + ---------- + pos : int, optional + """ + +class MemoryMappedFile(NativeFile): + """ + A stream that represents a memory-mapped file. + + Supports 'r', 'r+', 'w' modes. + + Examples + -------- + Create a new file with memory map: + + >>> import pyarrow as pa + >>> mmap = pa.create_memory_map("example_mmap.dat", 10) + >>> mmap + + >>> mmap.close() + + Open an existing file with memory map: + + >>> with pa.memory_map("example_mmap.dat") as mmap: + ... mmap + + """ + @classmethod + def create(cls, path: str, size: int) -> Self: + """ + Create a MemoryMappedFile + + Parameters + ---------- + path : str + Where to create the file. + size : int + Size of the memory mapped file. + """ + def _open(self, path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"): ... + def resize(self, new_size: int) -> None: + """ + Resize the map and underlying file. + + Parameters + ---------- + new_size : new size in bytes + """ + +def memory_map( + path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r" +) -> MemoryMappedFile: + """ + Open memory map at file path. Size of the memory map cannot change. + + Parameters + ---------- + path : str + mode : {'r', 'r+', 'w'}, default 'r' + Whether the file is opened for reading ('r'), writing ('w') + or both ('r+'). 
+ + Returns + ------- + mmap : MemoryMappedFile + + Examples + -------- + Reading from a memory map without any memory allocation or copying: + + >>> import pyarrow as pa + >>> with pa.output_stream("example_mmap.txt") as stream: + ... stream.write(b"Constructing a buffer referencing the mapped memory") + 51 + >>> with pa.memory_map("example_mmap.txt") as mmap: + ... mmap.read_at(6, 45) + b'memory' + """ + +create_memory_map = MemoryMappedFile.create + +class OSFile(NativeFile): + """ + A stream backed by a regular file descriptor. + + Examples + -------- + Create a new file to write to: + + >>> import pyarrow as pa + >>> with pa.OSFile("example_osfile.arrow", mode="w") as f: + ... f.writable() + ... f.write(b"OSFile") + ... f.seekable() + True + 6 + False + + Open the file to read: + + >>> with pa.OSFile("example_osfile.arrow", mode="r") as f: + ... f.mode + ... f.read() + 'rb' + b'OSFile' + + Open the file to append: + + >>> with pa.OSFile("example_osfile.arrow", mode="ab") as f: + ... f.mode + ... f.write(b" is super!") + 'ab' + 10 + >>> with pa.OSFile("example_osfile.arrow") as f: + ... f.read() + b'OSFile is super!' + + Inspect created OSFile: + + >>> pa.OSFile("example_osfile.arrow") + + """ + def __init__( + self, + path: str, + mode: Literal["r", "rb", "w", "wb", "a", "ab"], + memory_pool: MemoryPool | None = None, + ) -> None: ... + +class FixedSizeBufferWriter(NativeFile): + """ + A stream writing to a Arrow buffer. + + Examples + -------- + Create a stream to write to ``pyarrow.Buffer``: + + >>> import pyarrow as pa + >>> buf = pa.allocate_buffer(5) + >>> with pa.output_stream(buf) as stream: + ... stream.write(b"abcde") + ... stream + 5 + + + Inspect the buffer: + + >>> buf.to_pybytes() + b'abcde' + >>> buf + + """ + def __init__(self, buffer: Buffer) -> None: ... + def set_memcopy_threads(self, num_threads: int) -> None: ... + def set_memcopy_blocksize(self, blocksize: int) -> None: ... + def set_memcopy_threshold(self, threshold: int) -> None: ... + +# ---------------------------------------------------------------------- +# Arrow buffers + +class Buffer(_Weakrefable): + """ + The base class for all Arrow buffers. + + A buffer represents a contiguous memory area. Many buffers will own + their memory, though not all of them do. + """ + def __len__(self) -> int: ... + def _assert_cpu(self) -> None: ... + @property + def size(self) -> int: + """ + The buffer size in bytes. + """ + @property + def address(self) -> int: + """ + The buffer's address, as an integer. + + The returned address may point to CPU or device memory. + Use `is_cpu()` to disambiguate. + """ + def hex(self) -> bytes: + """ + Compute hexadecimal representation of the buffer. + + Returns + ------- + : bytes + """ + @property + def is_mutable(self) -> bool: + """ + Whether the buffer is mutable. + """ + @property + def is_cpu(self) -> bool: + """ + Whether the buffer is CPU-accessible. + """ + @property + def device(self) -> Device: + """ + The device where the buffer resides. + + Returns + ------- + Device + """ + @property + def memory_manager(self) -> MemoryManager: + """ + The memory manager associated with the buffer. + + Returns + ------- + MemoryManager + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the buffer resides. + + Returns + ------- + DeviceAllocationType + """ + @property + def parent(self) -> Buffer | None: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + @overload + def __getitem__(self, key: int) -> int: ... 
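+
+    # Illustrative sketch (comment only): buffers are typically created from
+    # Python bytes-like objects with pa.py_buffer() and sliced without copying:
+    #
+    #     import pyarrow as pa
+    #
+    #     buf = pa.py_buffer(b"abcdef")
+    #     view = buf[2:4]        # zero-copy, equivalent to buf.slice(2, 2)
+    #     view.to_pybytes()      # b'cd'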
+ def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Slice this buffer. Memory is not copied. + + You can also use the Python slice notation ``buffer[start:stop]``. + + Parameters + ---------- + offset : int, default 0 + Offset from start of buffer to slice. + length : int, default None + Length of slice (default is until end of Buffer starting from + offset). + + Returns + ------- + sliced : Buffer + A logical view over this buffer. + """ + def equals(self, other: Self) -> bool: + """ + Determine if two buffers contain exactly the same data. + + Parameters + ---------- + other : Buffer + + Returns + ------- + are_equal : bool + True if buffer contents and size are equal + """ + def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]: ... + def to_pybytes(self) -> bytes: + """ + Return this buffer as a Python bytes object. Memory is copied. + """ + def __buffer__(self, flags: int, /) -> memoryview: ... + +class ResizableBuffer(Buffer): + """ + A base class for buffers that can be resized. + """ + + def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: + """ + Resize buffer to indicated size. + + Parameters + ---------- + new_size : int + New size of buffer (padding may be added internally). + shrink_to_fit : bool, default False + If this is true, the buffer is shrunk when new_size is less + than the current size. + If this is false, the buffer is never shrunk. + """ + +@overload +def allocate_buffer(size: int, memory_pool: MemoryPool | None = None) -> Buffer: ... +@overload +def allocate_buffer( + size: int, memory_pool: MemoryPool | None, resizable: Literal[False] +) -> Buffer: ... +@overload +def allocate_buffer( + size: int, memory_pool: MemoryPool | None, resizable: Literal[True] +) -> ResizableBuffer: ... +def allocate_buffer(*args, **kwargs): + """ + Allocate a mutable buffer. + + Parameters + ---------- + size : int + Number of bytes to allocate (plus internal padding) + memory_pool : MemoryPool, optional + The pool to allocate memory from. + If not given, the default memory pool is used. + resizable : bool, default False + If true, the returned buffer is resizable. + + Returns + ------- + buffer : Buffer or ResizableBuffer + """ + +# ---------------------------------------------------------------------- +# Arrow Stream +class BufferOutputStream(NativeFile): + """ + An output stream that writes to a resizable buffer. + + The buffer is produced as a result when ``getvalue()`` is called. + + Examples + -------- + Create an output stream, write data to it and finalize it with + ``getvalue()``: + + >>> import pyarrow as pa + >>> f = pa.BufferOutputStream() + >>> f.write(b"pyarrow.Buffer") + 14 + >>> f.closed + False + >>> f.getvalue() + + >>> f.closed + True + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def getvalue(self) -> Buffer: + """ + Finalize output stream and return result as pyarrow.Buffer. + + Returns + ------- + value : Buffer + """ + +class MockOutputStream(NativeFile): ... + +class BufferReader(NativeFile): + """ + Zero-copy reader from objects convertible to Arrow buffer. + + Parameters + ---------- + obj : Python bytes or pyarrow.Buffer + + Examples + -------- + Create an Arrow input stream and inspect it: + + >>> import pyarrow as pa + >>> data = b"reader data" + >>> buf = memoryview(data) + >>> with pa.input_stream(buf) as stream: + ... stream.size() + ... stream.read(6) + ... stream.seek(7) + ... 
stream.read(15)
+    11
+    b'reader'
+    7
+    b'data'
+    """
+    def __init__(self, obj) -> None: ...
+
+class CompressedInputStream(NativeFile):
+    """
+    An input stream wrapper which decompresses data on the fly.
+
+    Parameters
+    ----------
+    stream : string, path, pyarrow.NativeFile, or file-like object
+        Input stream object to wrap with the compression.
+    compression : str
+        The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
+
+    Examples
+    --------
+    Create an output stream which compresses the data:
+
+    >>> import pyarrow as pa
+    >>> data = b"Compressed stream"
+    >>> raw = pa.BufferOutputStream()
+    >>> with pa.CompressedOutputStream(raw, "gzip") as compressed:
+    ...     compressed.write(data)
+    17
+
+    Create an input stream with decompression referencing the
+    buffer with compressed data:
+
+    >>> cdata = raw.getvalue()
+    >>> with pa.input_stream(cdata, compression="gzip") as compressed:
+    ...     compressed.read()
+    b'Compressed stream'
+
+    which actually translates to the use of ``BufferReader`` and
+    ``CompressedInputStream``:
+
+    >>> raw = pa.BufferReader(cdata)
+    >>> with pa.CompressedInputStream(raw, "gzip") as compressed:
+    ...     compressed.read()
+    b'Compressed stream'
+    """
+
+    def __init__(
+        self,
+        stream: StrPath | NativeFile | IOBase,
+        compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"],
+    ) -> None: ...
+
+class CompressedOutputStream(NativeFile):
+    """
+    An output stream wrapper which compresses data on the fly.
+
+    Parameters
+    ----------
+    stream : string, path, pyarrow.NativeFile, or file-like object
+        Input stream object to wrap with the compression.
+    compression : str
+        The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
+
+    Examples
+    --------
+    Create an output stream which compresses the data:
+
+    >>> import pyarrow as pa
+    >>> data = b"Compressed stream"
+    >>> raw = pa.BufferOutputStream()
+    >>> with pa.CompressedOutputStream(raw, "gzip") as compressed:
+    ...     compressed.write(data)
+    17
+    """
+    def __init__(
+        self,
+        stream: StrPath | NativeFile | IOBase,
+        compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"],
+    ) -> None: ...
+
+class BufferedInputStream(NativeFile):
+    """
+    An input stream that performs buffered reads from
+    an unbuffered input stream, which can mitigate the overhead
+    of many small reads in some cases.
+
+    Parameters
+    ----------
+    stream : NativeFile
+        The input stream to wrap with the buffer
+    buffer_size : int
+        Size of the temporary read buffer.
+    memory_pool : MemoryPool
+        The memory pool used to allocate the buffer.
+    """
+    def __init__(
+        self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None
+    ) -> None: ...
+    def detach(self) -> NativeFile:
+        """
+        Release the raw InputStream.
+        Further operations on this stream are invalid.
+
+        Returns
+        -------
+        raw : NativeFile
+            The underlying raw input stream
+        """
+
+class BufferedOutputStream(NativeFile):
+    """
+    An output stream that performs buffered writes to
+    an unbuffered output stream, which can mitigate the overhead
+    of many small writes in some cases.
+
+    Parameters
+    ----------
+    stream : NativeFile
+        The writable output stream to wrap with the buffer
+    buffer_size : int
+        Size of the buffer that should be added.
+    memory_pool : MemoryPool
+        The memory pool used to allocate the buffer.
+    """
+    def __init__(
+        self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None
+    ) -> None: ...
+    def detach(self) -> NativeFile:
+        """
+        Flush any buffered writes and release the raw OutputStream.
+ Further operations on this stream are invalid. + + Returns + ------- + raw : NativeFile + The underlying raw output stream. + """ + +class TransformInputStream(NativeFile): + """ + Transform an input stream. + + Parameters + ---------- + stream : NativeFile + The stream to transform. + transform_func : callable + The transformation to apply. + """ + def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: ... + +class Transcoder: + def __init__(self, decoder, encoder) -> None: ... + def __call__(self, buf: Buffer): ... + +def transcoding_input_stream( + stream: NativeFile, src_encoding: str, dest_encoding: str +) -> TransformInputStream: + """ + Add a transcoding transformation to the stream. + Incoming data will be decoded according to ``src_encoding`` and + then re-encoded according to ``dest_encoding``. + + Parameters + ---------- + stream : NativeFile + The stream to which the transformation should be applied. + src_encoding : str + The codec to use when reading data. + dest_encoding : str + The codec to use for emitted data. + """ + +def py_buffer(obj: SupportPyBuffer) -> Buffer: + """ + Construct an Arrow buffer from a Python bytes-like or buffer-like object + + Parameters + ---------- + obj : object + the object from which the buffer should be constructed. + """ + +def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: + """ + Construct an Arrow buffer with the given *address* and *size*. + + The buffer will be optionally backed by the Python *base* object, if given. + The *base* object will be kept alive as long as this buffer is alive, + including across language boundaries (for example if the buffer is + referenced by C++ code). + + Parameters + ---------- + address : int + The starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + The size of device buffer in bytes. + base : {None, object} + Object that owns the referenced memory. + """ + +def as_buffer(o: Buffer | SupportPyBuffer) -> Buffer: ... + +# --------------------------------------------------------------------- + +class CacheOptions(_Weakrefable): + """ + Cache options for a pre-buffered fragment scan. + + Parameters + ---------- + hole_size_limit : int, default 8KiB + The maximum distance in bytes between two consecutive ranges; beyond + this value, ranges are not combined. + range_size_limit : int, default 32MiB + The maximum size in bytes of a combined range; if combining two + consecutive ranges would produce a range of a size greater than this, + they are not combined + lazy : bool, default True + lazy = false: request all byte ranges when PreBuffer or WillNeed is called. + lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + needs them. + lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + range that is currently being read. + prefetch_limit : int, default 0 + The maximum number of ranges to be prefetched. This is only used for + lazy cache to asynchronously read some ranges after reading the target + range. + """ + + hole_size_limit: int + range_size_limit: int + lazy: bool + prefetch_limit: int + def __init__( + self, + *, + hole_size_limit: int | None = None, + range_size_limit: int | None = None, + lazy: bool = True, + prefetch_limit: int = 0, + ) -> None: ... 
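+
+    # Illustrative sketch (comment only; the numbers are arbitrary): construct
+    # cache options to tune read coalescing, typically for pre-buffered scans
+    # of remote files:
+    #
+    #     import pyarrow as pa
+    #
+    #     opts = pa.CacheOptions(
+    #         hole_size_limit=4 * 2**10,     # merge ranges closer than 4 KiB
+    #         range_size_limit=16 * 2**20,   # cap merged ranges at 16 MiB
+    #         lazy=True,
+    #     )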
+ @classmethod + def from_network_metrics( + cls, + time_to_first_byte_millis: int, + transfer_bandwidth_mib_per_sec: int, + ideal_bandwidth_utilization_frac: float = 0.9, + max_ideal_request_size_mib: int = 64, + ) -> Self: + """ + Create suitable CacheOptions based on provided network metrics. + + Typically this will be used with object storage solutions like Amazon S3, + Google Cloud Storage and Azure Blob Storage. + + Parameters + ---------- + time_to_first_byte_millis : int + Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call + setup latency of a new read request. The value is a positive integer. + transfer_bandwidth_mib_per_sec : int + Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive + integer. + ideal_bandwidth_utilization_frac : int, default 0.9 + Transfer bandwidth utilization fraction (per connection) to maximize the net + data load. The value is a positive float less than 1. + max_ideal_request_size_mib : int, default 64 + The maximum single data request size (in MiB) to maximize the net data load. + + Returns + ------- + CacheOptions + """ + +class Codec(_Weakrefable): + """ + Compression codec. + + Parameters + ---------- + compression : str + Type of compression codec to initialize, valid values are: 'gzip', + 'bz2', 'brotli', 'lz4' (or 'lz4_frame'), 'lz4_raw', 'zstd' and + 'snappy'. + compression_level : int, None + Optional parameter specifying how aggressively to compress. The + possible ranges and effect of this parameter depend on the specific + codec chosen. Higher values compress more but typically use more + resources (CPU/RAM). Some codecs support negative values. + + gzip + The compression_level maps to the memlevel parameter of + deflateInit2. Higher levels use more RAM but are faster + and should have higher compression ratios. + + bz2 + The compression level maps to the blockSize100k parameter of + the BZ2_bzCompressInit function. Higher levels use more RAM + but are faster and should have higher compression ratios. + + brotli + The compression level maps to the BROTLI_PARAM_QUALITY + parameter. Higher values are slower and should have higher + compression ratios. + + lz4/lz4_frame/lz4_raw + The compression level parameter is not supported and must + be None + + zstd + The compression level maps to the compressionLevel parameter + of ZSTD_initCStream. Negative values are supported. Higher + values are slower and should have higher compression ratios. + + snappy + The compression level parameter is not supported and must + be None + + + Raises + ------ + ValueError + If invalid compression value is passed. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.Codec.is_available("gzip") + True + >>> codec = pa.Codec("gzip") + >>> codec.name + 'gzip' + >>> codec.compression_level + 9 + """ + def __init__(self, compression: Compression, compression_level: int | None = None) -> None: ... + @classmethod + def detect(cls, path: StrPath) -> Self: + """ + Detect and instantiate compression codec based on file extension. + + Parameters + ---------- + path : str, path-like + File-path to detect compression from. + + Raises + ------ + TypeError + If the passed value is not path-like. + ValueError + If the compression can't be detected from the path. + + Returns + ------- + Codec + """ + @staticmethod + def is_available(compression: Compression) -> bool: + """ + Returns whether the compression support has been built and enabled. 
+ + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + + Returns + ------- + bool + """ + @staticmethod + def supports_compression_level(compression: Compression) -> int: + """ + Returns true if the compression level parameter is supported + for the given codec. + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def default_compression_level(compression: Compression) -> int: + """ + Returns the compression level that Arrow will use for the codec if + None is specified. + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def minimum_compression_level(compression: Compression) -> int: + """ + Returns the smallest valid value for the compression level + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @staticmethod + def maximum_compression_level(compression: Compression) -> int: + """ + Returns the largest valid value for the compression level + + Parameters + ---------- + compression : str + Type of compression codec, + refer to Codec docstring for a list of supported ones. + """ + @property + def name(self) -> Compression: + """Returns the name of the codec""" + @property + def compression_level(self) -> int: + """Returns the compression level parameter of the codec""" + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def compress( + self, + buf: Buffer | bytes | SupportPyBuffer, + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, + ) -> bytes: ... + def compress(self, *args, **kwargs): + """ + Compress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any + + Returns + ------- + compressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, + ) -> Buffer: ... + @overload + def decompress( + self, + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, + ) -> bytes: ... + def decompress(self, *args, **kwargs): + """ + Decompress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or memoryview-compatible object + decompressed_size : int, default None + Size of the decompressed result + asbytes : boolean, default False + Return result as Python bytes object, otherwise Buffer + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. 
+ + Returns + ------- + uncompressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def compress( + buf: Buffer | bytes | SupportPyBuffer, + codec: Compression = "lz4", + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, +) -> bytes: ... +def compress(*args, **kwargs): + """ + Compress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol + codec : str, default 'lz4' + Compression codec. + Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer. + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. + + Returns + ------- + compressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + asbytes: Literal[False], + memory_pool: MemoryPool | None = None, +) -> Buffer: ... +@overload +def decompress( + buf: Buffer | bytes | SupportPyBuffer, + decompressed_size: int | None = None, + codec: Compression = "lz4", + *, + asbytes: Literal[True], + memory_pool: MemoryPool | None = None, +) -> bytes: ... +def decompress(*args, **kwargs): + """ + Decompress data from buffer-like object. + + Parameters + ---------- + buf : pyarrow.Buffer, bytes, or memoryview-compatible object + Input object to decompress data from. + decompressed_size : int, default None + Size of the decompressed result + codec : str, default 'lz4' + Compression codec. + Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} + asbytes : bool, default False + Return result as Python bytes object, otherwise Buffer. + memory_pool : MemoryPool, default None + Memory pool to use for buffer allocations, if any. + + Returns + ------- + uncompressed : pyarrow.Buffer or bytes (if asbytes=True) + """ + +def input_stream( + source: StrPath | Buffer | IOBase, + compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", + buffer_size: int | None = None, +) -> BufferReader: + """ + Create an Arrow input stream. + + Parameters + ---------- + source : str, Path, buffer, or file-like object + The source to open for reading. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly decompression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. + Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). + buffer_size : int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. 
+ + Examples + -------- + Create a readable BufferReader (NativeFile) from a Buffer or a memoryview object: + + >>> import pyarrow as pa + >>> buf = memoryview(b"some data") + >>> with pa.input_stream(buf) as stream: + ... stream.read(4) + b'some' + + Create a readable OSFile (NativeFile) from a string or file path: + + >>> import gzip + >>> with gzip.open("example.gz", "wb") as f: + ... f.write(b"some data") + 9 + >>> with pa.input_stream("example.gz") as stream: + ... stream.read() + b'some data' + + Create a readable PythonFile (NativeFile) from a Python file object: + + >>> with open("example.txt", mode="w") as f: + ... f.write("some text") + 9 + >>> with pa.input_stream("example.txt") as stream: + ... stream.read(6) + b'some t' + """ + +def output_stream( + source: StrPath | Buffer | IOBase, + compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", + buffer_size: int | None = None, +) -> NativeFile: + """ + Create an Arrow output stream. + + Parameters + ---------- + source : str, Path, buffer, file-like object + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. + Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). + buffer_size : int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + + Examples + -------- + Create a writable NativeFile from a pyarrow Buffer: + + >>> import pyarrow as pa + >>> data = b"buffer data" + >>> empty_obj = bytearray(11) + >>> buf = pa.py_buffer(empty_obj) + >>> with pa.output_stream(buf) as stream: + ... stream.write(data) + 11 + >>> with pa.input_stream(buf) as stream: + ... stream.read(6) + b'buffer' + + or from a memoryview object: + + >>> buf = memoryview(empty_obj) + >>> with pa.output_stream(buf) as stream: + ... stream.write(data) + 11 + >>> with pa.input_stream(buf) as stream: + ... stream.read() + b'buffer data' + + Create a writable NativeFile from a string or file path: + + >>> with pa.output_stream("example_second.txt") as stream: + ... stream.write(b"Write some data") + 15 + >>> with pa.input_stream("example_second.txt") as stream: + ...
stream.read() + b'Write some data' + """ + +__all__ = [ + "have_libhdfs", + "io_thread_count", + "set_io_thread_count", + "NativeFile", + "PythonFile", + "MemoryMappedFile", + "memory_map", + "create_memory_map", + "OSFile", + "FixedSizeBufferWriter", + "Buffer", + "ResizableBuffer", + "allocate_buffer", + "BufferOutputStream", + "MockOutputStream", + "BufferReader", + "CompressedInputStream", + "CompressedOutputStream", + "BufferedInputStream", + "BufferedOutputStream", + "TransformInputStream", + "Transcoder", + "transcoding_input_stream", + "py_buffer", + "foreign_buffer", + "as_buffer", + "CacheOptions", + "Codec", + "compress", + "decompress", + "input_stream", + "output_stream", +] diff --git a/python/pyarrow/__lib_pxi/ipc.pyi b/python/pyarrow/__lib_pxi/ipc.pyi new file mode 100644 index 00000000000..3d72892061e --- /dev/null +++ b/python/pyarrow/__lib_pxi/ipc.pyi @@ -0,0 +1,705 @@ +import enum +import sys + +from io import IOBase + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Iterable, Iterator, Literal, Mapping, NamedTuple + +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer +from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable + +from .io import Buffer, Codec, NativeFile +from .types import DictionaryMemo, KeyValueMetadata + +class MetadataVersion(enum.IntEnum): + V1 = enum.auto() + V2 = enum.auto() + V3 = enum.auto() + V4 = enum.auto() + V5 = enum.auto() + +class WriteStats(NamedTuple): + """IPC write statistics + + Parameters + ---------- + num_messages : int + Number of messages. + num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class ReadStats(NamedTuple): + """IPC read statistics + + Parameters + ---------- + num_messages : int + Number of messages. + num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class IpcReadOptions(_Weakrefable): + """ + Serialization options for reading IPC format. + + Parameters + ---------- + ensure_native_endian : bool, default True + Whether to convert incoming data to platform-native endianness. + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like decompression + included_fields : list + If empty (the default), return all deserialized fields. + If non-empty, the values are the indices of fields to read on + the top-level schema + """ + + ensure_native_endian: bool + use_threads: bool + included_fields: list[int] + def __init__( + self, + *, + ensure_native_endian: bool = True, + use_threads: bool = True, + included_fields: list[int] | None = None, + ) -> None: ... + +class IpcWriteOptions(_Weakrefable): + """ + Serialization options for the IPC format. 
+ + Parameters + ---------- + metadata_version : MetadataVersion, default MetadataVersion.V5 + The metadata version to write. V5 is the current and latest, + V4 is the pre-1.0 metadata version (with incompatible Union layout). + allow_64bit : bool, default False + If true, allow field lengths that don't fit in a signed 32-bit int. + use_legacy_format : bool, default False + Whether to use the pre-Arrow 0.15 IPC format. + compression : str, Codec, or None + compression codec to use for record batch buffers. + If None then batch buffers will be uncompressed. + Must be "lz4", "zstd" or None. + To specify a compression_level use `pyarrow.Codec` + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like compression. + emit_dictionary_deltas : bool + Whether to emit dictionary deltas. Default is false for maximum + stream compatibility. + unify_dictionaries : bool + If true then calls to write_table will attempt to unify dictionaries + across all batches in the table. This can help avoid the need for + replacement dictionaries (which the file format does not support) + but requires computing the unified dictionary and then remapping + the indices arrays. + + This parameter is ignored when writing to the IPC stream format as + the IPC stream format can support replacement dictionaries. + """ + + metadata_version: MetadataVersion + allow_64bit: bool + use_legacy_format: bool + compression: Codec | Literal["lz4", "zstd"] | None + use_threads: bool + emit_dictionary_deltas: bool + unify_dictionaries: bool + def __init__( + self, + *, + metadata_version: MetadataVersion = MetadataVersion.V5, + allow_64bit: bool = False, + use_legacy_format: bool = False, + compression: Codec | Literal["lz4", "zstd"] | None = None, + use_threads: bool = True, + emit_dictionary_deltas: bool = False, + unify_dictionaries: bool = False, + ) -> None: ... + +class Message(_Weakrefable): + """ + Container for an Arrow IPC message with metadata and optional body + """ + + @property + def type(self) -> str: ... + @property + def metadata(self) -> Buffer: ... + @property + def metadata_version(self) -> MetadataVersion: ... + @property + def body(self) -> Buffer | None: ... + def equals(self, other: Message) -> bool: ... + def serialize_to( + self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None + ): + """ + Write message to generic OutputStream + + Parameters + ---------- + sink : NativeFile + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + """ + def serialize(self, alignment: int = 8, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write message as encapsulated IPC message + + Parameters + ---------- + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + """ + +class MessageReader(_Weakrefable): + """ + Interface for reading Message objects from some source (like an + InputStream) + """ + @classmethod + def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: + """ + Open stream from source, if you want to use memory map use + MemoryMappedFile as source. + + Parameters + ---------- + source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object + A readable source, like an InputStream + """ + def __iter__(self) -> Self: ... 
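[Reviewer note, not part of the stub file] A minimal usage sketch of how these IPC declarations are expected to type-check at call sites. It assumes the runtime pyarrow.ipc convenience wrappers (new_stream, open_stream), which are not declared in this particular stub, and a build where the zstd codec is available.

import pyarrow as pa

# Write a small table to an in-memory IPC stream with zstd-compressed
# record batch buffers (IpcWriteOptions.compression).
table = pa.table({"x": [1, 2, 3]})
options = pa.ipc.IpcWriteOptions(compression="zstd")
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, table.schema, options=options) as writer:
    writer.write_table(table)   # _CRecordBatchWriter.write_table
    stats = writer.stats        # WriteStats named tuple

buf = sink.getvalue()

# Read the stream back; read_all() gathers all batches into a Table.
with pa.ipc.open_stream(buf) as reader:
    roundtripped = reader.read_all()

# MessageReader exposes the raw encapsulated IPC messages.
msg_reader = pa.ipc.MessageReader.open_stream(buf)
first = msg_reader.read_next_message()  # Message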
+ def read_next_message(self) -> Message: + """ + Read next Message from the stream. + + Raises + ------ + StopIteration + At end of stream + """ + __next__ = read_next_message + +# ---------------------------------------------------------------------- +# File and stream readers and writers + +class _CRecordBatchWriter(_Weakrefable): + """The base RecordBatchWriter wrapper. + + Provides common implementations of convenience methods. Should not + be instantiated directly by user code. + """ + def write(self, table_or_batch: Table | RecordBatch): + """ + Write RecordBatch or Table to stream. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + """ + def write_batch( + self, + batch: RecordBatch, + custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None, + ): + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + custom_metadata : mapping or KeyValueMetadata + Keys and values must be string-like / coercible to bytes + """ + def write_table(self, table: Table, max_chunksize: int | None = None) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def stats(self) -> WriteStats: + """ + Current IPC write statistics. + """ + +class _RecordBatchStreamWriter(_CRecordBatchWriter): + def __dealloc__(self) -> None: ... + def _open(self, sink, schema: Schema, options: IpcWriteOptions = IpcWriteOptions()): ... + +class _ReadPandasMixin: + def read_pandas(self, **options) -> pd.DataFrame: + """ + Read contents of stream to a pandas.DataFrame. + + Read all record batches as a pyarrow.Table then convert it to a + pandas.DataFrame using Table.to_pandas. + + Parameters + ---------- + **options + Arguments to forward to :meth:`Table.to_pandas`. + + Returns + ------- + df : pandas.DataFrame + """ + +class RecordBatchReader(_Weakrefable): + """Base class for reading stream of record batches. + + Record batch readers function as iterators of record batches that also + provide the schema (without the need to get any batches). + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatchReader.from_*`` functions instead. + + Notes + ----- + To import and export using the Arrow C stream interface, use the + ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this + interface is intended for expert users. + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([("x", pa.int64())]) + >>> def iter_record_batches(): + ... for i in range(2): + ... yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema) + >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches()) + >>> print(reader.schema) + x: int64 + >>> for batch in reader: + ... print(batch) + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + """ + + def __iter__(self) -> Self: ... + def read_next_batch(self) -> RecordBatch: + """ + Read next RecordBatch from the stream. + + Raises + ------ + StopIteration: + At end of stream. 
+ + Returns + ------- + RecordBatch + """ + __next__ = read_next_batch + @property + def schema(self) -> Schema: + """ + Shared schema of the record batches in the stream. + + Returns + ------- + Schema + """ + def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: + """ + Read next RecordBatch from the stream along with its custom metadata. + + Raises + ------ + StopIteration: + At end of stream. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def iter_batches_with_custom_metadata( + self, + ) -> Iterator[RecordBatchWithMetadata]: + """ + Iterate over record batches from the stream along with their custom + metadata. + + Yields + ------ + RecordBatchWithMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table. + + Returns + ------- + Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def close(self) -> None: + """ + Release any resources associated with the reader. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + def cast(self, target_schema: Schema) -> Self: + """ + Wrap this reader with one that casts each batch lazily as it is pulled. + Currently only a safe cast to target_schema is implemented. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + + Returns + ------- + RecordBatchReader + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowArrayStream struct, given its pointer. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + Be careful: if you don't pass the ArrowArrayStream struct to a + consumer, array memory will leak. This is a low-level function + intended for expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream struct, + given its pointer. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_stream(cls, data: SupportArrowStream, schema: Schema | None = None) -> Self: + """ + Create RecordBatchReader from a Arrow-compatible stream object. + + This accepts objects implementing the Arrow PyCapsule Protocol for + streams, i.e. objects that have a ``__arrow_c_stream__`` method. + + Parameters + ---------- + data : Arrow-compatible stream object + Any object that implements the Arrow PyCapsule Protocol for + streams. + schema : Schema, default None + The schema to which the stream should be casted, if supported + by the stream object. 
+ + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: + """ + Create RecordBatchReader from an iterable of batches. + + Parameters + ---------- + schema : Schema + The shared schema of the record batches + batches : Iterable[RecordBatch] + The batches that this reader will return. + + Returns + ------- + reader : RecordBatchReader + """ + +class _RecordBatchStreamReader(RecordBatchReader): + @property + def stats(self) -> ReadStats: + """ + Current IPC read statistics. + """ + +class _RecordBatchFileWriter(_RecordBatchStreamWriter): ... + +class RecordBatchWithMetadata(NamedTuple): + """RecordBatch with its custom metadata + + Parameters + ---------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + + batch: RecordBatch + custom_metadata: KeyValueMetadata + +class _RecordBatchFileReader(_Weakrefable): + @property + def num_record_batches(self) -> int: + """ + The number of record batches in the IPC file. + """ + def get_batch(self, i: int) -> RecordBatch: + """ + Read the record batch with the given index. + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + """ + get_record_batch = get_batch + def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: + """ + Read the record batch with the given index along with + its custom metadata + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def schema(self) -> Schema: ... + @property + def stats(self) -> ReadStats: ... + +def get_tensor_size(tensor: Tensor) -> int: + """ + Return total size of serialized Tensor including metadata and padding. + + Parameters + ---------- + tensor : Tensor + The tensor for which we want to known the size. + """ + +def get_record_batch_size(batch: RecordBatch) -> int: + """ + Return total size of serialized RecordBatch including metadata and padding. + + Parameters + ---------- + batch : RecordBatch + The recordbatch for which we want to know the size. + """ + +def write_tensor(tensor: Tensor, dest: NativeFile) -> int: + """ + Write pyarrow.Tensor to pyarrow.NativeFile object its current position. + + Parameters + ---------- + tensor : pyarrow.Tensor + dest : pyarrow.NativeFile + + Returns + ------- + bytes_written : int + Total number of bytes written to the file + """ + +def read_tensor(source: NativeFile) -> Tensor: + """Read pyarrow.Tensor from pyarrow.NativeFile object from current + position. If the file source supports zero copy (e.g. a memory map), then + this operation does not allocate any memory. 
This function not assume that + the stream is aligned + + Parameters + ---------- + source : pyarrow.NativeFile + + Returns + ------- + tensor : Tensor + + """ + +def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: + """ + Read length-prefixed message from file or buffer-like object + + Parameters + ---------- + source : pyarrow.NativeFile, file-like object, or buffer-like object + + Returns + ------- + message : Message + """ + +def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | None = None) -> Schema: + """ + Read Schema from message or buffer + + Parameters + ---------- + obj : buffer or Message + dictionary_memo : DictionaryMemo, optional + Needed to be able to reconstruct dictionary-encoded fields + with read_record_batch + + Returns + ------- + schema : Schema + """ + +def read_record_batch( + obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None +) -> RecordBatch: + """ + Read RecordBatch from message, given a known schema. If reading data from a + complete IPC stream, use ipc.open_stream instead + + Parameters + ---------- + obj : Message or Buffer-like + schema : Schema + dictionary_memo : DictionaryMemo, optional + If message contains dictionaries, must pass a populated + DictionaryMemo + + Returns + ------- + batch : RecordBatch + """ + +__all__ = [ + "MetadataVersion", + "WriteStats", + "ReadStats", + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "_CRecordBatchWriter", + "_RecordBatchStreamWriter", + "_ReadPandasMixin", + "RecordBatchReader", + "_RecordBatchStreamReader", + "_RecordBatchFileWriter", + "RecordBatchWithMetadata", + "_RecordBatchFileReader", + "get_tensor_size", + "get_record_batch_size", + "write_tensor", + "read_tensor", + "read_message", + "read_schema", + "read_record_batch", +] diff --git a/python/pyarrow/__lib_pxi/memory.pyi b/python/pyarrow/__lib_pxi/memory.pyi new file mode 100644 index 00000000000..57a3bb4f1b3 --- /dev/null +++ b/python/pyarrow/__lib_pxi/memory.pyi @@ -0,0 +1,174 @@ +from pyarrow.lib import _Weakrefable + +class MemoryPool(_Weakrefable): + """ + Base class for memory allocation. + + Besides tracking its number of allocated bytes, a memory pool also + takes care of the required 64-byte alignment for Arrow data. + """ + + def release_unused(self) -> None: + """ + Attempt to return to the OS any memory being held onto by the pool. + + This function should not be called except potentially for + benchmarking or debugging as it could be expensive and detrimental to + performance. + + This is best effort and may not have any effect on some memory pools + or in some situations (e.g. fragmentation). + """ + def bytes_allocated(self) -> int: + """ + Return the number of bytes that are currently allocated from this + memory pool. + """ + def total_bytes_allocated(self) -> int: + """ + Return the total number of bytes that have been allocated from this + memory pool. + """ + def max_memory(self) -> int | None: + """ + Return the peak memory allocation in this memory pool. + This can be an approximate number in multi-threaded applications. + + None is returned if the pool implementation doesn't know how to + compute this number. + """ + def num_allocations(self) -> int: + """ + Return the number of allocations or reallocations that were made + using this memory pool. + """ + def print_stats(self) -> None: + """ + Print statistics about this memory pool. + + The output format is implementation-specific. 
Not all memory pools + implement this method. + """ + @property + def backend_name(self) -> str: + """ + The name of the backend used by this MemoryPool (e.g. "jemalloc"). + """ + +class LoggingMemoryPool(MemoryPool): ... +class ProxyMemoryPool(MemoryPool): ... + +def default_memory_pool() -> MemoryPool: + """ + Return the process-global memory pool. + + Examples + -------- + >>> default_memory_pool() + + """ + +def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: + """ + Create and return a MemoryPool instance that redirects to the + *parent*, but with separate allocation statistics. + + Parameters + ---------- + parent : MemoryPool + The real memory pool that should be used for allocations. + """ + +def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: + """ + Create and return a MemoryPool instance that redirects to the + *parent*, but also dumps allocation logs on stderr. + + Parameters + ---------- + parent : MemoryPool + The real memory pool that should be used for allocations. + """ + +def system_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the C malloc heap. + """ + +def jemalloc_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the jemalloc heap. + + NotImplementedError is raised if jemalloc support is not enabled. + """ + +def mimalloc_memory_pool() -> MemoryPool: + """ + Return a memory pool based on the mimalloc heap. + + NotImplementedError is raised if mimalloc support is not enabled. + """ + +def set_memory_pool(pool: MemoryPool) -> None: + """ + Set the default memory pool. + + Parameters + ---------- + pool : MemoryPool + The memory pool that should be used by default. + """ + +def log_memory_allocations(enable: bool = True) -> None: + """ + Enable or disable memory allocator logging for debugging purposes + + Parameters + ---------- + enable : bool, default True + Pass False to disable logging + """ + +def total_allocated_bytes() -> int: + """ + Return the currently allocated bytes from the default memory pool. + Other memory pools may not be accounted for. + """ + +def jemalloc_set_decay_ms(decay_ms: int) -> None: + """ + Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to indicated number of + milliseconds. A value of 0 (the default) results in dirty / muzzy memory + pages being released right away to the OS, while a higher value will result + in a time-based decay. See the jemalloc docs for more information + + It's best to set this at the start of your application. + + Parameters + ---------- + decay_ms : int + Number of milliseconds to set for jemalloc decay conf parameters. 
Note + that this change will only affect future memory arenas + """ + +def supported_memory_backends() -> list[str]: + """ + Return a list of available memory pool backends + """ + +__all__ = [ + "MemoryPool", + "LoggingMemoryPool", + "ProxyMemoryPool", + "default_memory_pool", + "proxy_memory_pool", + "logging_memory_pool", + "system_memory_pool", + "jemalloc_memory_pool", + "mimalloc_memory_pool", + "set_memory_pool", + "log_memory_allocations", + "total_allocated_bytes", + "jemalloc_set_decay_ms", + "supported_memory_backends", +] diff --git a/python/pyarrow/__lib_pxi/pandas_shim.pyi b/python/pyarrow/__lib_pxi/pandas_shim.pyi new file mode 100644 index 00000000000..0e80fae4ebf --- /dev/null +++ b/python/pyarrow/__lib_pxi/pandas_shim.pyi @@ -0,0 +1,51 @@ +from types import ModuleType +from typing import Any, Iterable, TypeGuard + +import pandas as pd + +from numpy import dtype +from pandas.core.dtypes.base import ExtensionDtype + +class _PandasAPIShim: + has_sparse: bool + + def series(self, *args, **kwargs) -> pd.Series: ... + def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + @property + def have_pandas(self) -> bool: ... + @property + def compat(self) -> ModuleType: ... + @property + def pd(self) -> ModuleType: ... + def infer_dtype(self, obj: Iterable) -> str: ... + def pandas_dtype(self, dtype: str) -> dtype: ... + @property + def loose_version(self) -> Any: ... + @property + def version(self) -> str: ... + def is_v1(self) -> bool: ... + def is_ge_v21(self) -> bool: ... + def is_ge_v23(self) -> bool: ... + def is_ge_v3(self) -> bool: ... + @property + def categorical_type(self) -> type[pd.Categorical]: ... + @property + def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + @property + def extension_dtype(self) -> type[ExtensionDtype]: ... + def is_array_like( + self, obj: Any + ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... + def is_sparse(self, obj: Any) -> bool: ... + def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... + def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def get_values(self, obj: Any) -> bool: ... + def get_rangeindex_attribute(self, level, name): ... + +_pandas_api: _PandasAPIShim + +__all__ = ["_PandasAPIShim", "_pandas_api"] diff --git a/python/pyarrow/__lib_pxi/scalar.pyi b/python/pyarrow/__lib_pxi/scalar.pyi new file mode 100644 index 00000000000..81ab5012067 --- /dev/null +++ b/python/pyarrow/__lib_pxi/scalar.pyi @@ -0,0 +1,1017 @@ +import collections.abc +import datetime as dt +import sys + +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Any, Generic, Iterator, Literal, Mapping, overload + +import numpy as np + +from pyarrow._compute import CastOptions +from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable +from typing_extensions import Protocol, TypeVar + +from . 
import types +from .types import ( + _AsPyType, + _DataTypeT, + _Time32Unit, + _Time64Unit, + _Tz, + _Unit, +) + +_AsPyTypeK = TypeVar("_AsPyTypeK") +_AsPyTypeV = TypeVar("_AsPyTypeV") +_DataType_co = TypeVar("_DataType_co", bound=types.DataType, covariant=True) + +class Scalar(_Weakrefable, Generic[_DataType_co]): + """ + The base class for scalars. + """ + @property + def type(self) -> _DataType_co: + """ + Data type of the Scalar object. + """ + @property + def is_valid(self) -> bool: + """ + Holds a valid (non-null) value. + """ + @overload + def cast( + self, + target_type: None, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Self: ... + @overload + def cast( + self, + target_type: _DataTypeT, + safe: bool = True, + options: CastOptions | None = None, + memory_pool: MemoryPool | None = None, + ) -> Scalar[_DataTypeT]: ... + def cast(self, *args, **kwargs): + """ + Cast scalar value to another data type. + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, default None + Type to cast scalar to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Returns + ------- + scalar : A Scalar of the given target data type. + """ + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def equals(self, other: Scalar) -> bool: ... + def __hash__(self) -> int: ... + @overload + def as_py( + self: Scalar[types._BasicDataType[_AsPyType]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> _AsPyType: ... + @overload + def as_py( + self: Scalar[types.ListType[types._BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType]: ... + @overload + def as_py( + self: Scalar[ + types.ListType[ + types.DictionaryType[types._IndexT, types._BasicDataType[_AsPyTypeV], Any] + ] + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[int, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[ + types.ListType[types.DictionaryType[Any, types._BasicDataType[_AsPyTypeV], Any]], + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[Any, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[types.ListType[types.DictionaryType[types._IndexT, Any, Any]],], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[int, Any]]: ... + @overload + def as_py( + self: Scalar[types.StructType], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[dict[str, Any]]: ... + @overload + def as_py( + self: Scalar[ + types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] + ], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[_AsPyTypeK, _AsPyTypeV]]: ... 
+ @overload + def as_py( + self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[Any, _AsPyTypeV]]: ... + @overload + def as_py( + self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[tuple[_AsPyTypeK, Any]]: ... + @overload + def as_py( + self: Scalar[Any], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> Any: ... + def as_py(self, *args, **kwargs): + """ + Return this value as a Python representation. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + """ + +_NULL: TypeAlias = None +NA = _NULL + +class NullScalar(Scalar[types.NullType]): ... +class BooleanScalar(Scalar[types.BoolType]): ... +class UInt8Scalar(Scalar[types.UInt8Type]): ... +class Int8Scalar(Scalar[types.Int8Type]): ... +class UInt16Scalar(Scalar[types.UInt16Type]): ... +class Int16Scalar(Scalar[types.Int16Type]): ... +class UInt32Scalar(Scalar[types.Uint32Type]): ... +class Int32Scalar(Scalar[types.Int32Type]): ... +class UInt64Scalar(Scalar[types.UInt64Type]): ... +class Int64Scalar(Scalar[types.Int64Type]): ... +class HalfFloatScalar(Scalar[types.Float16Type]): ... +class FloatScalar(Scalar[types.Float32Type]): ... +class DoubleScalar(Scalar[types.Float64Type]): ... +class Decimal32Scalar(Scalar[types.Decimal32Type[types._Precision, types._Scale]]): ... +class Decimal64Scalar(Scalar[types.Decimal64Type[types._Precision, types._Scale]]): ... +class Decimal128Scalar(Scalar[types.Decimal128Type[types._Precision, types._Scale]]): ... +class Decimal256Scalar(Scalar[types.Decimal256Type[types._Precision, types._Scale]]): ... +class Date32Scalar(Scalar[types.Date32Type]): ... + +class Date64Scalar(Scalar[types.Date64Type]): + @property + def value(self) -> dt.date | None: ... + +class Time32Scalar(Scalar[types.Time32Type[_Time32Unit]]): + @property + def value(self) -> dt.time | None: ... + +class Time64Scalar(Scalar[types.Time64Type[_Time64Unit]]): + @property + def value(self) -> dt.time | None: ... + +class TimestampScalar(Scalar[types.TimestampType[_Unit, _Tz]]): + @property + def value(self) -> int | None: ... + +class DurationScalar(Scalar[types.DurationType[_Unit]]): + @property + def value(self) -> dt.timedelta | None: ... + +class MonthDayNanoIntervalScalar(Scalar[types.MonthDayNanoIntervalType]): + @property + def value(self) -> MonthDayNano | None: ... + +class BinaryScalar(Scalar[types.BinaryType]): + def as_buffer(self) -> Buffer: ... + +class LargeBinaryScalar(Scalar[types.LargeBinaryType]): + def as_buffer(self) -> Buffer: ... + +class FixedSizeBinaryScalar(Scalar[types.FixedSizeBinaryType]): + def as_buffer(self) -> Buffer: ... + +class StringScalar(Scalar[types.StringType]): + def as_buffer(self) -> Buffer: ... + +class LargeStringScalar(Scalar[types.LargeStringType]): + def as_buffer(self) -> Buffer: ... 
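[Reviewer note, not part of the stub file] An illustrative sketch of how the Scalar generics and as_py overloads above are intended to be inferred by a type checker; the comments give expected inferences, not runtime assertions, and maps_as_pydicts follows the behaviour documented in Scalar.as_py.

import datetime as dt
import pyarrow as pa

s = pa.scalar("hello")              # expected: StringScalar
text = s.as_py()                    # expected: str
buf = s.as_buffer()                 # Buffer backing the string scalar

i = pa.scalar(42)                   # expected: Int64Scalar
f = i.cast(pa.float64())            # expected: Scalar[Float64Type] (DoubleScalar at runtime)

d = pa.scalar(dt.date(2020, 1, 1))  # expected: Date32Scalar

m = pa.scalar([("a", 1)], type=pa.map_(pa.string(), pa.int64()))
pairs = m.as_py()                            # association list: [('a', 1)]
as_dict = m.as_py(maps_as_pydicts="strict")  # dict form: {'a': 1}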
+ +class BinaryViewScalar(Scalar[types.BinaryViewType]): + def as_buffer(self) -> Buffer: ... + +class StringViewScalar(Scalar[types.StringViewType]): + def as_buffer(self) -> Buffer: ... + +class ListScalar(Scalar[types.ListType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class FixedSizeListScalar(Scalar[types.FixedSizeListType[_DataTypeT, types._Size]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class LargeListScalar(Scalar[types.LargeListType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class ListViewScalar(Scalar[types.ListViewType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class LargeListViewScalar(Scalar[types.LargeListViewType[_DataTypeT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + def __iter__(self) -> Iterator[Array]: ... + +class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar]): + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[str]: ... + def __getitem__(self, __key: str) -> Scalar[Any]: ... # type: ignore[override] + def _as_py_tuple(self) -> list[tuple[str, Any]]: ... + +class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): + @property + def values(self) -> Array | None: ... + def __len__(self) -> int: ... + def __getitem__(self, i: int) -> tuple[Scalar[types._K], types._ValueT, Any]: ... + @overload + def __iter__( + self: Scalar[ + types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] + ], + ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]]: ... + @overload + def __iter__( + self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]],], + ) -> Iterator[tuple[Any, _AsPyTypeV]]: ... + @overload + def __iter__( + self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any],], + ) -> Iterator[tuple[_AsPyTypeK, Any]]: ... + +class DictionaryScalar(Scalar[types.DictionaryType[types._IndexT, types._BasicValueT]]): + @property + def index(self) -> Scalar[types._IndexT]: ... + @property + def value(self) -> Scalar[types._BasicValueT]: ... + @property + def dictionary(self) -> Array: ... + +class RunEndEncodedScalar(Scalar[types.RunEndEncodedType[types._RunEndType, types._BasicValueT]]): + @property + def value(self) -> tuple[int, types._BasicValueT] | None: ... + +class UnionScalar(Scalar[types.UnionType]): + @property + def value(self) -> Any | None: ... + @property + def type_code(self) -> str: ... + +class ExtensionScalar(Scalar[types.ExtensionType]): + @property + def value(self) -> Any | None: ... + @staticmethod + def from_storage(typ: types.BaseExtensionType, value) -> ExtensionScalar: + """ + Construct ExtensionScalar from type and storage value. + + Parameters + ---------- + typ : DataType + The extension type for the result scalar. + value : object + The storage value for the result scalar. 
+ + Returns + ------- + ext_scalar : ExtensionScalar + """ + +class Bool8Scalar(Scalar[types.Bool8Type]): ... +class UuidScalar(Scalar[types.UuidType]): ... +class JsonScalar(Scalar[types.JsonType]): ... +class OpaqueScalar(Scalar[types.OpaqueType]): ... + +class FixedShapeTensorScalar(ExtensionScalar): + def to_numpy(self) -> np.ndarray: + """ + Convert fixed shape tensor scalar to a numpy.ndarray. + + The resulting ndarray's shape matches the permuted shape of the + fixed shape tensor scalar. + The conversion is zero-copy. + + Returns + ------- + numpy.ndarray + """ + def to_tensor(self) -> Tensor: + """ + Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape + and strides derived from corresponding FixedShapeTensorType. + + The conversion is zero-copy. + + Returns + ------- + pyarrow.Tensor + Tensor represented stored in FixedShapeTensorScalar. + """ + +_V = TypeVar("_V") + +class NullableCollection(Protocol[_V]): # pyright: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... + +@overload +def scalar( + value: str, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringScalar: ... +@overload +def scalar( + value: bytes, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryScalar: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: bool, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BooleanScalar: ... +@overload +def scalar( + value: int, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int64Scalar: ... +@overload +def scalar( + value: float, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DoubleScalar: ... +@overload +def scalar( + value: Decimal, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal128Scalar: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: dt.datetime, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> TimestampScalar[Literal["us"]]: ... +@overload +def scalar( + value: dt.date, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date32Scalar: ... +@overload +def scalar( + value: dt.time, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time64Scalar[Literal["us"]]: ... +@overload +def scalar( + value: dt.timedelta, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DurationScalar[Literal["us"]]: ... +@overload +def scalar( # pyright: ignore[reportOverlappingOverload] + value: MonthDayNano, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalScalar: ... +@overload +def scalar( + value: Mapping[str, Any], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StructScalar: ... +@overload +def scalar( + value: NullableCollection[str], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.StringType]]: ... +@overload +def scalar( + value: NullableCollection[bytes], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.BinaryType]]: ... 
+@overload +def scalar( + value: NullableCollection[bool], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.BoolType]]: ... +@overload +def scalar( + value: NullableCollection[int], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Int64Type]]: ... +@overload +def scalar( + value: NullableCollection[float], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Float64Type]]: ... +@overload +def scalar( + value: NullableCollection[Decimal], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Decimal32Type]]: ... +@overload +def scalar( + value: NullableCollection[dt.datetime], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.TimestampType[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[dt.date], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Date32Type]]: ... +@overload +def scalar( + value: NullableCollection[dt.time], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.Time64Type[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[dt.timedelta], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.DurationType[Literal["us"]]]]: ... +@overload +def scalar( + value: NullableCollection[MonthDayNano], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[types.ListType[types.MonthDayNanoIntervalType]]: ... +@overload +def scalar( + value: NullableCollection[Any], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[Any]: ... +@overload +def scalar( + value: Any, + type: types.NullType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> NullScalar: ... +@overload +def scalar( + value: Any, + type: types.BoolType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BooleanScalar: ... +@overload +def scalar( + value: Any, + type: types.UInt8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt8Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int8Scalar: ... +@overload +def scalar( + value: Any, + type: types.UInt16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt16Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int16Scalar: ... +@overload +def scalar( + value: Any, + type: types.Uint32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt32Scalar: ... +@overload +def scalar( + value: Any, + type: types.Int32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int32Scalar: ... +@overload +def scalar( + value: Any, + type: types.UInt64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UInt64Scalar: ... 
+@overload +def scalar( + value: Any, + type: types.Int64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Int64Scalar: ... +@overload +def scalar( + value: Any, + type: types.Float16Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> HalfFloatScalar: ... +@overload +def scalar( + value: Any, + type: types.Float32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> FloatScalar: ... +@overload +def scalar( + value: Any, + type: types.Float64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DoubleScalar: ... +@overload +def scalar( + value: Any, + type: types.Date32Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date32Scalar: ... +@overload +def scalar( + value: Any, + type: types.Date64Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Date64Scalar: ... +@overload +def scalar( + value: Any, + type: types.MonthDayNanoIntervalType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MonthDayNanoIntervalScalar: ... +@overload +def scalar( + value: Any, + type: types.StringType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringScalar: ... +@overload +def scalar( + value: Any, + type: types.LargeStringType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeStringScalar: ... +@overload +def scalar( + value: Any, + type: types.StringViewType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StringViewScalar: ... +@overload +def scalar( + value: Any, + type: types.BinaryType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryScalar: ... +@overload +def scalar( + value: Any, + type: types.LargeBinaryType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeBinaryScalar: ... +@overload +def scalar( + value: Any, + type: types.BinaryViewType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> BinaryViewScalar: ... +@overload +def scalar( + value: Any, + type: types.TimestampType[types._Unit, types._Tz], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> TimestampScalar[types._Unit, types._Tz]: ... +@overload +def scalar( + value: Any, + type: types.Time32Type[types._Time32Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time32Scalar[types._Time32Unit]: ... +@overload +def scalar( + value: Any, + type: types.Time64Type[types._Time64Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Time64Scalar[types._Time64Unit]: ... +@overload +def scalar( + value: Any, + type: types.DurationType[types._Unit], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DurationScalar[types._Unit]: ... +@overload +def scalar( + value: Any, + type: types.Decimal32Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal32Scalar[types._Precision, types._Scale]: ... 
+@overload +def scalar( + value: Any, + type: types.Decimal64Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal64Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.Decimal128Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal128Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.Decimal256Type[types._Precision, types._Scale], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Decimal256Scalar[types._Precision, types._Scale]: ... +@overload +def scalar( + value: Any, + type: types.ListType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.LargeListType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeListScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.ListViewType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> ListViewScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.LargeListViewType[_DataTypeT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> LargeListViewScalar[_DataTypeT]: ... +@overload +def scalar( + value: Any, + type: types.FixedSizeListType[_DataTypeT, types._Size], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> FixedSizeListScalar[_DataTypeT, types._Size]: ... +@overload +def scalar( + value: Any, + type: types.DictionaryType[types._IndexT, types._BasicValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> DictionaryScalar[types._IndexT, types._BasicValueT]: ... +@overload +def scalar( + value: Any, + type: types.MapType[types._K, types._ValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> MapScalar[types._K, types._ValueT]: ... +@overload +def scalar( + value: Any, + type: types.StructType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> StructScalar: ... +@overload +def scalar( + value: Any, + type: types.UnionType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UnionScalar: ... +@overload +def scalar( + value: Any, + type: types.RunEndEncodedType[types._RunEndType, types._BasicValueT], + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> RunEndEncodedScalar[types._RunEndType, types._BasicValueT]: ... +@overload +def scalar( + value: Any, + type: types.Bool8Type, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Bool8Scalar: ... +@overload +def scalar( + value: Any, + type: types.UuidType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> UuidScalar: ... +@overload +def scalar( + value: Any, + type: types.JsonType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> JsonScalar: ... +@overload +def scalar( + value: Any, + type: types.OpaqueType, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> OpaqueScalar: ... 
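[Reviewer note, not part of the stub file] The overload set for scalar() continues below with a generic fallback and the implementation signature. As a sketch of how the explicit type=-based overloads above are expected to resolve (expected inferences only, assuming the stub's overload order is honored by the checker):

from decimal import Decimal

import pyarrow as pa

pa.scalar(1, type=pa.int8())                              # expected: Int8Scalar
pa.scalar([1, 2], type=pa.list_(pa.int16()))              # expected: ListScalar[Int16Type]
pa.scalar({"x": 1}, type=pa.struct([("x", pa.int64())]))  # expected: StructScalar
pa.scalar(Decimal("1.23"), type=pa.decimal128(5, 2))      # expected: Decimal128Scalar
pa.scalar(1000, type=pa.timestamp("ms", tz="UTC"))        # expected: TimestampScalar ("ms" unit, tz-aware)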
+@overload +def scalar( + value: Any, + type: _DataTypeT, + *, + from_pandas: bool | None = None, + memory_pool: MemoryPool | None = None, +) -> Scalar[_DataTypeT]: ... +def scalar(*args, **kwargs): + """ + Create a pyarrow.Scalar instance from a Python object. + + Parameters + ---------- + value : Any + Python object coercible to arrow's type system. + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred from + the value. + from_pandas : bool, default None + Use pandas's semantics for inferring nulls from values in + ndarray-like data. Defaults to False if not passed explicitly by user, + or True if a pandas object is passed in. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Returns + ------- + scalar : pyarrow.Scalar + + Examples + -------- + >>> import pyarrow as pa + + >>> pa.scalar(42) + + + >>> pa.scalar("string") + + + >>> pa.scalar([1, 2]) + + + >>> pa.scalar([1, 2], type=pa.list_(pa.int16())) + + """ + +__all__ = [ + "Scalar", + "_NULL", + "NA", + "NullScalar", + "BooleanScalar", + "UInt8Scalar", + "Int8Scalar", + "UInt16Scalar", + "Int16Scalar", + "UInt32Scalar", + "Int32Scalar", + "UInt64Scalar", + "Int64Scalar", + "HalfFloatScalar", + "FloatScalar", + "DoubleScalar", + "Decimal32Scalar", + "Decimal64Scalar", + "Decimal128Scalar", + "Decimal256Scalar", + "Date32Scalar", + "Date64Scalar", + "Time32Scalar", + "Time64Scalar", + "TimestampScalar", + "DurationScalar", + "MonthDayNanoIntervalScalar", + "BinaryScalar", + "LargeBinaryScalar", + "FixedSizeBinaryScalar", + "StringScalar", + "LargeStringScalar", + "BinaryViewScalar", + "StringViewScalar", + "ListScalar", + "FixedSizeListScalar", + "LargeListScalar", + "ListViewScalar", + "LargeListViewScalar", + "StructScalar", + "MapScalar", + "DictionaryScalar", + "RunEndEncodedScalar", + "UnionScalar", + "ExtensionScalar", + "FixedShapeTensorScalar", + "Bool8Scalar", + "UuidScalar", + "JsonScalar", + "OpaqueScalar", + "scalar", +] diff --git a/python/pyarrow/__lib_pxi/table.pyi b/python/pyarrow/__lib_pxi/table.pyi new file mode 100644 index 00000000000..ffba4262e8c --- /dev/null +++ b/python/pyarrow/__lib_pxi/table.pyi @@ -0,0 +1,5617 @@ +import datetime as dt +import sys + +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import ( + Any, + Collection, + Generator, + Generic, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + TypeVar, + overload, +) + +import numpy as np +import pandas as pd + +from numpy.typing import NDArray +from pyarrow._compute import ( + CastOptions, + CountOptions, + FunctionOptions, + ScalarAggregateOptions, + TDigestOptions, + VarianceOptions, +) +from pyarrow._stubs_typing import ( + Indices, + Mask, + NullEncoding, + NullSelectionBehavior, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportArrowStream, +) +from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.interchange.dataframe import _PyArrowDataFrame +from pyarrow.lib import Device, MemoryManager, MemoryPool, MonthDayNano, Schema +from pyarrow.lib import Field as _Field + +from . 
import array, scalar, types +from .array import Array, NullableCollection, StructArray, _CastAs, _PandasConvertible +from .device import DeviceAllocationType +from .io import Buffer +from .ipc import RecordBatchReader +from .scalar import Int64Scalar, Scalar +from .tensor import Tensor +from .types import DataType, _AsPyType, _BasicDataType, _DataTypeT + +Field: TypeAlias = _Field[DataType] +_ScalarT = TypeVar("_ScalarT", bound=Scalar) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) + +_Aggregation: TypeAlias = Literal[ + "all", + "any", + "approximate_median", + "count", + "count_all", + "count_distinct", + "distinct", + "first", + "first_last", + "last", + "list", + "max", + "mean", + "min", + "min_max", + "one", + "product", + "stddev", + "sum", + "tdigest", + "variance", +] +_AggregationPrefixed: TypeAlias = Literal[ + "hash_all", + "hash_any", + "hash_approximate_median", + "hash_count", + "hash_count_all", + "hash_count_distinct", + "hash_distinct", + "hash_first", + "hash_first_last", + "hash_last", + "hash_list", + "hash_max", + "hash_mean", + "hash_min", + "hash_min_max", + "hash_one", + "hash_product", + "hash_stddev", + "hash_sum", + "hash_tdigest", + "hash_variance", +] +Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed +AggregateOptions: TypeAlias = ( + ScalarAggregateOptions | CountOptions | TDigestOptions | VarianceOptions | FunctionOptions +) + +UnarySelector: TypeAlias = str +NullarySelector: TypeAlias = tuple[()] +NarySelector: TypeAlias = list[str] | tuple[str, ...] +ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector + +class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + """ + An array-like composed from a (possibly empty) collection of pyarrow.Arrays + + Warnings + -------- + Do not call this class's constructor directly. + + Examples + -------- + To construct a ChunkedArray object use :func:`pyarrow.chunked_array`: + + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> isinstance(pa.chunked_array([[2, 2, 4], [4, 5, 100]]), pa.ChunkedArray) + True + """ + + @property + def data(self) -> Self: ... + @property + def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: + """ + Return data type of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + """ + def length(self) -> int: + """ + Return length of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.length() + 6 + """ + __len__ = length + def to_string( + self, + *, + indent: int = 0, + window: int = 5, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: + """ + Render a "pretty-printed" string representation of the ChunkedArray + + Parameters + ---------- + indent : int + How much to indent right the content of the array, + by default ``0``. + window : int + How many items to preview within each chunk at the begin and end + of the chunk when the chunk is bigger than the window. + The other elements will be ellipsed. + container_window : int + How many chunks to preview at the begin and end + of the array when the array is bigger than the window. + The other elements will be ellipsed. + This setting also applies to list columns. 
+ skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_string(skip_new_lines=True) + '[[2,2,4],[4,5,100]]' + """ + format = to_string + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + @property + def null_count(self) -> int: + """ + Number of null entries + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.null_count + 1 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the chunked array. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.nbytes + 49 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the chunked array. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.get_total_buffer_size() + 49 + """ + def __sizeof__(self) -> int: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + @overload + def __getitem__(self, key: int) -> _Scalar_co: ... + def __getitem__(self, key): + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or ChunkedArray (slice) + """ + def getitem(self, i: int) -> Scalar: ... + def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_null() + + [ + [ + false, + false, + false, + false, + true, + false + ] + ] + """ + def is_nan(self) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the NaN values. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) + >>> arr.is_nan() + + [ + [ + false, + true, + false, + false, + null, + false + ] + ] + """ + def is_valid(self) -> ChunkedArray[scalar.BooleanScalar]: + """ + Return boolean array indicating the non-null values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_valid() + + [ + [ + true, + true, + true + ], + [ + true, + false, + true + ] + ] + """ + def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: + """ + Replace each null element in values with fill_value. + + See :func:`pyarrow.compute.fill_null` for full usage. + + Parameters + ---------- + fill_value : any + The replacement value for null entries. + + Returns + ------- + result : Array or ChunkedArray + A new array with nulls replaced by the given value. + + Examples + -------- + >>> import pyarrow as pa + >>> fill_value = pa.scalar(5, type=pa.int8()) + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.fill_null(fill_value) + + [ + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + ] + """ + def equals(self, other: Self) -> bool: + """ + Return whether the contents of two chunked arrays are equal. + + Parameters + ---------- + other : pyarrow.ChunkedArray + Chunked array to compare against. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> n_legs.equals(n_legs) + True + >>> n_legs.equals(animals) + False + """ + def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: + """ + Return a NumPy copy of this array (experimental). + + Parameters + ---------- + zero_copy_only : bool, default False + Introduced for signature consistence with pyarrow.Array.to_numpy. + This must be False here since NumPy arrays' buffer must be contiguous. + + Returns + ------- + array : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_numpy() + array([ 2, 2, 4, 4, 5, 100]) + """ + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + @overload + def cast( + self, + target_type: None = None, + safe: bool | None = None, + options: CastOptions | None = None, + ) -> Self: ... + @overload + def cast( + self, target_type: _CastAs, safe: bool | None = None, options: CastOptions | None = None + ) -> ChunkedArray[Scalar[_CastAs]]: ... + def cast(self, *args, **kwargs): + """ + Cast array values to another data type + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. 
+ options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + + Change the data type of an array: + + >>> n_legs_seconds = n_legs.cast(pa.duration("s")) + >>> n_legs_seconds.type + DurationType(duration[s]) + """ + def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: + """ + Compute dictionary-encoded representation of array. + + See :func:`pyarrow.compute.dictionary_encode` for full usage. + + Parameters + ---------- + null_encoding : str, default "mask" + How to handle null entries. + + Returns + ------- + encoded : ChunkedArray + A dictionary-encoded version of this array. + + Examples + -------- + >>> import pyarrow as pa + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> animals.dictionary_encode() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: + """ + Flatten this ChunkedArray. If it has a struct type, the column is + flattened into one array per struct field. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : list of ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> c_arr = pa.chunked_array(n_legs.value_counts()) + >>> c_arr + + [ + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + ] + >>> c_arr.flatten() + [ + [ + [ + 2, + 4, + 5, + 100 + ] + ], + [ + [ + 2, + 2, + 1, + 1 + ] + ]] + >>> c_arr.type + StructType(struct) + >>> n_legs.type + DataType(int64) + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: + """ + Flatten this ChunkedArray into a single non-chunked array. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.combine_chunks() + + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + """ + def unique(self) -> ChunkedArray[_Scalar_co]: + """ + Compute distinct elements in array + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.unique() + + [ + 2, + 4, + 5, + 100 + ] + """ + def value_counts(self) -> StructArray: + """ + Compute counts of unique elements in array. 
+ + Returns + ------- + An array of structs + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.value_counts() + + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this ChunkedArray + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.slice(2, 2) + + [ + [ + 4 + ], + [ + 4 + ] + ] + """ + def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.filter` for full usage. + + Parameters + ---------- + mask : Array or array-like + The boolean mask to filter the chunked array with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled. + + Returns + ------- + filtered : Array or ChunkedArray + An array of the same type, with only the elements selected by + the boolean mask. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> mask = pa.array([True, False, None, True, False, True]) + >>> n_legs.filter(mask) + + [ + [ + 2 + ], + [ + 4, + 100 + ] + ] + >>> n_legs.filter(mask, null_selection_behavior="emit_null") + + [ + [ + 2, + null + ], + [ + 4, + 100 + ] + ] + """ + @overload + def index( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + value: Scalar[_DataTypeT] | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + @overload + def index( + self, + value: Scalar[_DataTypeT], + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: ... + def index(self, *args, **kwargs): + """ + Find the first index of a value. + + See :func:`pyarrow.compute.index` for full usage. + + Parameters + ---------- + value : Scalar or object + The value to look for in the array. + start : int, optional + The start index where to look for `value`. + end : int, optional + The end index where to look for `value`. + memory_pool : MemoryPool, optional + A memory pool for potential memory allocations. + + Returns + ------- + index : Int64Scalar + The index of the value in the array (-1 if not found). + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.index(4) + + >>> n_legs.index(4, start=3) + + """ + def take(self, indices: Indices) -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the array whose values will be returned. 
+ + Returns + ------- + taken : Array or ChunkedArray + An array with the same datatype, containing the taken values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.take([1, 4, 5]) + + [ + [ + 2, + 5, + 100 + ] + ] + """ + def drop_null(self) -> Self: + """ + Remove missing values from a chunked array. + See :func:`pyarrow.compute.drop_null` for full description. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.drop_null() + + [ + [ + 2, + 2 + ], + [ + 4, + 5, + 100 + ] + ] + """ + def sort(self, order: Order = "ascending", **kwargs) -> Self: + """ + Sort the ChunkedArray + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : ChunkedArray + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent chunked array, but where all + chunks share the same dictionary values. Dictionary indices are + transposed accordingly. + + If there are no dictionaries in the chunked array, it is returned + unchanged. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> c_arr + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ] + ] + >>> c_arr.unify_dictionaries() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + @property + def num_chunks(self) -> int: + """ + Number of underlying chunks. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.num_chunks + 2 + """ + def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: + """ + Select a chunk by its index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.chunk(1) + + [ + 4, + 5, + 100 + ] + """ + @property + def chunks(self) -> list[Array[_Scalar_co]]: + """ + Convert to a list of single-chunked arrays. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.chunks + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ]] + """ + @overload + def iterchunks( + self: ChunkedArray[scalar.NullScalar], + ) -> Generator[array.NullArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.BooleanScalar], + ) -> Generator[array.BooleanArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt8Scalar], + ) -> Generator[array.UInt8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int8Scalar], + ) -> Generator[array.Int8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt16Scalar], + ) -> Generator[array.UInt16Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int16Scalar], + ) -> Generator[array.Int16Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt32Scalar], + ) -> Generator[array.UInt32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int32Scalar], + ) -> Generator[array.Int32Array, None, None]: + """ + Convert to an iterator of ChunkArrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> for i in n_legs.iterchunks(): + ... print(i.null_count) + 0 + 1 + + """ + @overload + def iterchunks( + self: ChunkedArray[scalar.UInt64Scalar], + ) -> Generator[array.UInt64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Int64Scalar], + ) -> Generator[array.Int64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.HalfFloatScalar], + ) -> Generator[array.HalfFloatArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FloatScalar], + ) -> Generator[array.FloatArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DoubleScalar], + ) -> Generator[array.DoubleArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal32Scalar], + ) -> Generator[array.Decimal32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal64Scalar], + ) -> Generator[array.Decimal64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal128Scalar], + ) -> Generator[array.Decimal128Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Decimal256Scalar], + ) -> Generator[array.Decimal256Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Date32Scalar], + ) -> Generator[array.Date32Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Date64Scalar], + ) -> Generator[array.Date64Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Time32Scalar[types._Time32Unit]], + ) -> Generator[array.Time32Array[types._Time32Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Time64Scalar[types._Time64Unit]], + ) -> Generator[array.Time64Array[types._Time64Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DurationScalar[types._Unit]], + ) -> Generator[array.DurationArray[types._Unit], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.MonthDayNanoIntervalScalar], + ) -> Generator[array.MonthDayNanoIntervalArray, None, None]: ... 
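These per-type overloads narrow the chunk type yielded by `iterchunks` (for example, int64 elements yield `Int64Array` chunks). A minimal runtime sketch of what the annotations describe, using only the public pyarrow API:

import pyarrow as pa

n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]])  # element type inferred as int64
for chunk in n_legs.iterchunks():
    # each chunk is a pyarrow.Array (an Int64Array here), not a ChunkedArray,
    # so per-chunk properties such as null_count are available directly
    print(type(chunk).__name__, chunk.null_count)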
+ @overload + def iterchunks( + self: ChunkedArray[scalar.BinaryScalar], + ) -> Generator[array.BinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeBinaryScalar], + ) -> Generator[array.LargeBinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FixedSizeBinaryScalar], + ) -> Generator[array.FixedSizeBinaryArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StringScalar], + ) -> Generator[array.StringArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeStringScalar], + ) -> Generator[array.LargeStringArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.BinaryViewScalar], + ) -> Generator[array.BinaryViewArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StringViewScalar], + ) -> Generator[array.StringViewArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.ListScalar[_DataTypeT]], + ) -> Generator[array.ListArray[scalar.ListScalar[_DataTypeT]], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.FixedSizeListScalar[_DataTypeT, types._Size]], + ) -> Generator[array.FixedSizeListArray[_DataTypeT, types._Size], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeListScalar[_DataTypeT]], + ) -> Generator[array.LargeListArray[_DataTypeT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.LargeListViewScalar[_DataTypeT]], + ) -> Generator[array.LargeListViewArray[_DataTypeT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.StructScalar], + ) -> Generator[array.StructArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.MapScalar[array._MapKeyT, array._MapItemT]], + ) -> Generator[array.MapArray[array._MapKeyT, array._MapItemT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.DictionaryScalar[types._IndexT, types._BasicValueT]], + ) -> Generator[array.DictionaryArray[types._IndexT, types._BasicValueT], None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.RunEndEncodedScalar], + ) -> Generator[array.RunEndEncodedArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UnionScalar], + ) -> Generator[array.UnionArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.Bool8Scalar], + ) -> Generator[array.Bool8Array, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.UuidScalar], + ) -> Generator[array.UuidArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.JsonScalar], + ) -> Generator[array.JsonArray, None, None]: ... + @overload + def iterchunks( + self: ChunkedArray[scalar.OpaqueScalar], + ) -> Generator[array.OpaqueArray, None, None]: ... + def iterchunks(self): + """ + Convert to an iterator of ChunkArrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> for i in n_legs.iterchunks(): + ... print(i.null_count) + 0 + 1 + + """ + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_pylist( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: + """ + Convert to a list of native Python objects. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. 
+ The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.to_pylist() + [2, 2, 4, 4, None, 100] + """ + def __arrow_c_stream__(self, requested_schema=None) -> Any: + """ + Export to a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import ChunkedArray from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + ChunkedArray + """ + @property + def is_cpu(self) -> bool: + """ + Whether all chunks in the ChunkedArray are CPU-accessible. + """ + +@overload +def chunked_array( + values: Iterable[NullableCollection[bool]], + type: None = None, +) -> ChunkedArray[scalar.BooleanScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[int]], + type: None = None, +) -> ChunkedArray[scalar.Int64Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[float]], + type: None = None, +) -> ChunkedArray[scalar.DoubleScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[Decimal]], + type: None = None, +) -> ChunkedArray[scalar.Decimal128Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dict[str, Any]]], + type: None = None, +) -> ChunkedArray[scalar.StructScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.datetime]], + type: None = None, +) -> ChunkedArray[scalar.TimestampScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.date]], + type: None = None, +) -> ChunkedArray[scalar.Date32Scalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.time]], + type: None = None, +) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[dt.timedelta]], + type: None = None, +) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[MonthDayNano]], + type: None = None, +) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[str]], + type: None = None, +) -> ChunkedArray[scalar.StringScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[bytes]], + type: None = None, +) -> ChunkedArray[scalar.BinaryScalar]: ... +@overload +def chunked_array( + values: Iterable[NullableCollection[list[Any]]], + type: None = None, +) -> ChunkedArray[scalar.ListScalar[Any]]: ... 
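The overloads above encode pyarrow's default type inference when no `type` is passed; the stubbed return types can be compared against the runtime behaviour with a small sketch like this (the printed types are pyarrow's defaults):

import datetime as dt
from decimal import Decimal

import pyarrow as pa

# Default inference used by chunked_array when type=None, matching the
# overload return types above (int64, double, string, date32, timestamp[us], ...).
print(pa.chunked_array([[1, 2, 3]]).type)                  # int64
print(pa.chunked_array([[1.5, 2.5]]).type)                 # double
print(pa.chunked_array([["a", "b"]]).type)                 # string
print(pa.chunked_array([[Decimal("1.10")]]).type)          # decimal128(3, 2)
print(pa.chunked_array([[dt.date(2020, 1, 1)]]).type)      # date32[day]
print(pa.chunked_array([[dt.datetime(2020, 1, 1)]]).type)  # timestamp[us]
print(pa.chunked_array([[dt.timedelta(seconds=1)]]).type)  # duration[us]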
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["null"] | types.NullType, +) -> ChunkedArray[scalar.NullScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["bool", "boolean"] | types.BoolType, +) -> ChunkedArray[scalar.BooleanScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i1", "int8"] | types.Int8Type, +) -> ChunkedArray[scalar.Int8Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i2", "int16"] | types.Int16Type, +) -> ChunkedArray[scalar.Int16Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i4", "int32"] | types.Int32Type, +) -> ChunkedArray[scalar.Int32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["i8", "int64"] | types.Int64Type, +) -> ChunkedArray[scalar.Int64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u1", "uint8"] | types.UInt8Type, +) -> ChunkedArray[scalar.UInt8Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u2", "uint16"] | types.UInt16Type, +) -> ChunkedArray[scalar.UInt16Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u4", "uint32"] | types.Uint32Type, +) -> ChunkedArray[scalar.UInt32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["u8", "uint64"] | types.UInt64Type, +) -> ChunkedArray[scalar.UInt64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f2", "halffloat", "float16"] | types.Float16Type, +) -> ChunkedArray[scalar.HalfFloatScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f4", "float", "float32"] | types.Float32Type, +) -> ChunkedArray[scalar.FloatScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["f8", "double", "float64"] | types.Float64Type, +) -> ChunkedArray[scalar.DoubleScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["string", "str", "utf8"] | types.StringType, +) -> ChunkedArray[scalar.StringScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["binary"] | types.BinaryType, +) -> ChunkedArray[scalar.BinaryScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, +) -> ChunkedArray[scalar.LargeStringScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["large_binary"] | types.LargeBinaryType, +) -> ChunkedArray[scalar.LargeBinaryScalar]: ... 
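The string literals in these overloads are the usual pyarrow type aliases; passing either the alias or the corresponding DataType object selects the same element type. A short sketch, assuming nothing beyond the documented `chunked_array` signature:

import pyarrow as pa

a = pa.chunked_array([[1, 2, 3]], type="i2")           # alias for pa.int16()
b = pa.chunked_array([[1, 2, 3]], type=pa.uint32())    # equivalent DataType form
c = pa.chunked_array([["x", "y"]], type="large_string")
print(a.type, b.type, c.type)                          # int16 uint32 large_string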
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["binary_view"] | types.BinaryViewType, +) -> ChunkedArray[scalar.BinaryViewScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["string_view"] | types.StringViewType, +) -> ChunkedArray[scalar.StringViewScalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["date32", "date32[day]"] | types.Date32Type, +) -> ChunkedArray[scalar.Date32Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["date64", "date64[ms]"] | types.Date64Type, +) -> ChunkedArray[scalar.Date64Scalar]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], +) -> ChunkedArray[scalar.Time32Scalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], +) -> ChunkedArray[scalar.Time32Scalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], +) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], +) -> ChunkedArray[scalar.Time64Scalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["us"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], +) -> ChunkedArray[scalar.TimestampScalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[s]"] | types.DurationType[Literal["s"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["s"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["ms"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[us]"] | types.DurationType[Literal["us"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... 
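Temporal aliases carry their unit in the literal (for example "timestamp[us]" or "duration[ms]"), and the overloads narrow the scalar's unit parameter to match. A small sketch of the runtime side, with integer inputs interpreted in the requested unit:

import pyarrow as pa

ts = pa.chunked_array([[0, 1_000_000]], type="timestamp[us]")  # microseconds since epoch
du = pa.chunked_array([[5, 10]], type=pa.duration("ms"))
print(ts.type)  # timestamp[us]
print(du.type)  # duration[ms]
print(ts[0])    # 1970-01-01 00:00:00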
+@overload +def chunked_array( + values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], + type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], +) -> ChunkedArray[scalar.DurationScalar[Literal["ns"]]]: ... +@overload +def chunked_array( + values: Iterable[Iterable[Any]] | SupportArrowStream | SupportArrowArray, + type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, +) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... +@overload +def chunked_array( + values: Iterable[Array[_ScalarT]], + type: None = None, +) -> ChunkedArray[_ScalarT]: ... +def chunked_array(value, type=None): + """ + Construct chunked array from list of array-like objects + + Parameters + ---------- + arrays : Array, list of Array, or array-like + Must all be the same data type. Can be empty only if type also passed. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_stream__`` method) can be + passed as well. + type : DataType or string coercible to DataType + + Returns + ------- + ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + """ + +_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) + +class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Return the dataframe interchange object implementing the interchange protocol. + + Parameters + ---------- + nan_as_null : bool, default False + Whether to tell the DataFrame to overwrite null values in the data + with ``NaN`` (or ``NaT``). + allow_copy : bool, default True + Whether to allow memory copying when exporting. If set to False + it would cause non-zero-copy exports to fail. + + Returns + ------- + DataFrame interchange object + The object which consuming library can use to ingress the dataframe. + + Notes + ----- + Details on the interchange protocol: + https://data-apis.org/dataframe-protocol/latest/index.html + `nan_as_null` currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. + """ + @overload + def __getitem__(self, key: int | str) -> _ColumnT: ... + @overload + def __getitem__(self, key: slice) -> Self: ... + def __getitem__(self, key): + """ + Slice or return column at given index or column name + + Parameters + ---------- + key : integer, str, or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + Array (from RecordBatch) or ChunkedArray (from Table) for column input. + RecordBatch or Table for slice input. + """ + def __len__(self) -> int: ... + def column(self, i: int | str) -> _ColumnT: + """ + Select single column from Table or RecordBatch. + + Parameters + ---------- + i : int or string + The index or name of the column to retrieve. + + Returns + ------- + column : Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... 
} + ... ) + >>> table = pa.Table.from_pandas(df) + + Select a column by numeric index: + + >>> table.column(0) + + [ + [ + 2, + 4, + 5, + 100 + ] + ] + + Select a column by its name: + + >>> table.column("animals") + + [ + [ + "Flamingo", + "Horse", + "Brittle stars", + "Centipede" + ] + ] + """ + @property + def column_names(self) -> list[str]: + """ + Names of the Table or RecordBatch columns. + + Returns + ------- + list of str + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> table = pa.Table.from_arrays( + ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]], + ... names=["n_legs", "animals"], + ... ) + >>> table.column_names + ['n_legs', 'animals'] + """ + @property + def columns(self) -> list[_ColumnT]: + """ + List of all columns in numerical order. + + Returns + ------- + columns : list of Array (for RecordBatch) or list of ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.columns + [ + [ + [ + null, + 4, + 5, + null + ] + ], + [ + [ + "Flamingo", + "Horse", + null, + "Centipede" + ] + ]] + """ + def drop_null(self) -> Self: + """ + Remove rows that contain missing values from a Table or RecordBatch. + + See :func:`pyarrow.compute.drop_null` for full usage. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, with rows containing + no missing values. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [None, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", None, "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.drop_null() + pyarrow.Table + year: double + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def field(self, i: int | str) -> Field: + """ + Select a schema field by its column name or numeric index. + + Parameters + ---------- + i : int or string + The index or name of the field to retrieve. + + Returns + ------- + Field + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.field(0) + pyarrow.Field + >>> table.field(1) + pyarrow.Field + """ + @classmethod + def from_pydict( + cls, + mapping: Mapping[str, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from Arrow arrays or columns. + + Parameters + ---------- + mapping : dict or Mapping + A mapping of strings to Arrays or Python lists. + schema : Schema, default None + If not passed, will be inferred from the Mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). 
+ + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> pydict = {"n_legs": n_legs, "animals": animals} + + Construct a Table from a dictionary of arrays: + + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a dictionary of arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pydict(pydict, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pylist( + cls, + mapping: Sequence[Mapping[str, Any]], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from list of rows / dictionaries. + + Parameters + ---------- + mapping : list of dicts of rows + A mapping of strings to row values. + schema : Schema, default None + If not passed, will be inferred from the first row of the + mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + + Construct a Table from a list of rows: + + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4]] + animals: [["Flamingo","Dog"]] + + Construct a Table from a list of rows with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pylist(pylist, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + def itercolumns(self) -> Generator[_ColumnT, None, None]: + """ + Iterator over all columns in their numerical order. + + Yields + ------ + Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> for i in table.itercolumns(): + ... 
print(i.null_count) + 2 + 1 + """ + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def shape(self) -> tuple[int, int]: + """ + Dimensions of the table or record batch: (#rows, #columns). + + Returns + ------- + (int, int) + Number of rows and number of columns. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table.shape + (4, 2) + """ + @property + def schema(self) -> Schema: ... + @property + def nbytes(self) -> int: ... + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: + """ + Sort the Table or RecordBatch by one or multiple columns. + + Parameters + ---------- + sorting : str or list[tuple(name, order)] + Name of the column to use to sort (ascending), or + a list of multiple sorting conditions where + each entry is a tuple with column name + and sorting order ("ascending" or "descending") + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + Table or RecordBatch + A new tabular object sorted according to the sort keys. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.sort_by("animal") + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,2021,2021,2020,2022,2022]] + n_legs: [[5,100,4,2,4,2]] + animal: [["Brittle stars","Centipede","Dog","Flamingo","Horse","Parrot"]] + """ + def take(self, indices: Indices) -> Self: + """ + Select rows from a Table or RecordBatch. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the tabular object whose rows will be returned. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, containing the taken rows. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.take([1, 3]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def filter( + self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" + ) -> Self: + """ + Select rows from the table or record batch based on a boolean mask. + + The Table can be filtered based on a mask, which will be passed to + :func:`pyarrow.compute.filter` to perform the filtering, or it can + be filtered through a boolean :class:`.Expression` + + Parameters + ---------- + mask : Array or array-like or .Expression + The boolean mask or the :class:`.Expression` to filter the table with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled, does nothing if + an :class:`.Expression` is used. 
+ + Returns + ------- + filtered : Table or RecordBatch + A tabular object of the same schema, with only the rows selected + by applied filtering + + Examples + -------- + Using a Table (works similarly for RecordBatch): + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Define an expression and select rows: + + >>> import pyarrow.compute as pc + >>> expr = pc.field("year") <= 2020 + >>> table.filter(expr) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2019]] + n_legs: [[2,5]] + animals: [["Flamingo","Brittle stars"]] + + Define a mask and select rows: + + >>> mask = [True, True, False, None] + >>> table.filter(mask) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022]] + n_legs: [[2,4]] + animals: [["Flamingo","Horse"]] + >>> table.filter(mask, null_selection_behavior="emit_null") + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,null]] + n_legs: [[2,4,null]] + animals: [["Flamingo","Horse",null]] + """ + def to_pydict( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> dict[str, list[Any]]: + """ + Convert the Table or RecordBatch to a dict or OrderedDict. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Returns + ------- + dict + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> table.to_pydict() + {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']} + """ + def to_pylist( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> list[dict[str, Any]]: + """ + Convert the Table or RecordBatch to a list of rows / dictionaries. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. 
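A minimal sketch of the `maps_as_pydicts` behaviour just described, for a single map column (the column name and values are made up for illustration):

import pyarrow as pa

t = pa.table({"m": pa.array([[("a", 1), ("b", 2)]], type=pa.map_(pa.string(), pa.int64()))})
print(t.to_pylist())                          # [{'m': [('a', 1), ('b', 2)]}]  association list
print(t.to_pylist(maps_as_pydicts="strict"))  # [{'m': {'a': 1, 'b': 2}}]      plain dict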
+ + Returns + ------- + list + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> data = [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]] + >>> table = pa.table(data, names=["n_legs", "animals"]) + >>> table.to_pylist() + [{'n_legs': 2, 'animals': 'Flamingo'}, {'n_legs': 4, 'animals': 'Horse'}, ... + """ + def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: + """ + Return human-readable string representation of Table or RecordBatch. + + Parameters + ---------- + show_metadata : bool, default False + Display Field-level and Schema-level KeyValueMetadata. + preview_cols : int, default 0 + Display values of the columns for the first N columns. + + Returns + ------- + str + """ + def remove_column(self, i: int) -> Self: ... + def drop_columns(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new Table or RecordBatch. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Raises + ------ + KeyError + If any of the passed column names do not exist. + + Returns + ------- + Table or RecordBatch + A tabular object without the column(s). + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Drop one column: + + >>> table.drop_columns("animals") + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + + Drop one or more columns: + + >>> table.drop_columns(["n_legs", "animals"]) + pyarrow.Table + ... + ---- + """ + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + def append_column( + self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Append column at end of columns. + + Parameters + ---------- + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + Table or RecordBatch + New table or record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Append column at the end: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.append_column("year", [year]) + pyarrow.Table + n_legs: int64 + animals: string + year: int64 + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + year: [[2021,2022,2019,2021]] + """ + +class RecordBatch(_Tabular[Array]): + """ + Batch of rows of columns of equal length + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatch.from_*`` functions instead. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Constructing a RecordBatch from arrays: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Constructing a RecordBatch from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_pandas(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Constructing a RecordBatch from pylist: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + >>> pa.RecordBatch.from_pylist(pylist).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Dog + + You can also construct a RecordBatch using :func:`pyarrow.record_batch`: + + >>> pa.record_batch([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def replace_schema_metadata( + self, metadata: dict[str | bytes, str | bytes] | None = None + ) -> Self: + """ + Create shallow copy of record batch by replacing schema + key-value metadata with the indicated new metadata (which may be None, + which deletes any existing metadata + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + shallow_copy : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + + Constructing a RecordBatch with schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64())], metadata={"n_legs": "Number of legs per animal"} + ... 
) + >>> batch = pa.RecordBatch.from_arrays([n_legs], schema=my_schema) + >>> batch.schema + n_legs: int64 + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Shallow copy of a RecordBatch with deleted schema metadata: + + >>> batch.replace_schema_metadata().schema + n_legs: int64 + """ + @property + def num_columns(self) -> int: + """ + Number of columns + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_columns + 2 + """ + + @property + def num_rows(self) -> int: + """ + Number of rows + + Due to the definition of a RecordBatch, all columns have the same + number of rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_rows + 6 + """ + @property + def schema(self) -> Schema: + """ + Schema of the RecordBatch and its columns + + Returns + ------- + pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.schema + n_legs: int64 + animals: string + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the record batch. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.nbytes + 116 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the record batch + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.get_total_buffer_size() + 120 + """ + + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: + """ + Add column to RecordBatch at position i. 
+ + A new record batch is returned with the column added, the original record batch + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.add_column(0, "year", year) + pyarrow.RecordBatch + year: int64 + n_legs: int64 + animals: string + ---- + year: [2021,2022,2019,2021] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Original record batch is left unchanged: + + >>> batch + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def remove_column(self, i: int) -> Self: + """ + Create new RecordBatch with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New record batch without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.remove_column(1) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: + """ + Replace column in RecordBatch at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.set_column(1, "year", year) + pyarrow.RecordBatch + n_legs: int64 + year: int64 + ---- + n_legs: [2,4,5,100] + year: [2021,2022,2019,2021] + """ + @overload + def rename_columns(self, names: list[str]) -> Self: ... + @overload + def rename_columns(self, names: dict[str, str]) -> Self: ... + def rename_columns(self, names): + """ + Create new record batch with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. 
+ + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> new_names = ["n", "name"] + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write RecordBatch to Buffer as encapsulated IPC message, which does not + include a Schema. + + To reconstruct a RecordBatch from the encapsulated IPC message Buffer + returned by this function, a Schema must be passed separately. See + Examples. + + Parameters + ---------- + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> buf = batch.serialize() + >>> buf + + + Reconstruct RecordBatch from IPC message Buffer and original Schema + + >>> pa.ipc.read_record_batch(buf, batch.schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this RecordBatch + + Parameters + ---------- + offset : int, default 0 + Offset from start of record batch to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> batch.slice(offset=3).to_pandas() + n_legs animals + 0 4 Horse + 1 5 Brittle stars + 2 100 Centipede + >>> batch.slice(length=2).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + >>> batch.slice(offset=3, length=1).to_pandas() + n_legs animals + 0 4 Horse + """ + def equals(self, other: Self, check_metadata: bool = False) -> bool: + """ + Check if contents of two record batches are equal. + + Parameters + ---------- + other : pyarrow.RecordBatch + RecordBatch to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... 
) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch_0 = pa.record_batch([]) + >>> batch_1 = pa.RecordBatch.from_arrays( + ... [n_legs, animals], + ... names=["n_legs", "animals"], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> batch.equals(batch) + True + >>> batch.equals(batch_0) + False + >>> batch.equals(batch_1) + True + >>> batch.equals(batch_1, check_metadata=True) + False + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the RecordBatch. + + Returns a new RecordBatch with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + + Select columns my indices: + + >>> batch.select([1]) + pyarrow.RecordBatch + animals: string + ---- + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Select columns by names: + + >>> batch.select(["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,2,4,4,5,100] + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast record batch values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast batch values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> batch.cast(target_schema=my_schema) + pyarrow.RecordBatch + n_legs: duration[s] + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[Array], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a RecordBatch from multiple pyarrow.Arrays + + Parameters + ---------- + arrays : list of pyarrow.Array + One for each field in RecordBatch + names : list of str, optional + Names for the batch fields. If not passed, schema must be passed + schema : Schema, default None + Schema for the created batch. If not passed, names must be passed + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... 
["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from pyarrow Arrays using names: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Construct a RecordBatch from pyarrow Arrays using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow RecordBatch + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the RecordBatch. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``RecordBatch``. The default of None will store the index as a + column, except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + + Returns + ------- + pyarrow.RecordBatch + + + Examples + -------- + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Convert pandas DataFrame to RecordBatch: + + >>> import pyarrow as pa + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... 
) + >>> pa.RecordBatch.from_pandas(df, schema=my_schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch specifying columns: + + >>> pa.RecordBatch.from_pandas(df, columns=["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] + ) -> Self: + """ + Construct a RecordBatch from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``RecordBatch``. + + Parameters + ---------- + struct_array : StructArray + Array to construct the record batch from. + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.RecordBatch.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array(self) -> StructArray: + """ + Convert to a struct array. + """ + def to_tensor( + self, + null_to_nan: bool = False, + row_major: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Tensor: + """ + Convert to a :class:`~pyarrow.Tensor`. + + RecordBatches that can be converted have fields of type signed or unsigned + integer or float, including all bit-widths. + + ``null_to_nan`` is ``False`` by default and this method will raise an error in case + any nulls are present. RecordBatches with nulls can be converted with ``null_to_nan`` + set to ``True``. In this case null values are converted to ``NaN`` and integer type + arrays are promoted to the appropriate float type. + + Parameters + ---------- + null_to_nan : bool, default False + Whether to write null values in the result as ``NaN``. + row_major : bool, default True + Whether resulting Tensor is row-major or column-major + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Examples + -------- + >>> import pyarrow as pa + >>> batch = pa.record_batch( + ... [ + ... pa.array([1, 2, 3, 4, None], type=pa.int32()), + ... pa.array([10, 20, 30, 40, None], type=pa.float32()), + ... ], + ... names=["a", "b"], + ... ) + + >>> batch + pyarrow.RecordBatch + a: int32 + b: float + ---- + a: [1,2,3,4,null] + b: [10,20,30,40,null] + + Convert a RecordBatch to row-major Tensor with null values + written as ``NaN``s + + >>> batch.to_tensor(null_to_nan=True) + + type: double + shape: (5, 2) + strides: (16, 8) + >>> batch.to_tensor(null_to_nan=True).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + + Convert a RecordBatch to column-major Tensor + + >>> batch.to_tensor(null_to_nan=True, row_major=False) + + type: double + shape: (5, 2) + strides: (8, 40) + >>> batch.to_tensor(null_to_nan=True, row_major=False).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + """ + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): + """ + Export to a C ArrowArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. 
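+
+ A minimal round-trip sketch (illustrative only; it assumes the optional
+ ``pyarrow.cffi`` bindings are installed and exports the batch into
+ caller-allocated C structs, then imports it back):
+
+ >>> import pyarrow as pa
+ >>> from pyarrow.cffi import ffi
+ >>> batch = pa.record_batch({"n_legs": [2, 4]})
+ >>> c_array = ffi.new("struct ArrowArray*")
+ >>> c_schema = ffi.new("struct ArrowSchema*")
+ >>> array_ptr = int(ffi.cast("uintptr_t", c_array))
+ >>> schema_ptr = int(ffi.cast("uintptr_t", c_schema))
+ >>> batch._export_to_c(array_ptr, schema_ptr)
+ >>> pa.RecordBatch._import_from_c(array_ptr, schema_ptr).equals(batch)
+ True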
+ + Be careful: if you don't pass the ArrowArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_array__(self, requested_schema=None): + """ + Get a pair of PyCapsules containing a C ArrowArray representation of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the batch to this schema. + If None, the batch will be returned as-is, with a schema matching the + one returned by :meth:`__arrow_c_schema__()`. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the batch as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a C ArrowSchema + and ArrowArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowDeviceArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowDeviceArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_device_array__(self, requested_schema=None, **kwargs): + """ + Get a pair of PyCapsules containing a C ArrowDeviceArray representation + of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. 
PyArrow will attempt to cast the batch to this data type. + If None, the batch will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + kwargs + Currently no additional keyword arguments are supported, but + this method will accept any keyword with a value of ``None`` + for compatibility with future keywords. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, + respectively. + """ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a + C ArrowSchema and ArrowDeviceArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowDeviceArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the arrays in the RecordBatch reside. + + Returns + ------- + DeviceAllocationType + """ + @property + def is_cpu(self) -> bool: + """ + Whether the RecordBatch's arrays are CPU-accessible. + """ + def copy_to(self, destination: MemoryManager | Device) -> Self: + """ + Copy the entire RecordBatch to destination device. + + This copies each column of the record batch to create + a new record batch where all underlying buffers for the columns have + been copied to the destination MemoryManager. + + Parameters + ---------- + destination : pyarrow.MemoryManager or pyarrow.Device + The destination device to copy the array to. + + Returns + ------- + RecordBatch + """ + +def table_to_blocks(options, table: Table, categories, extension_columns): ... + +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] + +class Table(_Tabular[ChunkedArray[Any]]): + """ + A collection of top-level named, equal length Arrow arrays. + + Warnings + -------- + Do not call this class's constructor directly, use one of the ``from_*`` + methods instead. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a RecordBatch: + + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> pa.Table.from_pandas(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a dictionary of arrays: + + >>> pydict = {"n_legs": n_legs, "animals": animals} + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"year": 2021, "animals": "Centipede"}] + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,null]] + animals: [["Flamingo","Centipede"]] + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [ + ... pa.field("year", pa.int64()), + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... ], + ... metadata={"year": "Year of entry"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + year: int64 + n_legs: int64 + animals: string + -- schema metadata -- + year: 'Year of entry' + + Construct a Table with :func:`pyarrow.table`: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this Table. + + Parameters + ---------- + offset : int, default 0 + Offset from start of table to slice. + length : int, default None + Length of slice (default is until end of table starting from + offset). + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + >>> table.slice(length=3) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019]] + n_legs: [[2,4,5]] + animals: [["Flamingo","Horse","Brittle stars"]] + >>> table.slice(offset=2) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019,2021]] + n_legs: [[5,100]] + animals: [["Brittle stars","Centipede"]] + >>> table.slice(offset=2, length=1) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019]] + n_legs: [[5]] + animals: [["Brittle stars"]] + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the Table. + + Returns a new Table with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.select([0, 1]) + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + >>> table.select(["year"]) + pyarrow.Table + year: int64 + ---- + year: [[2020,2022,2019,2021]] + """ + def replace_schema_metadata( + self, metadata: dict[str | bytes, str | bytes] | None = None + ) -> Self: + """ + Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be None), + which deletes any existing metadata. + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Constructing a Table with pyarrow schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> table = pa.table(df, my_schema) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + + Create a shallow copy of a Table with deleted schema metadata: + + >>> table.replace_schema_metadata().schema + n_legs: int64 + animals: string + + Create a shallow copy of a Table with new schema metadata: + + >>> metadata = {"animals": "Which animal"} + >>> table.replace_schema_metadata(metadata=metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Which animal' + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Flatten this Table. + + Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> month = pa.array([4, 6]) + >>> table = pa.Table.from_arrays([struct, month], names=["a", "month"]) + >>> table + pyarrow.Table + a: struct + child 0, animals: string + child 1, n_legs: int64 + child 2, year: int64 + month: int64 + ---- + a: [ + -- is_valid: all not null + -- child 0 type: string + ["Parrot",null] + -- child 1 type: int64 + [2,4] + -- child 2 type: int64 + [null,2022]] + month: [[4,6]] + + Flatten the columns with struct field: + + >>> table.flatten() + pyarrow.Table + a.animals: string + a.n_legs: int64 + a.year: int64 + month: int64 + ---- + a.animals: [["Parrot",null]] + a.n_legs: [[2,4]] + a.year: [[null,2022]] + month: [[4,6]] + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the ChunkedArray of each column are + concatenated into zero or one chunk. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + >>> table.combine_chunks() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4,4,5,100]] + animals: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent table, but where all chunks of + each column share the same dictionary values. Dictionary indices + are transposed accordingly. + + Columns without dictionaries are returned unchanged. 
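+
+ A small sketch (in addition to the examples below) verifying that, after
+ unification, the chunks of a dictionary column share a single dictionary:
+
+ >>> import pyarrow as pa
+ >>> chunks = pa.chunked_array(
+ ... [pa.array(["a", "b"]).dictionary_encode(), pa.array(["b", "c"]).dictionary_encode()]
+ ... )
+ >>> unified = pa.table({"vals": chunks}).unify_dictionaries()
+ >>> col = unified.column("vals")
+ >>> col.chunk(0).dictionary.equals(col.chunk(1).dictionary)
+ True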
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> table = pa.table([c_arr], names=["animals"]) + >>> table + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog"] -- indices: + [0,1,2], -- dictionary: + ["Horse","Brittle stars","Centipede"] -- indices: + [0,1,2]] + + Unify dictionaries across both chunks: + + >>> table.unify_dictionaries() + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [0,1,2], -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [3,4,5]] + """ + def equals(self, other: Self, check_metadata: bool = False) -> Self: + """ + Check if contents of two tables are equal. + + Parameters + ---------- + other : pyarrow.Table + Table to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.Table.from_arrays([n_legs, animals], names=names) + >>> table_0 = pa.Table.from_arrays([]) + >>> table_1 = pa.Table.from_arrays( + ... [n_legs, animals], names=names, metadata={"n_legs": "Number of legs per animal"} + ... ) + >>> table.equals(table) + True + >>> table.equals(table_0) + False + >>> table.equals(table_1) + True + >>> table.equals(table_1, check_metadata=True) + False + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast table values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast table values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> table.cast(target_schema=my_schema) + pyarrow.Table + n_legs: duration[s] + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + safe: bool = True, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow Table. 
+ + The column types in the resulting Arrow Table are inferred from the + dtypes of the pandas.Series in the DataFrame. In the case of non-object + Series, the NumPy dtype is translated to its Arrow equivalent. In the + case of `object`, we need to guess the datatype by looking at the + Python objects in this Series. + + Be aware that Series of the `object` dtype don't carry enough + information to always lead to a meaningful Arrow type. In the case that + we cannot infer a type, e.g. because the DataFrame is of length 0 or + the Series only contains None/nan objects, the type is set to + null. This behavior can be avoided by constructing an explicit schema + and passing it to this function. + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the Arrow Table. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``Table``. The default of None will store the index as a column, + except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + safe : bool, default True + Check for overflows or other unsafe conversions. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.Table.from_pandas(df) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[ArrayOrChunkedArray[Any]], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table from Arrow arrays. + + Parameters + ---------- + arrays : list of pyarrow.Array or pyarrow.ChunkedArray + Equal-length arrays that should form the table. + names : list of str, optional + Names for the table columns. If not passed, schema must be passed. + schema : Schema, default None + Schema for the created table. If not passed, names must be passed. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). 
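+
+ Chunked inputs are accepted as well; a brief sketch (in addition to the
+ examples below) showing that the existing chunking is preserved:
+
+ >>> import pyarrow as pa
+ >>> chunked = pa.chunked_array([[2, 4], [5, 100]])
+ >>> pa.Table.from_arrays([chunked], names=["n_legs"]).column("n_legs").num_chunks
+ 2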
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"animals": "Name of the animal species"}, + ... ) + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Name of the animal species' + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] + ) -> Self: + """ + Construct a Table from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``Table``. + + Parameters + ---------- + struct_array : StructArray or ChunkedArray + Array to construct the table from. + + Returns + ------- + pyarrow.Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.Table.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array( + self, max_chunksize: int | None = None + ) -> ChunkedArray[scalar.StructScalar]: + """ + Convert to a chunked array of struct type. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for ChunkedArray chunks. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + ChunkedArray + """ + @classmethod + def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: + """ + Construct a Table from a sequence or iterator of Arrow RecordBatches. + + Parameters + ---------- + batches : sequence or iterator of RecordBatch + Sequence of RecordBatch to be converted, all schemas must be equal. + schema : Schema, default None + If not passed, will be inferred from the first RecordBatch. 
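+
+ Because an iterator is accepted, batches can also be consumed lazily; a
+ short sketch (in addition to the examples below):
+
+ >>> import pyarrow as pa
+ >>> batches = (pa.record_batch({"x": [i]}) for i in range(3))
+ >>> pa.Table.from_batches(batches).num_rows
+ 3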
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Construct a Table from a RecordBatch: + + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a sequence of RecordBatches: + + >>> pa.Table.from_batches([batch, batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: + """ + Convert Table to a list of RecordBatch objects. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + list[RecordBatch] + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatch: + + >>> table.to_batches()[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Convert a Table to a list of RecordBatches: + + >>> table.to_batches(max_chunksize=2)[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + >>> table.to_batches(max_chunksize=2)[1].to_pandas() + n_legs animals + 0 5 Brittle stars + 1 100 Centipede + """ + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: + """ + Convert the Table to a RecordBatchReader. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + RecordBatchReader + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatchReader: + + >>> table.to_reader() + + + >>> reader = table.to_reader() + >>> reader.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + >>> reader.read_all() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @property + def schema(self) -> Schema: + """ + Schema of the table and its columns. + + Returns + ------- + Schema + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... 
"animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' ... + """ + @property + def num_columns(self) -> int: + """ + Number of columns in this table. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_columns + 2 + """ + @property + def num_rows(self) -> int: + """ + Number of rows in this table. + + Due to the definition of a table, all columns have the same number of + rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_rows + 4 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the table. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.nbytes + 72 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the table. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.get_total_buffer_size() + 76 + """ + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.add_column(0, "year", [year]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2021,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Original table is left unchanged: + + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def remove_column(self, i: int) -> Self: + """ + Create new Table with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New table without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.remove_column(1) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + """ + def set_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Replace column in Table at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.set_column(1, "year", [year]) + pyarrow.Table + n_legs: int64 + year: int64 + ---- + n_legs: [[2,4,5,100]] + year: [[2021,2022,2019,2021]] + """ + @overload + def rename_columns(self, names: list[str]) -> Self: ... + @overload + def rename_columns(self, names: dict[str, str]) -> Self: ... + def rename_columns(self, names): + """ + Create new table with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + >>> new_names = ["n", "name"] + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def drop(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new table. + + Alias of Table.drop_columns, but kept for backwards compatibility. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Returns + ------- + Table + New table without the column(s). + """ + def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: + """ + Declare a grouping over the columns of the table. + + Resulting grouping can then be used to perform aggregations + with a subsequent ``aggregate()`` method. + + Parameters + ---------- + keys : str or list[str] + Name of the columns that should be used as the grouping key. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the + default), no stable ordering of the output is guaranteed. + + Returns + ------- + TableGroupBy + + See Also + -------- + TableGroupBy.aggregate + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.group_by("year").aggregate([("n_legs", "sum")]) + pyarrow.Table + year: int64 + n_legs_sum: int64 + ---- + year: [[2020,2022,2021,2019]] + n_legs_sum: [[2,6,104,5]] + """ + def join( + self, + right_table: Self, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> Self: + """ + Perform a join between this table and another one. + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + keys : str or list[str] + The columns from current table that should be used as keys + of the join operation left side. + right_keys : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. + join_type : str, default "left outer" + The kind of join that should be performed, one of + ("left semi", "right semi", "left anti", "right anti", + "inner", "left outer", "right outer", "full outer") + left_suffix : str, default None + Which suffix to add to left column names. This prevents confusion + when the columns in left and right tables have colliding names. + right_suffix : str, default None + Which suffix to add to the right column names. This prevents confusion + when the columns in left and right tables have colliding names. + coalesce_keys : bool, default True + If the duplicated keys should be omitted from one of the sides + in the join result. 
+ use_threads : bool, default True + Whether to use multithreading or not. + + Returns + ------- + Table + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df1 = pd.DataFrame({"id": [1, 2, 3], "year": [2020, 2022, 2019]}) + >>> df2 = pd.DataFrame( + ... {"id": [3, 4], "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]} + ... ) + >>> t1 = pa.Table.from_pandas(df1) + >>> t2 = pa.Table.from_pandas(df2) + + Left outer join: + + >>> t1.join(t2, "id").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2]] + year: [[2019,2020,2022]] + n_legs: [[5,null,null]] + animal: [["Brittle stars",null,null]] + + Full outer join: + + >>> t1.join(t2, "id", join_type="full outer").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2,4]] + year: [[2019,2020,2022,null]] + n_legs: [[5,null,null,100]] + animal: [["Brittle stars",null,null,"Centipede"]] + + Right outer join: + + >>> t1.join(t2, "id", join_type="right outer").combine_chunks().sort_by("year") + pyarrow.Table + year: int64 + id: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,null]] + id: [[3,4]] + n_legs: [[5,100]] + animal: [["Brittle stars","Centipede"]] + + Right anti join + + >>> t1.join(t2, "id", join_type="right anti") + pyarrow.Table + id: int64 + n_legs: int64 + animal: string + ---- + id: [[4]] + n_legs: [[100]] + animal: [["Centipede"]] + """ + def join_asof( + self, + right_table: Self, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> Self: + """ + Perform an asof join between this table and another one. + + This is similar to a left-join except that we match on nearest key rather + than equal keys. Both tables must be sorted by the key. This type of join + is most useful for time series data that are not perfectly aligned. + + Optionally match on equivalent keys with "by" before searching with "on". + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + on : str + The column from current table that should be used as the "on" key + of the join operation left side. + + An inexact match is used on the "on" key, i.e. a row is considered a + match if and only if left_on - tolerance <= right_on <= left_on. + + The input dataset must be sorted by the "on" key. Must be a single + field of a common type. + + Currently, the "on" key must be an integer, date, or timestamp type. + by : str or list[str] + The columns from current table that should be used as the keys + of the join operation left side. The join operation is then done + only for the matches in these columns. + tolerance : int + The tolerance for inexact "on" key matching. A right row is considered + a match with the left row ``right.on - left.on <= tolerance``. The + ``tolerance`` may be: + + - negative, in which case a past-as-of-join occurs; + - or positive, in which case a future-as-of-join occurs; + - or zero, in which case an exact-as-of-join occurs. + + The tolerance is interpreted in the same units as the "on" key. + right_on : str or list[str], default None + The columns from the right_table that should be used as the on key + on the join operation right side. 
+ When ``None`` use the same key name as the left table. + right_by : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. + + Returns + ------- + Table + + Example + -------- + >>> import pyarrow as pa + >>> t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]}) + >>> t2 = pa.table( + ... { + ... "id": [3, 4], + ... "year": [2020, 2021], + ... "n_legs": [5, 100], + ... "animal": ["Brittle stars", "Centipede"], + ... } + ... ) + + >>> t1.join_asof(t2, on="year", by="id", tolerance=-2) + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[1,3,2,3,3]] + year: [[2020,2021,2022,2022,2023]] + n_legs: [[null,5,null,5,null]] + animal: [[null,"Brittle stars",null,"Brittle stars",null]] + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the table as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @property + def is_cpu(self) -> bool: + """ + Whether all ChunkedArrays are CPU-accessible. + """ + +def record_batch( + data: dict[str, list[Any] | Array[Any]] + | Collection[Array[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, +) -> RecordBatch: + """ + Create a pyarrow.RecordBatch from another Python data structure or sequence + of arrays. + + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of Arrays, + a pandas DataFame, or any tabular object implementing the + Arrow PyCapsule Protocol (has an ``__arrow_c_array__`` or + ``__arrow_c_device_array__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the RecordBatch. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). 
+ + Returns + ------- + RecordBatch + + See Also + -------- + RecordBatch.from_arrays, RecordBatch.from_pandas, table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from a python dictionary: + + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Creating a RecordBatch from a list of arrays with names: + + >>> pa.record_batch([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Creating a RecordBatch from a list of arrays with names and metadata: + + >>> my_metadata = {"n_legs": "How many legs does an animal have?"} + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'How many legs does an animal have?' + + Creating a RecordBatch from a pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + >>> pa.record_batch(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Creating a RecordBatch from a pandas DataFrame with schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.record_batch(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + >>> pa.record_batch(df, my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + """ + +@overload +def table( + data: dict[str, list[Any] | Array[Any]], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + nthreads: int | None = None, +) -> Table: ... +@overload +def table( + data: Collection[ArrayOrChunkedArray[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowStream + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + nthreads: int | None = None, +) -> Table: ... 
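+# A minimal usage sketch (not part of the stub itself) of how the two ``table``
+# overloads above are selected: a mapping of names to columns matches the first
+# overload, while a plain sequence of arrays matches the second and needs ``names=``.
+#
+#   import pyarrow as pa
+#   t1 = pa.table({"n_legs": [2, 4], "animals": ["Flamingo", "Horse"]})
+#   t2 = pa.table(
+#       [pa.array([2, 4]), pa.array(["Flamingo", "Horse"])],
+#       names=["n_legs", "animals"],
+#   )
+#   assert t1.schema == t2.schema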
+def table(*args, **kwargs): + """ + Create a pyarrow.Table from a Python data structure or sequence of arrays. + + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of arrays or + chunked arrays, a pandas DataFame, or any tabular object implementing + the Arrow PyCapsule Protocol (has an ``__arrow_c_array__``, + ``__arrow_c_device_array__`` or ``__arrow_c_stream__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the Arrow Table. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + If passed, the output will have exactly this schema (raising an error + when columns are not found in the data and ignoring additional data not + specified in the schema, when data is a dict or DataFrame). + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). + nthreads : int, default None + For pandas.DataFrame inputs: if greater than 1, convert columns to + Arrow in parallel using indicated number of threads. By default, + this follows :func:`pyarrow.cpu_count` (may use up to system CPU count + threads). + + Returns + ------- + Table + + See Also + -------- + Table.from_arrays, Table.from_pandas, Table.from_pydict + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from a python dictionary: + + >>> pa.table({"n_legs": n_legs, "animals": animals}) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.table([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.table(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.table(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: '{"index_columns": [], "column_indexes": [{"name": null, ... + + Construct a Table from chunked arrays: + + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... 
) + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + """ + +def concat_tables( + tables: Iterable[Table], + memory_pool: MemoryPool | None = None, + promote_options: Literal["none", "default", "permissive"] = "none", + **kwargs: Any, +) -> Table: + """ + Concatenate pyarrow.Table objects. + + If promote_options="none", a zero-copy concatenation will be performed. The schemas + of all the Tables must be the same (except the metadata), otherwise an + exception will be raised. The result Table will share the metadata with the + first table. + + If promote_options="default", any null type arrays will be casted to the type of other + arrays in the column of the same name. If a table is missing a particular + field, null values of the appropriate type will be generated to take the + place of the missing field. The new schema will share the metadata with the + first table. Each field in the new schema will share the metadata with the + first table which has the field defined. Note that type promotions may + involve additional allocations on the given ``memory_pool``. + + If promote_options="permissive", the behavior of default plus types will be promoted + to the common denominator that fits all the fields. + + Parameters + ---------- + tables : iterable of pyarrow.Table objects + Pyarrow tables to concatenate into a single Table. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + promote_options : str, default none + Accepts strings "none", "default" and "permissive". + **kwargs : dict, optional + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.table( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.table([pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"]) + >>> pa.concat_tables([t1, t2]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Parrot","Dog"]] + + """ + +class TableGroupBy: + """ + A grouping of columns in a table on which to perform aggregations. + + Parameters + ---------- + table : pyarrow.Table + Input table to execute the aggregation on. + keys : str or list[str] + Name of the grouped columns. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the default), + no stable ordering of the output is guaranteed. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table( + ... [ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], + ... names=["keys", "values"], + ... ) + + Grouping of columns: + + >>> pa.TableGroupBy(t, "keys") + + + Perform aggregations: + + >>> pa.TableGroupBy(t, "keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + """ + + keys: str | list[str] + def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... + def aggregate( + self, + aggregations: Iterable[ + tuple[ColumnSelector, Aggregation] + | tuple[ColumnSelector, Aggregation, AggregateOptions | None] + ], + ) -> Table: + """ + Perform an aggregation over the grouped columns of the table. 
+ + Parameters + ---------- + aggregations : list[tuple(str, str)] or \ +list[tuple(str, str, FunctionOptions)] + List of tuples, where each tuple is one aggregation specification + and consists of: aggregation column name followed + by function name and optionally aggregation function option. + Pass empty list to get a single row for each group. + The column name can be a string, an empty list or a list of + column names, for unary, nullary and n-ary aggregation functions + respectively. + + For the list of function names and respective aggregation + function options see :ref:`py-grouped-aggrs`. + + Returns + ------- + Table + Results of the aggregation functions. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table([ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], names=["keys", "values"]) + + Sum the column "values" over the grouped column "keys": + + >>> t.group_by("keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + + Count the rows over the grouped column "keys": + + >>> t.group_by("keys").aggregate([([], "count_all")]) + pyarrow.Table + keys: string + count_all: int64 + ---- + keys: [["a","b","c"]] + count_all: [[2,2,1]] + + Do multiple aggregations: + + >>> t.group_by("keys").aggregate([ + ... ("values", "sum"), + ... ("keys", "count") + ... ]) + pyarrow.Table + keys: string + values_sum: int64 + keys_count: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + keys_count: [[2,2,1]] + + Count the number of non-null values for column "values" + over the grouped column "keys": + + >>> import pyarrow.compute as pc + >>> t.group_by(["keys"]).aggregate([ + ... ("values", "count", pc.CountOptions(mode="only_valid")) + ... ]) + pyarrow.Table + keys: string + values_count: int64 + ---- + keys: [["a","b","c"]] + values_count: [[2,2,1]] + + Get a single row for each group in column "keys": + + >>> t.group_by("keys").aggregate([]) + pyarrow.Table + keys: string + ---- + keys: [["a","b","c"]] + """ + def _table(self) -> Table: ... + @property + def _use_threads(self) -> bool: ... + +def concat_batches( + recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None +) -> RecordBatch: + """ + Concatenate pyarrow.RecordBatch objects. + + All recordbatches must share the same Schema, + the operation implies a copy of the data to merge + the arrays of the different RecordBatches. + + Parameters + ---------- + recordbatches : iterable of pyarrow.RecordBatch objects + Pyarrow record batches to concatenate into a single RecordBatch. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.record_batch( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.record_batch( + ... [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"] + ... 
) + >>> pa.concat_batches([t1, t2]) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100,2,4] + animals: ["Flamingo","Horse","Brittle stars","Centipede","Parrot","Dog"] + + """ + +__all__ = [ + "ChunkedArray", + "chunked_array", + "_Tabular", + "RecordBatch", + "table_to_blocks", + "Table", + "record_batch", + "table", + "concat_tables", + "TableGroupBy", + "concat_batches", +] diff --git a/python/pyarrow/__lib_pxi/tensor.pyi b/python/pyarrow/__lib_pxi/tensor.pyi new file mode 100644 index 00000000000..d849abd0f1f --- /dev/null +++ b/python/pyarrow/__lib_pxi/tensor.pyi @@ -0,0 +1,688 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +import numpy as np + +from pyarrow.lib import _Weakrefable +from scipy.sparse import coo_matrix, csr_matrix +from sparse import COO + +class Tensor(_Weakrefable): + """ + A n-dimensional array a.k.a Tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + + type: int32 + shape: (2, 3) + strides: (12, 4) + """ + + @classmethod + def from_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Create a Tensor from a numpy array. + + Parameters + ---------- + obj : numpy.ndarray + The source numpy array + dim_names : list, optional + Names of each dimension of the Tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + + type: int32 + shape: (2, 3) + strides: (12, 4) + """ + def to_numpy(self) -> np.ndarray: + """ + Convert arrow::Tensor to numpy.ndarray with zero copy + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.to_numpy() + array([[ 2, 2, 4], + [ 4, 5, 100]], dtype=int32) + """ + def equals(self, other: Tensor) -> bool: + """ + Return true if the tensors contains exactly equal data. + + Parameters + ---------- + other : Tensor + The other tensor to compare for equality. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32) + >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a", "b"]) + >>> tensor.equals(tensor) + True + >>> tensor.equals(tensor2) + False + """ + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.dim_name(0) + 'dim1' + >>> tensor.dim_name(1) + 'dim2' + """ + @property + def dim_names(self) -> list[str]: + """ + Names of this tensor dimensions. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.dim_names + ['dim1', 'dim2'] + """ + @property + def is_mutable(self) -> bool: + """ + Is this tensor mutable or immutable. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.is_mutable + True + """ + @property + def is_contiguous(self) -> bool: + """ + Is this tensor contiguous in memory. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.is_contiguous + True + """ + @property + def ndim(self) -> int: + """ + The dimension (n) of this tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.ndim + 2 + """ + @property + def size(self) -> str: + """ + The size of this tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.size + 6 + """ + @property + def shape(self) -> tuple[int, ...]: + """ + The shape of this tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.shape + (2, 3) + """ + @property + def strides(self) -> tuple[int, ...]: + """ + Strides of this tensor. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor.strides + (12, 4) + """ + +class SparseCOOTensor(_Weakrefable): + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCOOTensor + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list[str], optional + Names of the dimensions. + + Returns + ------- + pyarrow.SparseCOOTensor + """ + + @classmethod + def from_numpy( + cls, + data: np.ndarray, + coords: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCOOTensor from numpy.ndarrays + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the rows. + coords : numpy.ndarray + Coordinates of the data. + shape : tuple + Shape of the tensor. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: + """ + Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor + + Parameters + ---------- + obj : scipy.sparse.csr_matrix + The scipy matrix that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_pydata_sparse(cls, obj: COO, dim_names: list[str] | None = None) -> Self: + """ + Convert pydata/sparse.COO to arrow::SparseCOOTensor. 
+ + Parameters + ---------- + obj : pydata.sparse.COO + The sparse multidimensional array that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCOOTensor. + + Parameters + ---------- + obj : Tensor + The tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy. + """ + def to_scipy(self) -> coo_matrix: + """ + Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix. + """ + def to_pydata_sparse(self) -> COO: + """ + Convert arrow::SparseCOOTensor to pydata/sparse.COO. + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCOOTensor to arrow::Tensor. + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data. + + Parameters + ---------- + other : SparseCOOTensor + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + @property + def has_canonical_format(self) -> bool: ... + +class SparseCSRMatrix(_Weakrefable): + """ + A sparse CSR matrix. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSRMatrix + + Parameters + ---------- + obj : numpy.ndarray + The dense numpy array that should be converted. + dim_names : list, optional + The names of the dimensions. + + Returns + ------- + pyarrow.SparseCSRMatrix + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSRMatrix from numpy.ndarrays. + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse matrix. + indptr : numpy.ndarray + Range of the rows, + The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + indices : numpy.ndarray + Column indices of the corresponding non-zero values. + shape : tuple + Shape of the matrix. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: + """ + Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. + + Parameters + ---------- + obj : scipy.sparse.csr_matrix + The scipy matrix that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSRMatrix. + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy. + """ + def to_scipy(self) -> csr_matrix: + """ + Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix. 
+ """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSRMatrix to arrow::Tensor. + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data. + + Parameters + ---------- + other : SparseCSRMatrix + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + +class SparseCSCMatrix(_Weakrefable): + """ + A sparse CSC matrix. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list[str], optional + Names of the dimensions. + + Returns + ------- + pyarrow.SparseCSCMatrix + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSCMatrix from numpy.ndarrays + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse matrix. + indptr : numpy.ndarray + Range of the rows, + The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. + indices : numpy.ndarray + Column indices of the corresponding non-zero values. + shape : tuple + Shape of the matrix. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: + """ + Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : scipy.sparse.csc_matrix + The scipy matrix that should be converted. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSCMatrix + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy + """ + def to_scipy(self) -> csr_matrix: + """ + Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSCMatrix to arrow::Tensor + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data + + Parameters + ---------- + other : SparseCSCMatrix + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... + +class SparseCSFTensor(_Weakrefable): + """ + A sparse CSF tensor. 
+ + CSF is a generalization of compressed sparse row (CSR) index. + + CSF index recursively compresses each dimension of a tensor into a set + of prefix trees. Each path from a root to leaf forms one tensor + non-zero index. CSF is implemented with two arrays of buffers and one + arrays of integers. + """ + + @classmethod + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: + """ + Convert numpy.ndarray to arrow::SparseCSFTensor + + Parameters + ---------- + obj : numpy.ndarray + Data used to populate the rows. + dim_names : list[str], optional + Names of the dimensions. + + Returns + ------- + pyarrow.SparseCSFTensor + """ + @classmethod + def from_numpy( + cls, + data: np.ndarray, + indptr: np.ndarray, + indices: np.ndarray, + shape: tuple[int, ...], + dim_names: list[str] | None = None, + ) -> Self: + """ + Create arrow::SparseCSFTensor from numpy.ndarrays + + Parameters + ---------- + data : numpy.ndarray + Data used to populate the sparse tensor. + indptr : numpy.ndarray + The sparsity structure. + Each two consecutive dimensions in a tensor correspond to + a buffer in indices. + A pair of consecutive values at `indptr[dim][i]` + `indptr[dim][i + 1]` signify a range of nodes in + `indices[dim + 1]` who are children of `indices[dim][i]` node. + indices : numpy.ndarray + Stores values of nodes. + Each tensor dimension corresponds to a buffer in indptr. + shape : tuple + Shape of the matrix. + axis_order : list, optional + the sequence in which dimensions were traversed to + produce the prefix tree. + dim_names : list, optional + Names of the dimensions. + """ + @classmethod + def from_tensor(cls, obj: Tensor) -> Self: + """ + Convert arrow::Tensor to arrow::SparseCSFTensor + + Parameters + ---------- + obj : Tensor + The dense tensor that should be converted. + """ + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy + """ + def to_tensor(self) -> Tensor: + """ + Convert arrow::SparseCSFTensor to arrow::Tensor + """ + def equals(self, other: Self) -> bool: + """ + Return true if sparse tensors contains exactly equal data + + Parameters + ---------- + other : SparseCSFTensor + The other tensor to compare for equality. + """ + @property + def is_mutable(self) -> bool: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + def dim_name(self, i: int) -> str: + """ + Returns the name of the i-th tensor dimension. + + Parameters + ---------- + i : int + The physical index of the tensor dimension. + + Returns + ------- + str + """ + @property + def dim_names(self) -> list[str]: ... + @property + def non_zero_length(self) -> int: ... 
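+# A short usage sketch (requires numpy; not part of the stub itself) tying the classes
+# above together: a dense Tensor converts to a SparseCOOTensor and back, and the
+# sparse form exposes its values and coordinates as numpy arrays.
+#
+#   import numpy as np
+#   import pyarrow as pa
+#   dense = np.array([[0, 0, 4], [5, 0, 0]], dtype=np.int64)
+#   tensor = pa.Tensor.from_numpy(dense, dim_names=["row", "col"])
+#   sparse = pa.SparseCOOTensor.from_tensor(tensor)
+#   data, coords = sparse.to_numpy()      # non-zero values and their (row, col) indices
+#   assert sparse.non_zero_length == 2
+#   dense_again = sparse.to_tensor()      # back to a dense pyarrow.Tensor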
+ +__all__ = [ + "Tensor", + "SparseCOOTensor", + "SparseCSRMatrix", + "SparseCSCMatrix", + "SparseCSFTensor", +] diff --git a/python/pyarrow/__lib_pxi/types.pyi b/python/pyarrow/__lib_pxi/types.pyi new file mode 100644 index 00000000000..7fe6c36e332 --- /dev/null +++ b/python/pyarrow/__lib_pxi/types.pyi @@ -0,0 +1,4413 @@ +import datetime as dt +import sys + +from collections.abc import Mapping, Sequence +from decimal import Decimal + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Iterable, Iterator, Literal, overload + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + """ + Base class of all Arrow data types. + + Each data type is an *instance* of this class. + + Examples + -------- + Instance of int64 type: + + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + """ + def field(self, i: int) -> Field: + """ + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + """ + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: + """ + Bit width for fixed width type. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().bit_width + 64 + """ + @property + def byte_width(self) -> int: + """ + Byte width for fixed width type. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().byte_width + 8 + """ + @property + def num_fields(self) -> int: + """ + The number of child fields. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> pa.int64().num_fields + 0 + >>> pa.list_(pa.string()) + ListType(list) + >>> pa.list_(pa.string()).num_fields + 1 + >>> struct = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct.num_fields + 2 + """ + @property + def num_buffers(self) -> int: + """ + Number of data buffers required to construct Array type + excluding children. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().num_buffers + 2 + >>> pa.string().num_buffers + 3 + """ + def __hash__(self) -> int: ... + def equals(self, other: DataType | str, *, check_metadata: bool = False) -> bool: + """ + Return true if type is equivalent to passed value. + + Parameters + ---------- + other : DataType or string convertible to DataType + check_metadata : bool + Whether nested Field metadata equality should be checked as well. + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().equals(pa.string()) + False + >>> pa.int64().equals(pa.int64()) + True + """ + def to_pandas_dtype(self) -> np.generic: + """ + Return the equivalent NumPy / Pandas dtype. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.int64().to_pandas_dtype() + + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. 
+ """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import DataType from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. + """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: + """ + Import a DataType from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class Uint32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + """ + Concrete class for timestamp data types. + + Examples + -------- + >>> import pyarrow as pa + + Create an instance of timestamp type: + + >>> pa.timestamp("us") + TimestampType(timestamp[us]) + + Create an instance of timestamp type with timezone: + + >>> pa.timestamp("s", tz="UTC") + TimestampType(timestamp[s, tz=UTC]) + """ + @property + def unit(self) -> _Unit: + """ + The timestamp unit ('s', 'ms', 'us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.timestamp("us") + >>> t.unit + 'us' + """ + @property + def tz(self) -> _Tz: + """ + The timestamp time zone, if any, or None. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.timestamp("s", tz="UTC") + >>> t.tz + 'UTC' + """ + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + """ + Concrete class for time32 data types. + + Supported time unit resolutions are 's' [second] + and 'ms' [millisecond]. + + Examples + -------- + Create an instance of time32 type: + + >>> import pyarrow as pa + >>> pa.time32("ms") + Time32Type(time32[ms]) + """ + @property + def unit(self) -> _Time32Unit: + """ + The time unit ('s' or 'ms'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.time32("ms") + >>> t.unit + 'ms' + """ + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + """ + Concrete class for time64 data types. + + Supported time unit resolutions are 'us' [microsecond] + and 'ns' [nanosecond]. 
+ + Examples + -------- + Create an instance of time64 type: + + >>> import pyarrow as pa + >>> pa.time64("us") + Time64Type(time64[us]) + """ + @property + def unit(self) -> _Time64Unit: + """ + The time unit ('us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.time64("us") + >>> t.unit + 'us' + """ + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + """ + Concrete class for duration data types. + + Examples + -------- + Create an instance of duration type: + + >>> import pyarrow as pa + >>> pa.duration("s") + DurationType(duration[s]) + """ + @property + def unit(self) -> _Unit: + """ + The duration unit ('s', 'ms', 'us' or 'ns'). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.duration("s") + >>> t.unit + 's' + """ + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + """ + Concrete class for fixed-size binary data types. + + Examples + -------- + Create an instance of fixed-size binary type: + + >>> import pyarrow as pa + >>> pa.binary(3) + FixedSizeBinaryType(fixed_size_binary[3]) + """ + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal32 data types. + + Examples + -------- + Create an instance of decimal32 type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.scale + 2 + """ + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal64 data types. + + Examples + -------- + Create an instance of decimal64 type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.scale + 2 + """ + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal128 data types. + + Examples + -------- + Create an instance of decimal128 type: + + >>> import pyarrow as pa + >>> pa.decimal128(5, 2) + Decimal128Type(decimal128(5, 2)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal128(5, 2) + >>> t.precision + 5 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal128(5, 2) + >>> t.scale + 2 + """ + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + """ + Concrete class for decimal256 data types. 
+ + Examples + -------- + Create an instance of decimal256 type: + + >>> import pyarrow as pa + >>> pa.decimal256(76, 38) + Decimal256Type(decimal256(76, 38)) + """ + @property + def precision(self) -> _Precision: + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal256(76, 38) + >>> t.precision + 76 + """ + @property + def scale(self) -> _Scale: + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal256(76, 38) + >>> t.scale + 38 + """ + +class ListType(DataType, Generic[_DataTypeT]): + """ + Concrete class for list data types. + + Examples + -------- + Create an instance of ListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.string()) + ListType(list) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.string()).value_type + DataType(string) + """ + +class LargeListType(DataType, Generic[_DataTypeT]): + """ + Concrete class for large list data types + (like ListType, but with 64-bit offsets). + + Examples + -------- + Create an instance of LargeListType: + + >>> import pyarrow as pa + >>> pa.large_list(pa.string()) + LargeListType(large_list) + """ + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list(pa.string()).value_type + DataType(string) + """ + +class ListViewType(DataType, Generic[_DataTypeT]): + """ + Concrete class for list view data types. + + Examples + -------- + Create an instance of ListViewType: + + >>> import pyarrow as pa + >>> pa.list_view(pa.string()) + ListViewType(list_view) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_view(pa.string()).value_type + DataType(string) + """ + +class LargeListViewType(DataType, Generic[_DataTypeT]): + """ + Concrete class for large list view data types + (like ListViewType, but with 64-bit offsets). + + Examples + -------- + Create an instance of LargeListViewType: + + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()) + LargeListViewType(large_list_view) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for large list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list view values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.large_list_view(pa.string()).value_type + DataType(string) + """ + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + """ + Concrete class for fixed size list data types. 
+ + Examples + -------- + Create an instance of FixedSizeListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2) + FixedSizeListType(fixed_size_list[2]) + """ + @property + def value_field(self) -> Field[_DataTypeT]: + """ + The field for list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).value_field + pyarrow.Field + """ + @property + def value_type(self) -> _DataTypeT: + """ + The data type of large list values. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).value_type + DataType(int32) + """ + @property + def list_size(self) -> _Size: + """ + The size of the fixed size lists. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.list_(pa.int32(), 2).list_size + 2 + """ + +class DictionaryMemo(_Weakrefable): + """ + Tracking container for dictionary-encoded fields. + """ + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + Uint32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + """ + Concrete class for dictionary data types. + + Examples + -------- + Create an instance of dictionary type: + + >>> import pyarrow as pa + >>> pa.dictionary(pa.int64(), pa.utf8()) + DictionaryType(dictionary) + """ + + @property + def ordered(self) -> _Ordered: + """ + Whether the dictionary is ordered, i.e. whether the ordering of values + in the dictionary is important. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int64(), pa.utf8()).ordered + False + """ + @property + def index_type(self) -> _IndexT: + """ + The data type of dictionary indices (a signed integer type). + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int16(), pa.utf8()).index_type + DataType(int16) + """ + @property + def value_type(self) -> _BasicValueT: + """ + The dictionary value type. + + The dictionary values are found in an instance of DictionaryArray. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.dictionary(pa.int16(), pa.utf8()).value_type + DataType(string) + """ + +_K = TypeVar("_K", bound=DataType) + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + """ + Concrete class for map data types. + + Examples + -------- + Create an instance of MapType: + + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()) + MapType(map) + >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True) + MapType(map) + """ + + @property + def key_field(self) -> Field[_K]: + """ + The field for keys in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).key_field + pyarrow.Field + """ + @property + def key_type(self) -> _K: + """ + The data type of keys in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).key_type + DataType(string) + """ + @property + def item_field(self) -> Field[_ValueT]: + """ + The field for items in the map entries. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).item_field + pyarrow.Field + """ + @property + def item_type(self) -> _ValueT: + """ + The data type of items in the map entries. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32()).item_type + DataType(int32) + """ + @property + def keys_sorted(self) -> _Ordered: + """ + Should the entries be sorted according to keys. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True).keys_sorted + True + """ + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + """ + Concrete class for struct data types. + + ``StructType`` supports direct indexing using ``[...]`` (implemented via + ``__getitem__``) to access its fields. + It will return the struct field with the given index or name. + + Examples + -------- + >>> import pyarrow as pa + + Accessing fields using direct indexing: + + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type[0] + pyarrow.Field + >>> struct_type["y"] + pyarrow.Field + + Accessing fields using ``field()``: + + >>> struct_type.field(1) + pyarrow.Field + >>> struct_type.field("x") + pyarrow.Field + + # Creating a schema from the struct type's fields: + >>> pa.schema(list(struct_type)) + x: int32 + y: string + """ + def get_field_index(self, name: str) -> int: + """ + Return index of the unique field with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + index : int + The index of the field with the given name; -1 if the + name isn't found or there are several fields with the given + name. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + + Index of the field with a name 'y': + + >>> struct_type.get_field_index("y") + 1 + + Index of the field that does not exist: + + >>> struct_type.get_field_index("z") + -1 + """ + def field(self, i: int | str) -> Field: + """ + Select a field by its column name or numeric index. + + Parameters + ---------- + i : int or str + + Returns + ------- + pyarrow.Field + + Examples + -------- + + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + + Select the second field: + + >>> struct_type.field(1) + pyarrow.Field + + Select the field named 'x': + + >>> struct_type.field("x") + pyarrow.Field + """ + def get_all_field_indices(self, name: str) -> list[int]: + """ + Return sorted list of indices for the fields with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + indices : List[int] + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type.get_all_field_indices("x") + [0] + """ + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + @property + def names(self) -> list[str]: + """ + Lists the field names. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type.names + ['a', 'b', 'c'] + """ + @property + def fields(self) -> list[Field]: + """ + Lists all fields within the StructType. + + Examples + -------- + >>> import pyarrow as pa + >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type.fields + [pyarrow.Field, pyarrow.Field, pyarrow.Field] + """ + +class UnionType(DataType): + """ + Base class for union data types. 
+ + Examples + -------- + Create an instance of a dense UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_DENSE, + ... ), + ... ) + (DenseUnionType(dense_union),) + + Create an instance of a dense UnionType using ``pa.dense_union``: + + >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + DenseUnionType(dense_union) + + Create an instance of a sparse UnionType using ``pa.union``: + + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_SPARSE, + ... ), + ... ) + (SparseUnionType(sparse_union),) + + Create an instance of a sparse UnionType using ``pa.sparse_union``: + + >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + SparseUnionType(sparse_union) + """ + @property + def mode(self) -> Literal["sparse", "dense"]: + """ + The mode of the union ("dense" or "sparse"). + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union.mode + 'sparse' + """ + @property + def type_codes(self) -> list[int]: + """ + The type code to indicate each data type in this union. + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union.type_codes + [0, 1] + """ + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: ... + def field(self, i: int) -> Field: + """ + Return a child field by its numeric index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union[0] + pyarrow.Field + """ + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + +class SparseUnionType(UnionType): + """ + Concrete class for sparse union types. + + Examples + -------- + Create an instance of a sparse UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_SPARSE, + ... ), + ... ) + (SparseUnionType(sparse_union),) + + Create an instance of a sparse UnionType using ``pa.sparse_union``: + + >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + SparseUnionType(sparse_union) + """ + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + """ + Concrete class for dense union types. + + Examples + -------- + Create an instance of a dense UnionType using ``pa.union``: + + >>> import pyarrow as pa + >>> ( + ... pa.union( + ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], + ... mode=pa.lib.UnionMode_DENSE, + ... ), + ... ) + (DenseUnionType(dense_union),) + + Create an instance of a dense UnionType using ``pa.dense_union``: + + >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + DenseUnionType(dense_union) + """ + + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + """ + Concrete class for run-end encoded types. + """ + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... 
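+
+# Hedged usage sketch for RunEndEncodedType (comments only, not part of the stub
+# API surface): it assumes the runtime exposes the ``pa.run_end_encoded`` factory;
+# the name ``ree_type`` below is purely illustrative.
+#
+#     >>> import pyarrow as pa
+#     >>> ree_type = pa.run_end_encoded(pa.int32(), pa.utf8())
+#     >>> ree_type.run_end_type
+#     DataType(int32)
+#     >>> ree_type.value_type
+#     DataType(string)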
+ +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + +class BaseExtensionType(DataType): + """ + Concrete base class for extension types. + """ + def __arrow_ext_class__(self) -> type[ExtensionArray]: + """ + The associated array extension class + """ + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: + """ + The associated scalar class + """ + @property + def extension_name(self) -> str: + """ + The extension type name. + """ + @property + def storage_type(self) -> DataType: + """ + The underlying storage type. + """ + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + """ + Concrete base class for Python-defined extension types. + + Parameters + ---------- + storage_type : DataType + The underlying storage type for the extension type. + extension_name : str + A unique name distinguishing this extension type. The name will be + used when deserializing IPC data. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Create an instance of RationalType extension type: + + >>> rational_type = RationalType(pa.int32()) + + Inspect the extension type: + + >>> rational_type.extension_name + 'my_package.rational' + >>> rational_type.storage_type + StructType(struct) + + Wrap an array as an extension array: + + >>> storage_array = pa.array( + ... [ + ... {"numer": 10, "denom": 17}, + ... {"numer": 20, "denom": 13}, + ... ], + ... type=rational_type.storage_type, + ... ) + >>> rational_array = rational_type.wrap_array(storage_array) + >>> rational_array + + -- is_valid: all not null + -- child 0 type: int32 + [ + 10, + 20 + ] + -- child 1 type: int32 + [ + 17, + 13 + ] + + Or do the same with creating an ExtensionArray: + + >>> rational_array = pa.ExtensionArray.from_storage(rational_type, storage_array) + >>> rational_array + + -- is_valid: all not null + -- child 0 type: int32 + [ + 10, + 20 + ] + -- child 1 type: int32 + [ + 17, + 13 + ] + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + + Note that even though we registered the concrete type + ``RationalType(pa.int64())``, PyArrow will be able to deserialize + ``RationalType(integer_type)`` for any ``integer_type``, as the deserializer + will reference the name ``my_package.rational`` and the ``@classmethod`` + ``__arrow_ext_deserialize__``. + """ + + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: + """ + Serialized representation of metadata to reconstruct the type object. 
+ + This method should return a bytes object, and those serialized bytes + are stored in the custom metadata of the Field holding an extension + type in an IPC message. + The bytes are passed to ``__arrow_ext_deserialize`` and should hold + sufficient information to reconstruct the data type instance. + """ + @classmethod + def __arrow_ext_deserialize__(cls, storage_type: DataType, serialized: bytes) -> Self: + """ + Return an extension type instance from the storage type and serialized + metadata. + + This method should return an instance of the ExtensionType subclass + that matches the passed storage type and serialized metadata (the + return value of ``__arrow_ext_serialize__``). + """ + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + """ + Concrete class for fixed shape tensor extension type. + + Examples + -------- + Create an instance of fixed shape tensor extension type: + + >>> import pyarrow as pa + >>> pa.fixed_shape_tensor(pa.int32(), [2, 2]) + FixedShapeTensorType(extension) + + Create an instance of fixed shape tensor extension type with + permutation: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type.permutation + [0, 2, 1] + """ + @property + def value_type(self) -> _ValueT: + """ + Data type of an individual tensor. + """ + @property + def shape(self) -> list[int]: + """ + Shape of the tensors. + """ + @property + def dim_names(self) -> list[str] | None: + """ + Explicit names of the dimensions. + """ + @property + def permutation(self) -> list[int] | None: + """ + Indices of the dimensions ordering. + """ + +class Bool8Type(BaseExtensionType): + """ + Concrete class for bool8 extension type. + + Bool8 is an alternate representation for boolean + arrays using 8 bits instead of 1 bit per value. The underlying + storage type is int8. + + Examples + -------- + Create an instance of bool8 extension type: + + >>> import pyarrow as pa + >>> pa.bool8() + Bool8Type(extension) + """ + +class UuidType(BaseExtensionType): + """ + Concrete class for UUID extension type. + """ + +class JsonType(BaseExtensionType): + """ + Concrete class for JSON extension type. + + Examples + -------- + Define the extension type for JSON array + + >>> import pyarrow as pa + >>> json_type = pa.json_(pa.large_utf8()) + + Create an extension array + + >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] + >>> storage = pa.array(arr, pa.large_utf8()) + >>> pa.ExtensionArray.from_storage(json_type, storage) + + [ + null, + "{ "id":30, "values":["a", "b"] }" + ] + """ + +class OpaqueType(BaseExtensionType): + """ + Concrete class for opaque extension type. + + Opaque is a placeholder for a type from an external (often non-Arrow) + system that could not be interpreted. + + Examples + -------- + Create an instance of opaque extension type: + + >>> import pyarrow as pa + >>> pa.opaque(pa.int32(), "geometry", "postgis") + OpaqueType(extension) + """ + @property + def type_name(self) -> str: + """ + The name of the type in the external system. + """ + @property + def vendor_name(self) -> str: + """ + The name of the external system. + """ + +@deprecated( + "This class is deprecated and its deserialization is disabled by default. " + ":class:`ExtensionType` is recommended instead." +) +class PyExtensionType(ExtensionType): + """ + Concrete base class for Python-defined extension types based on pickle + for (de)serialization. + + .. warning:: + This class is deprecated and its deserialization is disabled by default. 
+ :class:`ExtensionType` is recommended instead. + + Parameters + ---------- + storage_type : DataType + The storage type for which the extension is built. + """ + def __init__(self, storage_type: DataType) -> None: ... + @classmethod + def set_auto_load(cls, value: bool) -> None: + """ + Enable or disable auto-loading of serialized PyExtensionType instances. + + Parameters + ---------- + value : bool + Whether to enable auto-loading. + """ + +class UnknownExtensionType(PyExtensionType): # type: ignore + """ + A concrete class for Python-defined extension types that refer to + an unknown Python implementation. + + Parameters + ---------- + storage_type : DataType + The storage type for which the extension is built. + serialized : bytes + The serialised output. + """ + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: PyExtensionType) -> None: # type: ignore + """ + Register a Python extension type. + + Registration is based on the extension name (so different registered types + need unique extension names). Registration needs an extension type + instance, but then works for any instance of the same subclass regardless + of parametrization of the type. + + Parameters + ---------- + ext_type : BaseExtensionType instance + The ExtensionType subclass to register. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + """ + +def unregister_extension_type(type_name: str) -> None: + """ + Unregister a Python extension type. + + Parameters + ---------- + type_name : str + The name of the ExtensionType subclass to unregister. + + Examples + -------- + Define a RationalType extension type subclassing ExtensionType: + + >>> import pyarrow as pa + >>> class RationalType(pa.ExtensionType): + ... def __init__(self, data_type: pa.DataType): + ... if not pa.types.is_integer(data_type): + ... raise TypeError(f"data_type must be an integer type not {data_type}") + ... super().__init__( + ... pa.struct( + ... [ + ... ("numer", data_type), + ... ("denom", data_type), + ... ], + ... ), + ... # N.B. This name does _not_ reference `data_type` so deserialization + ... # will work for _any_ integer `data_type` after registration + ... "my_package.rational", + ... ) + ... def __arrow_ext_serialize__(self) -> bytes: + ... # No parameters are necessary + ... return b"" + ... @classmethod + ... def __arrow_ext_deserialize__(cls, storage_type, serialized): + ... # return an instance of this subclass + ... 
return RationalType(storage_type[0].type) + + Register the extension type: + + >>> pa.register_extension_type(RationalType(pa.int64())) + + Unregister the extension type: + + >>> pa.unregister_extension_type("my_package.rational") + """ + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + """ + KeyValueMetadata + + Parameters + ---------- + __arg0__ : dict + A dict of the key-value metadata + **kwargs : optional + additional key-value metadata + """ + def __init__(self, __arg0__: Mapping[bytes, bytes] | None = None, **kwargs) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, __key: object) -> bool: ... + def __getitem__(self, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: + """ + Convert KeyValueMetadata to dict. If a key occurs twice, the value for + the first one is returned + """ + +def ensure_metadata( + meta: Mapping[bytes | str, bytes | str] | KeyValueMetadata | None, allow_none: bool = False +) -> KeyValueMetadata | None: ... + +class Field(_Weakrefable, Generic[_DataTypeT]): + """ + A named field, with a data type, nullability, and optional metadata. + + Notes + ----- + Do not use this class's constructor directly; use pyarrow.field + + Examples + -------- + Create an instance of pyarrow.Field: + + >>> import pyarrow as pa + >>> pa.field("key", pa.int32()) + pyarrow.Field + >>> pa.field("key", pa.int32(), nullable=False) + pyarrow.Field + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field + pyarrow.Field + >>> field.metadata + {b'key': b'Something important'} + + Use the field to create a struct type: + + >>> pa.struct([field]) + StructType(struct) + """ + + def equals(self, other: Field, check_metadata: bool = False) -> bool: + """ + Test if this field is equal to the other + + Parameters + ---------- + other : pyarrow.Field + check_metadata : bool, default False + Whether Field metadata equality should be checked as well. + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("key", pa.int32()) + >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1.equals(f2) + False + >>> f1.equals(f1) + True + """ + def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: + """ + The field nullability. + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("key", pa.int32()) + >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1.nullable + True + >>> f2.nullable + False + """ + @property + def name(self) -> str: + """ + The field name. + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field.name + 'key' + """ + @property + def metadata(self) -> dict[bytes, bytes] | None: + """ + The field metadata (if any is set). + + Returns + ------- + metadata : dict or None + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field.metadata + {b'key': b'Something important'} + """ + @property + def type(self) -> _DataTypeT: ... 
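+
+    # Hedged illustration for the ``type`` property above (comment only, not part
+    # of the stub API): the property returns the field's parametrized data type.
+    #
+    #     >>> import pyarrow as pa
+    #     >>> pa.field("key", pa.int32()).type
+    #     DataType(int32)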
+ def with_metadata(self, metadata: dict[bytes | str, bytes | str]) -> Self: + """ + Add metadata as dict of string keys and values to Field + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + + Create new field by adding metadata to existing one: + + >>> field_new = field.with_metadata({"key": "Something important"}) + >>> field_new + pyarrow.Field + >>> field_new.metadata + {b'key': b'Something important'} + """ + def remove_metadata(self) -> Self: + """ + Create new field without metadata, if any + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field.metadata + {b'key': b'Something important'} + + Create new field by removing the metadata from the existing one: + + >>> field_new = field.remove_metadata() + >>> field_new.metadata + """ + def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: + """ + A copy of this field with the replaced type + + Parameters + ---------- + new_type : pyarrow.DataType + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + + Create new field by replacing type of an existing one: + + >>> field_new = field.with_type(pa.int64()) + >>> field_new + pyarrow.Field + """ + def with_name(self, name: str) -> Self: + """ + A copy of this field with the replaced name + + Parameters + ---------- + name : str + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + + Create new field by replacing the name of an existing one: + + >>> field_new = field.with_name("lock") + >>> field_new + pyarrow.Field + """ + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: + """ + A copy of this field with the replaced nullability + + Parameters + ---------- + nullable : bool + + Returns + ------- + field: pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> field = pa.field("key", pa.int32()) + >>> field + pyarrow.Field + >>> field.nullable + True + + Create new field by replacing the nullability of an existing one: + + >>> field_new = field.with_nullable(False) + >>> field_new + pyarrow.Field + >>> field_new.nullable + False + """ + def flatten(self) -> list[Field]: + """ + Flatten this field. If a struct field, individual child fields + will be returned with their names prefixed by the parent's name. + + Returns + ------- + fields : List[pyarrow.Field] + + Examples + -------- + >>> import pyarrow as pa + >>> f1 = pa.field("bar", pa.float64(), nullable=False) + >>> f2 = pa.field("foo", pa.int32()).with_metadata({"key": "Something important"}) + >>> ff = pa.field("ff", pa.struct([f1, f2]), nullable=False) + + Flatten a struct field: + + >>> ff + pyarrow.Field not null> + >>> ff.flatten() + [pyarrow.Field, pyarrow.Field] + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. 
+ """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import Field from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. + """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: + """ + Import a Field from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +class Schema(_Weakrefable): + """ + A named collection of types a.k.a schema. A schema defines the + column names and types in a record batch or table data structure. + They also contain metadata about the columns. For example, schemas + converted from Pandas contain metadata about their original Pandas + types so they can be converted back to the same types. + + Warnings + -------- + Do not call this class's constructor directly. Instead use + :func:`pyarrow.schema` factory function which makes a new Arrow + Schema object. + + Examples + -------- + Create a new Arrow Schema object: + + >>> import pyarrow as pa + >>> pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + some_int: int32 + some_string: string + + Create Arrow Schema with metadata: + + >>> pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + + def __len__(self) -> int: ... + def __getitem__(self, key: str) -> Field: ... + _field = __getitem__ # pyright: ignore[reportUnknownVariableType] + def __iter__(self) -> Iterator[Field]: ... + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: + """ + Return deserialized-from-JSON pandas metadata field (if it exists) + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> schema = pa.Table.from_pandas(df).schema + + Select pandas metadata field from Arrow Schema: + + >>> schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, 'stop': 4, 'step': 1}], ... + """ + @property + def names(self) -> list[str]: + """ + The schema's field names. + + Returns + ------- + list of str + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the names of the schema's fields: + + >>> schema.names + ['n_legs', 'animals'] + """ + @property + def types(self) -> list[DataType]: + """ + The schema's field types. + + Returns + ------- + list of DataType + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the types of the schema's fields: + + >>> schema.types + [DataType(int64), DataType(string)] + """ + @property + def metadata(self) -> dict[bytes, bytes]: + """ + The schema's metadata (if any is set). + + Returns + ------- + metadata: dict or None + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... 
metadata={"n_legs": "Number of legs per animal"}, + ... ) + + Get the metadata of the schema's fields: + + >>> schema.metadata + {b'n_legs': b'Number of legs per animal'} + """ + def empty_table(self) -> Table: + """ + Provide an empty table according to the schema. + + Returns + ------- + table: pyarrow.Table + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Create an empty table with schema's fields: + + >>> schema.empty_table() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[]] + animals: [[]] + """ + def equals(self, other: Schema, check_metadata: bool = False) -> bool: + """ + Test if this schema is equal to the other + + Parameters + ---------- + other : pyarrow.Schema + check_metadata : bool, default False + Key/value metadata must be equal too + + Returns + ------- + is_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> schema1 = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema2 = pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + + Test two equal schemas: + + >>> schema1.equals(schema1) + True + + Test two unequal schemas: + + >>> schema1.equals(schema2) + False + """ + @classmethod + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | None = None) -> Schema: + """ + Returns implied schema from dataframe + + Parameters + ---------- + df : pandas.DataFrame + preserve_index : bool, default True + Whether to store the index as an additional column (or columns, for + MultiIndex) in the resulting `Table`. + The default of None will store the index as a column, except for + RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + + Returns + ------- + pyarrow.Schema + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame({"int": [1, 2], "str": ["a", "b"]}) + + Create an Arrow Schema from the schema of a pandas dataframe: + + >>> pa.Schema.from_pandas(df) + int: int64 + str: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, ... + """ + def field(self, i: int | str | bytes) -> Field: + """ + Select a field by its column name or numeric index. + + Parameters + ---------- + i : int or string + + Returns + ------- + pyarrow.Field + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Select the second field: + + >>> schema.field(1) + pyarrow.Field + + Select the field of the column named 'n_legs': + + >>> schema.field("n_legs") + pyarrow.Field + """ + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field: + """ + DEPRECATED + + Parameters + ---------- + name : str + + Returns + ------- + field: pyarrow.Field + """ + def get_field_index(self, name: str) -> int: + """ + Return index of the unique field with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + index : int + The index of the field with the given name; -1 if the + name isn't found or there are several fields with the given + name. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Get the index of the field named 'animals': + + >>> schema.get_field_index("animals") + 1 + + Index in case of several fields with the given name: + + >>> schema = pa.schema( + ... [ + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... pa.field("animals", pa.bool_()), + ... ], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema.get_field_index("animals") + -1 + """ + def get_all_field_indices(self, name: str) -> list[int]: + """ + Return sorted list of indices for the fields with the given name. + + Parameters + ---------- + name : str + The name of the field to look up. + + Returns + ------- + indices : List[int] + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [ + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... pa.field("animals", pa.bool_()), + ... ] + ... ) + + Get the indexes of the fields named 'animals': + + >>> schema.get_all_field_indices("animals") + [1, 2] + """ + def append(self, field: Field) -> Schema: + """ + Append a field at the end of the schema. + + In contrast to Python's ``list.append()`` it does return a new + object, leaving the original Schema unmodified. + + Parameters + ---------- + field : Field + + Returns + ------- + schema: Schema + New object with appended field. + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Append a field 'extra' at the end of the schema: + + >>> schema_new = schema.append(pa.field("extra", pa.bool_())) + >>> schema_new + n_legs: int64 + animals: string + extra: bool + + Original schema is unmodified: + + >>> schema + n_legs: int64 + animals: string + """ + def insert(self, i: int, field: Field) -> Schema: + """ + Add a field at position i to the schema. + + Parameters + ---------- + i : int + field : Field + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Insert a new field on the second position: + + >>> schema.insert(1, pa.field("extra", pa.bool_())) + n_legs: int64 + extra: bool + animals: string + """ + def remove(self, i: int) -> Schema: + """ + Remove the field at index i from the schema. + + Parameters + ---------- + i : int + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Remove the second field of the schema: + + >>> schema.remove(1) + n_legs: int64 + """ + def set(self, i: int, field: Field) -> Schema: + """ + Replace a field at position i in the schema. 
+ + Parameters + ---------- + i : int + field : Field + + Returns + ------- + schema: Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Replace the second field of the schema with a new field 'extra': + + >>> schema.set(1, pa.field("replaced", pa.bool_())) + n_legs: int64 + replaced: bool + """ + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: dict) -> Schema: + """ + DEPRECATED + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + """ + def with_metadata(self, metadata: dict) -> Schema: + """ + Add metadata as dict of string keys and values to Schema + + Parameters + ---------- + metadata : dict + Keys and values must be string-like / coercible to bytes + + Returns + ------- + schema : pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Add metadata to existing schema field: + + >>> schema.with_metadata({"n_legs": "Number of legs per animal"}) + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write Schema to Buffer as encapsulated IPC message + + Parameters + ---------- + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + + Write schema to Buffer: + + >>> schema.serialize() + + """ + def remove_metadata(self) -> Schema: + """ + Create new schema without metadata, if any + + Returns + ------- + schema : pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Create a new schema with removing the metadata from the original: + + >>> schema.remove_metadata() + n_legs: int64 + animals: string + """ + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + ) -> str: + """ + Return human-readable representation of Schema + + Parameters + ---------- + truncate_metadata : boolean, default True + Limit metadata key/value display to a single line of ~80 characters + or less + show_field_metadata : boolean, default True + Display Field-level KeyValueMetadata + show_schema_metadata : boolean, default True + Display Schema-level KeyValueMetadata + + Returns + ------- + str : the formatted output + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowSchema struct, given its pointer. + + Be careful: if you don't pass the ArrowSchema struct to a consumer, + its memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: + """ + Import Schema from a C ArrowSchema struct, given its pointer. + + This is a low-level function intended for expert users. 
+ """ + def __arrow_c_schema__(self) -> Any: + """ + Export to a ArrowSchema PyCapsule + + Unlike _export_to_c, this will not leak memory if the capsule is not used. + """ + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: + """ + Import a Schema from a ArrowSchema PyCapsule + + Parameters + ---------- + schema : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + """ + +def unify_schemas( + schemas: list[Schema], *, promote_options: Literal["default", "permissive"] = "default" +) -> Schema: + """ + Unify schemas by merging fields by name. + + The resulting schema will contain the union of fields from all schemas. + Fields with the same name will be merged. Note that two fields with + different types will fail merging by default. + + - The unified field will inherit the metadata from the schema where + that field is first defined. + - The first N fields in the schema will be ordered the same as the + N fields in the first schema. + + The resulting schema will inherit its metadata from the first input + schema. + + Parameters + ---------- + schemas : list of Schema + Schemas to merge into a single one. + promote_options : str, default default + Accepts strings "default" and "permissive". + Default: null and only null can be unified with another type. + Permissive: types are promoted to the greater common denominator. + + Returns + ------- + Schema + + Raises + ------ + ArrowInvalid : + If any input schema contains fields with duplicate names. + If Fields of the same name are not mergeable. + """ + +@overload +def field(name: SupportArrowSchema) -> Field[Any]: ... +@overload +def field( + name: str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT]: ... +def field(*args, **kwargs): + """ + Create a pyarrow.Field instance. + + Parameters + ---------- + name : str or bytes + Name of the field. + Alternatively, you can also pass an object that implements the Arrow + PyCapsule Protocol for schemas (has an ``__arrow_c_schema__`` method). + type : pyarrow.DataType or str + Arrow datatype of the field or a string matching one. + nullable : bool, default True + Whether the field's values are nullable. + metadata : dict, default None + Optional field metadata, the keys and values must be coercible to + bytes. + + Returns + ------- + field : pyarrow.Field + + Examples + -------- + Create an instance of pyarrow.Field: + + >>> import pyarrow as pa + >>> pa.field("key", pa.int32()) + pyarrow.Field + >>> pa.field("key", pa.int32(), nullable=False) + pyarrow.Field + + >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field + pyarrow.Field + >>> field.metadata + {b'key': b'Something important'} + + Use the field to create a struct type: + + >>> pa.struct([field]) + StructType(struct) + + A str can also be passed for the type parameter: + + >>> pa.field("key", "int32") + pyarrow.Field + """ + +def null() -> NullType: + """ + Create instance of null type. + + Examples + -------- + Create an instance of a null type: + + >>> import pyarrow as pa + >>> pa.null() + DataType(null) + >>> print(pa.null()) + null + + Create a ``Field`` type with a null type and a name: + + >>> pa.field("null_field", pa.null()) + pyarrow.Field + """ + +def bool_() -> BoolType: + """ + Create instance of boolean type. 
+ + Examples + -------- + Create an instance of a boolean type: + + >>> import pyarrow as pa + >>> pa.bool_() + DataType(bool) + >>> print(pa.bool_()) + bool + + Create a ``Field`` type with a boolean type + and a name: + + >>> pa.field("bool_field", pa.bool_()) + pyarrow.Field + """ + +def uint8() -> UInt8Type: + """ + Create instance of unsigned int8 type. + + Examples + -------- + Create an instance of unsigned int8 type: + + >>> import pyarrow as pa + >>> pa.uint8() + DataType(uint8) + >>> print(pa.uint8()) + uint8 + + Create an array with unsigned int8 type: + + >>> pa.array([0, 1, 2], type=pa.uint8()) + + [ + 0, + 1, + 2 + ] + """ + +def int8() -> Int8Type: + """ + Create instance of signed int8 type. + + Examples + -------- + Create an instance of int8 type: + + >>> import pyarrow as pa + >>> pa.int8() + DataType(int8) + >>> print(pa.int8()) + int8 + + Create an array with int8 type: + + >>> pa.array([0, 1, 2], type=pa.int8()) + + [ + 0, + 1, + 2 + ] + """ + +def uint16() -> UInt16Type: + """ + Create instance of unsigned uint16 type. + + Examples + -------- + Create an instance of unsigned int16 type: + + >>> import pyarrow as pa + >>> pa.uint16() + DataType(uint16) + >>> print(pa.uint16()) + uint16 + + Create an array with unsigned int16 type: + + >>> pa.array([0, 1, 2], type=pa.uint16()) + + [ + 0, + 1, + 2 + ] + """ + +def int16() -> Int16Type: + """ + Create instance of signed int16 type. + + Examples + -------- + Create an instance of int16 type: + + >>> import pyarrow as pa + >>> pa.int16() + DataType(int16) + >>> print(pa.int16()) + int16 + + Create an array with int16 type: + + >>> pa.array([0, 1, 2], type=pa.int16()) + + [ + 0, + 1, + 2 + ] + """ + +def uint32() -> Uint32Type: + """ + Create instance of unsigned uint32 type. + + Examples + -------- + Create an instance of unsigned int32 type: + + >>> import pyarrow as pa + >>> pa.uint32() + DataType(uint32) + >>> print(pa.uint32()) + uint32 + + Create an array with unsigned int32 type: + + >>> pa.array([0, 1, 2], type=pa.uint32()) + + [ + 0, + 1, + 2 + ] + """ + +def int32() -> Int32Type: + """ + Create instance of signed int32 type. + + Examples + -------- + Create an instance of int32 type: + + >>> import pyarrow as pa + >>> pa.int32() + DataType(int32) + >>> print(pa.int32()) + int32 + + Create an array with int32 type: + + >>> pa.array([0, 1, 2], type=pa.int32()) + + [ + 0, + 1, + 2 + ] + """ + +def int64() -> Int64Type: + """ + Create instance of signed int64 type. + + Examples + -------- + Create an instance of int64 type: + + >>> import pyarrow as pa + >>> pa.int64() + DataType(int64) + >>> print(pa.int64()) + int64 + + Create an array with int64 type: + + >>> pa.array([0, 1, 2], type=pa.int64()) + + [ + 0, + 1, + 2 + ] + """ + +def uint64() -> UInt64Type: + """ + Create instance of unsigned uint64 type. 
+ + Examples + -------- + Create an instance of unsigned int64 type: + + >>> import pyarrow as pa + >>> pa.uint64() + DataType(uint64) + >>> print(pa.uint64()) + uint64 + + Create an array with unsigned uint64 type: + + >>> pa.array([0, 1, 2], type=pa.uint64()) + + [ + 0, + 1, + 2 + ] + """ + +def tzinfo_to_string(tz: dt.tzinfo) -> str: + """ + Converts a time zone object into a string indicating the name of a time + zone, one of: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + tz : datetime.tzinfo + Time zone object + + Returns + ------- + name : str + Time zone name + """ + +def string_to_tzinfo(name: str) -> dt.tzinfo: + """ + Convert a time zone name into a time zone object. + + Supported input strings are: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + name: str + Time zone name. + + Returns + ------- + tz : datetime.tzinfo + Time zone object + """ + +@overload +def timestamp(unit: _Unit) -> TimestampType[_Unit, _Tz]: ... +@overload +def timestamp(unit: _Unit, tz: _Tz) -> TimestampType[_Unit, _Tz]: ... +def timestamp(*args, **kwargs): + """ + Create instance of timestamp type with resolution and optional time zone. + + Parameters + ---------- + unit : str + one of 's' [second], 'ms' [millisecond], 'us' [microsecond], or 'ns' + [nanosecond] + tz : str, default None + Time zone name. None indicates time zone naive + + Examples + -------- + Create an instance of timestamp type: + + >>> import pyarrow as pa + >>> pa.timestamp("us") + TimestampType(timestamp[us]) + >>> pa.timestamp("s", tz="America/New_York") + TimestampType(timestamp[s, tz=America/New_York]) + >>> pa.timestamp("s", tz="+07:30") + TimestampType(timestamp[s, tz=+07:30]) + + Use timestamp type when creating a scalar object: + + >>> from datetime import datetime + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("s", tz="UTC")) + + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("us")) + + + Returns + ------- + timestamp_type : TimestampType + """ + +def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: + """ + Create instance of 32-bit time (time of day) type with unit resolution. + + Parameters + ---------- + unit : str + one of 's' [second], or 'ms' [millisecond] + + Returns + ------- + type : pyarrow.Time32Type + + Examples + -------- + >>> import pyarrow as pa + >>> pa.time32("s") + Time32Type(time32[s]) + >>> pa.time32("ms") + Time32Type(time32[ms]) + """ + +def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: + """ + Create instance of 64-bit time (time of day) type with unit resolution. + + Parameters + ---------- + unit : str + One of 'us' [microsecond], or 'ns' [nanosecond]. + + Returns + ------- + type : pyarrow.Time64Type + + Examples + -------- + >>> import pyarrow as pa + >>> pa.time64("us") + Time64Type(time64[us]) + >>> pa.time64("ns") + Time64Type(time64[ns]) + """ + +def duration(unit: _Unit) -> DurationType[_Unit]: + """ + Create instance of a duration type with unit resolution. + + Parameters + ---------- + unit : str + One of 's' [second], 'ms' [millisecond], 'us' [microsecond], or + 'ns' [nanosecond]. 
+ + Returns + ------- + type : pyarrow.DurationType + + Examples + -------- + Create an instance of duration type: + + >>> import pyarrow as pa + >>> pa.duration("us") + DurationType(duration[us]) + >>> pa.duration("s") + DurationType(duration[s]) + + Create an array with duration type: + + >>> pa.array([0, 1, 2], type=pa.duration("s")) + + [ + 0, + 1, + 2 + ] + """ + +def month_day_nano_interval() -> MonthDayNanoIntervalType: + """ + Create instance of an interval type representing months, days and + nanoseconds between two dates. + + Examples + -------- + Create an instance of an month_day_nano_interval type: + + >>> import pyarrow as pa + >>> pa.month_day_nano_interval() + DataType(month_day_nano_interval) + + Create a scalar with month_day_nano_interval type: + + >>> pa.scalar((1, 15, -30), type=pa.month_day_nano_interval()) + + """ + +def date32() -> Date32Type: + """ + Create instance of 32-bit date (days since UNIX epoch 1970-01-01). + + Examples + -------- + Create an instance of 32-bit date type: + + >>> import pyarrow as pa + >>> pa.date32() + DataType(date32[day]) + + Create a scalar with 32-bit date type: + + >>> from datetime import date + >>> pa.scalar(date(2012, 1, 1), type=pa.date32()) + + """ + +def date64() -> Date64Type: + """ + Create instance of 64-bit date (milliseconds since UNIX epoch 1970-01-01). + + Examples + -------- + Create an instance of 64-bit date type: + + >>> import pyarrow as pa + >>> pa.date64() + DataType(date64[ms]) + + Create a scalar with 64-bit date type: + + >>> from datetime import datetime + >>> pa.scalar(datetime(2012, 1, 1), type=pa.date64()) + + """ + +def float16() -> Float16Type: + """ + Create half-precision floating point type. + + Examples + -------- + Create an instance of float16 type: + + >>> import pyarrow as pa + >>> pa.float16() + DataType(halffloat) + >>> print(pa.float16()) + halffloat + + Create an array with float16 type: + + >>> arr = np.array([1.5, np.nan], dtype=np.float16) + >>> a = pa.array(arr, type=pa.float16()) + >>> a + + [ + 15872, + 32256 + ] + + Note that unlike other float types, if you convert this array + to a python list, the types of its elements will be ``np.float16`` + + >>> [type(val) for val in a.to_pylist()] + [, ] + """ + +def float32() -> Float32Type: + """ + Create single-precision floating point type. + + Examples + -------- + Create an instance of float32 type: + + >>> import pyarrow as pa + >>> pa.float32() + DataType(float) + >>> print(pa.float32()) + float + + Create an array with float32 type: + + >>> pa.array([0.0, 1.0, 2.0], type=pa.float32()) + + [ + 0, + 1, + 2 + ] + """ + +def float64() -> Float64Type: + """ + Create double-precision floating point type. + + Examples + -------- + Create an instance of float64 type: + + >>> import pyarrow as pa + >>> pa.float64() + DataType(double) + >>> print(pa.float64()) + double + + Create an array with float64 type: + + >>> pa.array([0.0, 1.0, 2.0], type=pa.float64()) + + [ + 0, + 1, + 2 + ] + """ + +@overload +def decimal32(precision: _Precision) -> Decimal32Type[_Precision, Literal[0]]: ... +@overload +def decimal32(precision: _Precision, scale: _Scale) -> Decimal32Type[_Precision, _Scale]: ... +def decimal32(*args, **kwargs): + """ + Create decimal type with precision and scale and 32-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. 
The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal32(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 32-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal32(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 32-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 9 significant digits, consider + using ``decimal64``, ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 9 + scale : int + + Returns + ------- + decimal_type : Decimal32Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal32(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal64(precision: _Precision) -> Decimal64Type[_Precision, Literal[0]]: ... +@overload +def decimal64(precision: _Precision, scale: _Scale) -> Decimal64Type[_Precision, _Scale]: ... +def decimal64(*args, **kwargs): + """ + Create decimal type with precision and scale and 64-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal64(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 64-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal64(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 64-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 18 significant digits, consider + using ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 18 + scale : int + + Returns + ------- + decimal_type : Decimal64Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal64(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal128(precision: _Precision) -> Decimal128Type[_Precision, Literal[0]]: ... +@overload +def decimal128(precision: _Precision, scale: _Scale) -> Decimal128Type[_Precision, _Scale]: ... +def decimal128(*args, **kwargs): + """ + Create decimal type with precision and scale and 128-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal128(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 128-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. 
+ + ``decimal128(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 128-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 38 significant digits, consider + using ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 38 + scale : int + + Returns + ------- + decimal_type : Decimal128Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal128(5, 2) + Decimal128Type(decimal128(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal("123.45") + >>> pa.array([a], pa.decimal128(5, 2)) + + [ + 123.45 + ] + """ + +@overload +def decimal256(precision: _Precision) -> Decimal256Type[_Precision, Literal[0]]: ... +@overload +def decimal256(precision: _Precision, scale: _Scale) -> Decimal256Type[_Precision, _Scale]: ... +def decimal256(*args, **kwargs): + """ + Create decimal type with precision and scale and 256-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + For most use cases, the maximum precision offered by ``decimal128`` + is sufficient, and it will result in a more compact and more efficient + encoding. ``decimal256`` is useful if you need a precision higher + than 38 significant digits. + + Parameters + ---------- + precision : int + Must be between 1 and 76 + scale : int + + Returns + ------- + decimal_type : Decimal256Type + """ + +def string() -> StringType: + """ + Create UTF8 variable-length string type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.string() + DataType(string) + + and use the string type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.string()) + + [ + "foo", + "bar", + "baz" + ] + """ + +utf8 = string +""" +Alias for string(). + +Examples +-------- +Create an instance of a string type: + +>>> import pyarrow as pa +>>> pa.utf8() +DataType(string) + +and use the string type to create an array: + +>>> pa.array(['foo', 'bar', 'baz'], type=pa.utf8()) + +[ + "foo", + "bar", + "baz" +] +""" + +@overload +def binary(length: Literal[-1] = ...) -> BinaryType: ... +@overload +def binary(length: int) -> FixedSizeBinaryType: ... +def binary(length): + """ + Create variable-length or fixed size binary type. + + Parameters + ---------- + length : int, optional, default -1 + If length == -1 then return a variable length binary type. If length is + greater than or equal to 0 then return a fixed size binary type of + width `length`. + + Examples + -------- + Create an instance of a variable-length binary type: + + >>> import pyarrow as pa + >>> pa.binary() + DataType(binary) + + and use the variable-length binary type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.binary()) + + [ + 666F6F, + 626172, + 62617A + ] + + Create an instance of a fixed-size binary type: + + >>> pa.binary(3) + FixedSizeBinaryType(fixed_size_binary[3]) + + and use the fixed-length binary type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.binary(3)) + + [ + 666F6F, + 626172, + 62617A + ] + """ + +def large_binary() -> LargeBinaryType: + """ + Create large variable-length binary type. + + This data type may not be supported by all Arrow implementations. 
Unless + you need to represent data larger than 2GB, you should prefer binary(). + + Examples + -------- + Create an instance of large variable-length binary type: + + >>> import pyarrow as pa + >>> pa.large_binary() + DataType(large_binary) + + and use the type to create an array: + + >>> pa.array(["foo", "bar", "baz"], type=pa.large_binary()) + + [ + 666F6F, + 626172, + 62617A + ] + """ + +def large_string() -> LargeStringType: + """ + Create large UTF8 variable-length string type. + + This data type may not be supported by all Arrow implementations. Unless + you need to represent data larger than 2GB, you should prefer string(). + + Examples + -------- + Create an instance of large UTF8 variable-length binary type: + + >>> import pyarrow as pa + >>> pa.large_string() + DataType(large_string) + + and use the type to create an array: + + >>> pa.array(["foo", "bar"] * 50, type=pa.large_string()) + + [ + "foo", + "bar", + ... + "foo", + "bar" + ] + """ + +large_utf8 = large_string +""" +Alias for large_string(). + +Examples +-------- +Create an instance of large UTF8 variable-length binary type: + +>>> import pyarrow as pa +>>> pa.large_utf8() +DataType(large_string) + +and use the type to create an array: + +>>> pa.array(['foo', 'bar'] * 50, type=pa.large_utf8()) + +[ + "foo", + "bar", + ... + "foo", + "bar" +] +""" + +def binary_view() -> BinaryViewType: + """ + Create a variable-length binary view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.binary_view() + DataType(binary_view) + """ + +def string_view() -> StringViewType: + """ + Create UTF8 variable-length string view type. + + Examples + -------- + Create an instance of a string type: + + >>> import pyarrow as pa + >>> pa.string_view() + DataType(string_view) + """ + +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] = ... +) -> ListType[_DataTypeT]: ... +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size +) -> FixedSizeListType[_DataTypeT, _Size]: ... +def list_(*args, **kwargs): + """ + Create ListType instance from child data type or field. + + Parameters + ---------- + value_type : DataType or Field + list_size : int, optional, default -1 + If length == -1 then return a variable length list type. If length is + greater than or equal to 0 then return a fixed size list type. + + Returns + ------- + list_type : DataType + + Examples + -------- + Create an instance of ListType: + + >>> import pyarrow as pa + >>> pa.list_(pa.string()) + ListType(list) + >>> pa.list_(pa.int32(), 2) + FixedSizeListType(fixed_size_list[2]) + + Use the ListType to create a scalar: + + >>> pa.scalar(["foo", None], type=pa.list_(pa.string(), 2)) + + + or an array: + + >>> pa.array([[1, 2], [3, 4]], pa.list_(pa.int32(), 2)) + + [ + [ + 1, + 2 + ], + [ + 3, + 4 + ] + ] + """ + +def large_list(value_type: _DataTypeT | Field[_DataTypeT]) -> LargeListType[_DataTypeT]: + """ + Create LargeListType instance from child data type or field. + + This data type may not be supported by all Arrow implementations. + Unless you need to represent data larger than 2**31 elements, you should + prefer list_(). 
+
+    Parameters
+    ----------
+    value_type : DataType or Field
+
+    Returns
+    -------
+    list_type : DataType
+
+    Examples
+    --------
+    Create an instance of LargeListType:
+
+    >>> import pyarrow as pa
+    >>> pa.large_list(pa.int8())
+    LargeListType(large_list)
+
+    Use the LargeListType to create an array:
+
+    >>> pa.array([[-1, 3]] * 5, type=pa.large_list(pa.int8()))
+
+    [
+      [
+        -1,
+        3
+      ],
+      [
+        -1,
+        3
+      ],
+    ...
+    """
+
+def list_view(value_type: _DataTypeT | Field[_DataTypeT]) -> ListViewType[_DataTypeT]:
+    """
+    Create ListViewType instance from child data type or field.
+
+    This data type may not be supported by all Arrow implementations
+    because it is an alternative to the ListType.
+
+    Parameters
+    ----------
+    value_type : DataType or Field
+
+    Returns
+    -------
+    list_view_type : DataType
+
+    Examples
+    --------
+    Create an instance of ListViewType:
+
+    >>> import pyarrow as pa
+    >>> pa.list_view(pa.string())
+    ListViewType(list_view)
+    """
+
+def large_list_view(
+    value_type: _DataTypeT | Field[_DataTypeT],
+) -> LargeListViewType[_DataTypeT]:
+    """
+    Create LargeListViewType instance from child data type or field.
+
+    This data type may not be supported by all Arrow implementations
+    because it is an alternative to the ListType.
+
+    Parameters
+    ----------
+    value_type : DataType or Field
+
+    Returns
+    -------
+    list_view_type : DataType
+
+    Examples
+    --------
+    Create an instance of LargeListViewType:
+
+    >>> import pyarrow as pa
+    >>> pa.large_list_view(pa.int8())
+    LargeListViewType(large_list_view)
+    """
+
+@overload
+def map_(key_type: _K, item_type: _ValueT) -> MapType[_K, _ValueT, _Ordered]: ...
+@overload
+def map_(
+    key_type: _K, item_type: _ValueT, keys_sorted: _Ordered
+) -> MapType[_K, _ValueT, _Ordered]: ...
+def map_(*args, **kwargs):
+    """
+    Create MapType instance from key and item data types or fields.
+
+    Parameters
+    ----------
+    key_type : DataType or Field
+    item_type : DataType or Field
+    keys_sorted : bool
+
+    Returns
+    -------
+    map_type : DataType
+
+    Examples
+    --------
+    Create an instance of MapType:
+
+    >>> import pyarrow as pa
+    >>> pa.map_(pa.string(), pa.int32())
+    MapType(map)
+    >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True)
+    MapType(map)
+
+    Use MapType to create an array:
+
+    >>> data = [[{"key": "a", "value": 1}, {"key": "b", "value": 2}], [{"key": "c", "value": 3}]]
+    >>> pa.array(data, type=pa.map_(pa.string(), pa.int32(), keys_sorted=True))
+
+    [
+      keys:
+      [
+        "a",
+        "b"
+      ]
+      values:
+      [
+        1,
+        2
+      ],
+      keys:
+      [
+        "c"
+      ]
+      values:
+      [
+        3
+      ]
+    ]
+    """
+
+@overload
+def dictionary(
+    index_type: _IndexT, value_type: _BasicValueT
+) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ...
+@overload
+def dictionary(
+    index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered
+) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ...
+def dictionary(*args, **kwargs):
+    """
+    Dictionary (categorical, or simply encoded) type.
+
+    Parameters
+    ----------
+    index_type : DataType
+    value_type : DataType
+    ordered : bool
+
+    Returns
+    -------
+    type : DictionaryType
+
+    Examples
+    --------
+    Create an instance of dictionary type:
+
+    >>> import pyarrow as pa
+    >>> pa.dictionary(pa.int64(), pa.utf8())
+    DictionaryType(dictionary)
+
+    Use dictionary type to create an array:
+
+    >>> pa.array(["a", "b", None, "d"], pa.dictionary(pa.int64(), pa.utf8()))
+
+    ...
+ -- dictionary: + [ + "a", + "b", + "d" + ] + -- indices: + [ + 0, + 1, + null, + 2 + ] + """ + +def struct( + fields: Iterable[Field[Any] | tuple[str, Field[Any]] | tuple[str, DataType]] + | Mapping[str, Field[Any]], +) -> StructType: + """ + Create StructType instance from fields. + + A struct is a nested type parameterized by an ordered sequence of types + (which can all be distinct), called its fields. + + Parameters + ---------- + fields : iterable of Fields or tuples, or mapping of strings to DataTypes + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + + Examples + -------- + Create an instance of StructType from an iterable of tuples: + + >>> import pyarrow as pa + >>> fields = [ + ... ("f1", pa.int32()), + ... ("f2", pa.string()), + ... ] + >>> struct_type = pa.struct(fields) + >>> struct_type + StructType(struct) + + Retrieve a field from a StructType: + + >>> struct_type[0] + pyarrow.Field + >>> struct_type["f1"] + pyarrow.Field + + Create an instance of StructType from an iterable of Fields: + + >>> fields = [ + ... pa.field("f1", pa.int32()), + ... pa.field("f2", pa.string(), nullable=False), + ... ] + >>> pa.struct(fields) + StructType(struct) + + Returns + ------- + type : DataType + """ + +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: + """ + Create SparseUnionType from child fields. + + A sparse union is a nested type where each logical value is taken from + a single child. A buffer of 8-bit type ids indicates which child + a given logical value is to be taken from. + + In a sparse union, each child array should have the same length as the + union array, regardless of the actual number of union values that + refer to it. + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + type_codes : list of integers, default None + + Returns + ------- + type : SparseUnionType + """ + +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: + """ + Create DenseUnionType from child fields. + + A dense union is a nested type where each logical value is taken from + a single child, at a specific offset. A buffer of 8-bit type ids + indicates which child a given logical value is to be taken from, + and a buffer of 32-bit offsets indicates at which physical position + in the given child array the logical value is to be taken from. + + Unlike a sparse union, a dense union allows encoding only the child array + values which are actually referred to by the union array. This is + counterbalanced by the additional footprint of the offsets buffer, and + the additional indirection cost when looking up values. + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + type_codes : list of integers, default None + + Returns + ------- + type : DenseUnionType + """ + +@overload +def union( + child_fields: list[Field[Any]], mode: Literal["sparse"], type_codes: list[int] | None = None +) -> SparseUnionType: ... +@overload +def union( + child_fields: list[Field[Any]], mode: Literal["dense"], type_codes: list[int] | None = None +) -> DenseUnionType: ... +def union(*args, **kwargs): + """ + Create UnionType from child fields. + + A union is a nested type where each logical value is taken from a + single child. 
A buffer of 8-bit type ids indicates which child + a given logical value is to be taken from. + + Unions come in two flavors: sparse and dense + (see also `pyarrow.sparse_union` and `pyarrow.dense_union`). + + Parameters + ---------- + child_fields : sequence of Field values + Each field must have a UTF8-encoded name, and these field names are + part of the type metadata. + mode : str + Must be 'sparse' or 'dense' + type_codes : list of integers, default None + + Returns + ------- + type : UnionType + """ + +def run_end_encoded( + run_end_type: _RunEndType, value_type: _BasicValueT +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: + """ + Create RunEndEncodedType from run-end and value types. + + Parameters + ---------- + run_end_type : pyarrow.DataType + The integer type of the run_ends array. Must be 'int16', 'int32', or 'int64'. + value_type : pyarrow.DataType + The type of the values array. + + Returns + ------- + type : RunEndEncodedType + """ + +def json_(storage_type: DataType = ...) -> JsonType: + """ + Create instance of JSON extension type. + + Parameters + ---------- + storage_type : DataType, default pyarrow.string() + The underlying data type. Can be on of the following types: + string, large_string, string_view. + + Returns + ------- + type : JsonType + + Examples + -------- + Create an instance of JSON extension type: + + >>> import pyarrow as pa + >>> pa.json_(pa.utf8()) + JsonType(extension) + + Use the JSON type to create an array: + + >>> pa.array(['{"a": 1}', '{"b": 2}'], type=pa.json_(pa.utf8())) + + [ + "{"a": 1}", + "{"b": 2}" + ] + """ + +def uuid() -> UuidType: + """ + Create UuidType instance. + + Returns + ------- + type : UuidType + """ + +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: + """ + Create instance of fixed shape tensor extension type with shape and optional + names of tensor dimensions and indices of the desired logical + ordering of dimensions. + + Parameters + ---------- + value_type : DataType + Data type of individual tensor elements. + shape : tuple or list of integers + The physical shape of the contained tensors. + dim_names : tuple or list of strings, default None + Explicit names to tensor dimensions. + permutation : tuple or list integers, default None + Indices of the desired ordering of the original dimensions. + The indices contain a permutation of the values ``[0, 1, .., N-1]`` where + N is the number of dimensions. The permutation indicates which dimension + of the logical layout corresponds to which dimension of the physical tensor. + For more information on this parameter see + :ref:`fixed_shape_tensor_extension`. 
+ + Examples + -------- + Create an instance of fixed shape tensor extension type: + + >>> import pyarrow as pa + >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) + >>> tensor_type + FixedShapeTensorType(extension) + + Inspect the data type: + + >>> tensor_type.value_type + DataType(int32) + >>> tensor_type.shape + [2, 2] + + Create a table with fixed shape tensor extension array: + + >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] + >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) + >>> tensor = pa.ExtensionArray.from_storage(tensor_type, storage) + >>> pa.table([tensor], names=["tensor_array"]) + pyarrow.Table + tensor_array: extension + ---- + tensor_array: [[[1,2,3,4],[10,20,30,40],[100,200,300,400]]] + + Create an instance of fixed shape tensor extension type with names + of tensor dimensions: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), dim_names=["C", "H", "W"]) + >>> tensor_type.dim_names + ['C', 'H', 'W'] + + Create an instance of fixed shape tensor extension type with + permutation: + + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type.permutation + [0, 2, 1] + + Returns + ------- + type : FixedShapeTensorType + """ + +def bool8() -> Bool8Type: + """ + Create instance of bool8 extension type. + + Examples + -------- + Create an instance of bool8 extension type: + + >>> import pyarrow as pa + >>> type = pa.bool8() + >>> type + Bool8Type(extension) + + Inspect the data type: + + >>> type.storage_type + DataType(int8) + + Create a table with a bool8 array: + + >>> arr = [-1, 0, 1, 2, None] + >>> storage = pa.array(arr, pa.int8()) + >>> other = pa.ExtensionArray.from_storage(type, storage) + >>> pa.table([other], names=["unknown_col"]) + pyarrow.Table + unknown_col: extension + ---- + unknown_col: [[-1,0,1,2,null]] + + Returns + ------- + type : Bool8Type + """ + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: + """ + Create instance of opaque extension type. + + Parameters + ---------- + storage_type : DataType + The underlying data type. + type_name : str + The name of the type in the external system. + vendor_name : str + The name of the external system. + + Examples + -------- + Create an instance of an opaque extension type: + + >>> import pyarrow as pa + >>> type = pa.opaque(pa.binary(), "other", "jdbc") + >>> type + OpaqueType(extension) + + Inspect the data type: + + >>> type.storage_type + DataType(binary) + >>> type.type_name + 'other' + >>> type.vendor_name + 'jdbc' + + Create a table with an opaque array: + + >>> arr = [None, b"foobar"] + >>> storage = pa.array(arr, pa.binary()) + >>> other = pa.ExtensionArray.from_storage(type, storage) + >>> pa.table([other], names=["unknown_col"]) + pyarrow.Table + unknown_col: extension + ---- + unknown_col: [[null,666F6F626172]] + + Returns + ------- + type : OpaqueType + """ + +@overload +def type_for_alias(name: Literal["null"]) -> NullType: ... +@overload +def type_for_alias(name: Literal["bool", "boolean"]) -> BoolType: ... +@overload +def type_for_alias(name: Literal["i1", "int8"]) -> Int8Type: ... +@overload +def type_for_alias(name: Literal["i2", "int16"]) -> Int16Type: ... +@overload +def type_for_alias(name: Literal["i4", "int32"]) -> Int32Type: ... +@overload +def type_for_alias(name: Literal["i8", "int64"]) -> Int64Type: ... +@overload +def type_for_alias(name: Literal["u1", "uint8"]) -> UInt8Type: ... +@overload +def type_for_alias(name: Literal["u2", "uint16"]) -> UInt16Type: ... 
+@overload +def type_for_alias(name: Literal["u4", "uint32"]) -> Uint32Type: ... +@overload +def type_for_alias(name: Literal["u8", "uint64"]) -> UInt64Type: ... +@overload +def type_for_alias(name: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... +@overload +def type_for_alias(name: Literal["f4", "float", "float32"]) -> Float32Type: ... +@overload +def type_for_alias(name: Literal["f8", "double", "float64"]) -> Float64Type: ... +@overload +def type_for_alias(name: Literal["string", "str", "utf8"]) -> StringType: ... +@overload +def type_for_alias(name: Literal["binary"]) -> BinaryType: ... +@overload +def type_for_alias( + name: Literal["large_string", "large_str", "large_utf8"], +) -> LargeStringType: ... +@overload +def type_for_alias(name: Literal["large_binary"]) -> LargeBinaryType: ... +@overload +def type_for_alias(name: Literal["binary_view"]) -> BinaryViewType: ... +@overload +def type_for_alias(name: Literal["string_view"]) -> StringViewType: ... +@overload +def type_for_alias(name: Literal["date32", "date32[day]"]) -> Date32Type: ... +@overload +def type_for_alias(name: Literal["date64", "date64[ms]"]) -> Date64Type: ... +@overload +def type_for_alias(name: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... +@overload +def type_for_alias(name: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... +@overload +def type_for_alias(name: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... +@overload +def type_for_alias(name: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... +@overload +def type_for_alias(name: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... +@overload +def type_for_alias(name: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... +@overload +def type_for_alias(name: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... +@overload +def type_for_alias(name: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... +@overload +def type_for_alias(name: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... +@overload +def type_for_alias(name: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... +@overload +def type_for_alias(name: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... +def type_for_alias(name): + """ + Return DataType given a string alias if one exists. + + Parameters + ---------- + name : str + The alias of the DataType that should be retrieved. + + Returns + ------- + type : DataType + """ + +@overload +def ensure_type(ty: None, allow_none: Literal[True]) -> None: ... +@overload +def ensure_type(ty: _DataTypeT) -> _DataTypeT: ... +@overload +def ensure_type(ty: Literal["null"]) -> NullType: ... +@overload +def ensure_type(ty: Literal["bool", "boolean"]) -> BoolType: ... +@overload +def ensure_type(ty: Literal["i1", "int8"]) -> Int8Type: ... +@overload +def ensure_type(ty: Literal["i2", "int16"]) -> Int16Type: ... +@overload +def ensure_type(ty: Literal["i4", "int32"]) -> Int32Type: ... +@overload +def ensure_type(ty: Literal["i8", "int64"]) -> Int64Type: ... +@overload +def ensure_type(ty: Literal["u1", "uint8"]) -> UInt8Type: ... +@overload +def ensure_type(ty: Literal["u2", "uint16"]) -> UInt16Type: ... +@overload +def ensure_type(ty: Literal["u4", "uint32"]) -> Uint32Type: ... 
+@overload +def ensure_type(ty: Literal["u8", "uint64"]) -> UInt64Type: ... +@overload +def ensure_type(ty: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... +@overload +def ensure_type(ty: Literal["f4", "float", "float32"]) -> Float32Type: ... +@overload +def ensure_type(ty: Literal["f8", "double", "float64"]) -> Float64Type: ... +@overload +def ensure_type(ty: Literal["string", "str", "utf8"]) -> StringType: ... +@overload +def ensure_type(ty: Literal["binary"]) -> BinaryType: ... +@overload +def ensure_type( + ty: Literal["large_string", "large_str", "large_utf8"], +) -> LargeStringType: ... +@overload +def ensure_type(ty: Literal["large_binary"]) -> LargeBinaryType: ... +@overload +def ensure_type(ty: Literal["binary_view"]) -> BinaryViewType: ... +@overload +def ensure_type(ty: Literal["string_view"]) -> StringViewType: ... +@overload +def ensure_type(ty: Literal["date32", "date32[day]"]) -> Date32Type: ... +@overload +def ensure_type(ty: Literal["date64", "date64[ms]"]) -> Date64Type: ... +@overload +def ensure_type(ty: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... +@overload +def ensure_type(ty: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... +@overload +def ensure_type(ty: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... +@overload +def ensure_type(ty: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... +@overload +def ensure_type(ty: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... +@overload +def ensure_type(ty: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... +@overload +def ensure_type(ty: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... +@overload +def ensure_type(ty: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... +@overload +def ensure_type(ty: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... +@overload +def ensure_type(ty: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... +@overload +def ensure_type(ty: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... +def schema( + fields: Iterable[Field[Any]] | Iterable[tuple[str, DataType]] | Mapping[str, DataType], + metadata: dict[bytes | str, bytes | str] | None = None, +) -> Schema: + """ + Construct pyarrow.Schema from collection of fields. + + Parameters + ---------- + fields : iterable of Fields or tuples, or mapping of strings to DataTypes + Can also pass an object that implements the Arrow PyCapsule Protocol + for schemas (has an ``__arrow_c_schema__`` method). + metadata : dict, default None + Keys and values must be coercible to bytes. + + Examples + -------- + Create a Schema from iterable of tuples: + + >>> import pyarrow as pa + >>> pa.schema( + ... [ + ... ("some_int", pa.int32()), + ... ("some_string", pa.string()), + ... pa.field("some_required_string", pa.string(), nullable=False), + ... ] + ... ) + some_int: int32 + some_string: string + some_required_string: string not null + + Create a Schema from iterable of Fields: + + >>> pa.schema([pa.field("some_int", pa.int32()), pa.field("some_string", pa.string())]) + some_int: int32 + some_string: string + + DataTypes can also be passed as strings. 
The following is equivalent to the + above example: + + >>> pa.schema([pa.field("some_int", "int32"), pa.field("some_string", "string")]) + some_int: int32 + some_string: string + + Or more concisely: + + >>> pa.schema([("some_int", "int32"), ("some_string", "string")]) + some_int: int32 + some_string: string + + Returns + ------- + schema : pyarrow.Schema + """ + +def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: + """ + Convert NumPy dtype to pyarrow.DataType. + + Parameters + ---------- + dtype : the numpy dtype to convert + + + Examples + -------- + Create a pyarrow DataType from NumPy dtype: + + >>> import pyarrow as pa + >>> import numpy as np + >>> pa.from_numpy_dtype(np.dtype("float16")) + DataType(halffloat) + >>> pa.from_numpy_dtype("U") + DataType(string) + >>> pa.from_numpy_dtype(bool) + DataType(bool) + >>> pa.from_numpy_dtype(np.str_) + DataType(string) + """ + +def is_boolean_value(obj: Any) -> bool: + """ + Check if the object is a boolean. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_integer_value(obj: Any) -> bool: + """ + Check if the object is an integer. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_float_value(obj: Any) -> bool: + """ + Check if the object is a float. + + Parameters + ---------- + obj : object + The object to check + """ + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "Uint32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "PyExtensionType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "ensure_metadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "tzinfo_to_string", + "string_to_tzinfo", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "ensure_type", + "schema", + "from_numpy_dtype", + "is_boolean_value", + "is_integer_value", + "is_float_value", +] diff --git a/python/pyarrow/_azurefs.pyi b/python/pyarrow/_azurefs.pyi new file mode 100644 index 00000000000..317943ce20f --- 
/dev/null
+++ b/python/pyarrow/_azurefs.pyi
@@ -0,0 +1,74 @@
+from typing import Literal
+
+from ._fs import FileSystem
+
+class AzureFileSystem(FileSystem):
+    """
+    Azure Blob Storage backed FileSystem implementation
+
+    This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a.
+    Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific
+    features will be used when they provide a performance advantage. Azurite emulator is
+    also supported. Note: `/` is the only supported delimiter.
+
+    The storage account is considered the root of the filesystem. When enabled, containers
+    will be created or deleted during relevant directory operations. Obviously, this also
+    requires authentication with the additional permissions.
+
+    By default ``DefaultAzureCredential``
+    is used for authentication. This means it will try several types of authentication
+    and go with the first one that works. If any authentication parameters are provided when
+    initialising the FileSystem, they will be used instead of the default credential.
+
+    Parameters
+    ----------
+    account_name : str
+        Azure Blob Storage account name. This is the globally unique identifier for the
+        storage account.
+    account_key : str, default None
+        Account key of the storage account. If sas_token and account_key are None the
+        default credential will be used. The parameters account_key and sas_token are
+        mutually exclusive.
+    blob_storage_authority : str, default None
+        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
+        for connecting to a local emulator, like Azurite.
+    dfs_storage_authority : str, default None
+        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
+        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
+    blob_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    dfs_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    sas_token : str, default None
+        SAS token for the storage account, used as an alternative to account_key. If sas_token
+        and account_key are None the default credential will be used. The parameters
+        account_key and sas_token are mutually exclusive.
+
+    Examples
+    --------
+    >>> from pyarrow import fs
+    >>> azure_fs = fs.AzureFileSystem(account_name="myaccount")
+    >>> azurite_fs = fs.AzureFileSystem(
+    ...     account_name="devstoreaccount1",
+    ...     account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
+    ...     blob_storage_authority="127.0.0.1:10000",
+    ...     dfs_storage_authority="127.0.0.1:10000",
+    ...     blob_storage_scheme="http",
+    ...     dfs_storage_scheme="http",
+    ... )
+
+    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+    """
+
+    def __init__(
+        self,
+        account_name: str,
+        account_key: str | None = None,
+        blob_storage_authority: str | None = None,
+        dfs_storage_authority: str | None = None,
+        blob_storage_scheme: Literal["http", "https"] = "https",
+        dfs_storage_scheme: Literal["http", "https"] = "https",
+        sas_token: str | None = None,
+    ) -> None: ...
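A quick usage sketch for the stub above (editorial illustration, not part of the patch): the keyword arguments mirror the runtime constructor, so a call against a local Azurite emulator should type-check cleanly against `_azurefs.pyi`. The account name and key below are Azurite's published development defaults, the path "data" is a placeholder container, and a pyarrow build with Azure filesystem support is assumed.

from pyarrow import fs

# Connect to a local Azurite emulator; both endpoints are served over plain HTTP.
azurite = fs.AzureFileSystem(
    account_name="devstoreaccount1",
    account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
    blob_storage_authority="127.0.0.1:10000",
    dfs_storage_authority="127.0.0.1:10000",
    blob_storage_scheme="http",
    dfs_storage_scheme="http",
)

# Methods come typed from the shared FileSystem stubs in _fs.pyi.
info = azurite.get_file_info("data")  # pyarrow.fs.FileInfo for the "data" container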
diff --git a/python/pyarrow/_compute.pyi b/python/pyarrow/_compute.pyi new file mode 100644 index 00000000000..3d61ae42787 --- /dev/null +++ b/python/pyarrow/_compute.pyi @@ -0,0 +1,1721 @@ +from typing import ( + Any, + Callable, + Iterable, + Literal, + Sequence, + TypeAlias, + TypedDict, + overload, +) + +from . import lib + +_Order: TypeAlias = Literal["ascending", "descending"] +_Placement: TypeAlias = Literal["at_start", "at_end"] + +class Kernel(lib._Weakrefable): + """ + A kernel object. + + Kernels handle the execution of a Function for a certain signature. + """ + +class Function(lib._Weakrefable): + """ + A compute function. + + A function implements a certain logical computation over a range of + possible input signatures. Each signature accepts a range of input + types and is implemented by a given Kernel. + + Functions can be of different kinds: + + * "scalar" functions apply an item-wise computation over all items + of their inputs. Each item in the output only depends on the values + of the inputs at the same position. Examples: addition, comparisons, + string predicates... + + * "vector" functions apply a collection-wise computation, such that + each item in the output may depend on the values of several items + in each input. Examples: dictionary encoding, sorting, extracting + unique values... + + * "scalar_aggregate" functions reduce the dimensionality of the inputs by + applying a reduction function. Examples: sum, min_max, mode... + + * "hash_aggregate" functions apply a reduction function to an input + subdivided by grouping criteria. They may not be directly called. + Examples: hash_sum, hash_min_max... + + * "meta" functions dispatch to other functions. + """ + @property + def arity(self) -> int: + """ + The function arity. + + If Ellipsis (i.e. `...`) is returned, the function takes a variable + number of arguments. + """ + @property + def kind( + self, + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: + """ + The function kind. + """ + @property + def name(self) -> str: + """ + The function name. + """ + @property + def num_kernels(self) -> int: + """ + The number of kernels implementing this function. + """ + def call( + self, + args: Iterable, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, + ) -> Any: + """ + Call the function on the given arguments. + + Parameters + ---------- + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. + options : FunctionOptions, optional + Options instance for executing this function. This should have + the right concrete options type. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If + not passed, will be inferred from passed data. + """ + +class FunctionOptions(lib._Weakrefable): + def serialize(self) -> lib.Buffer: ... + @classmethod + def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + +class FunctionRegistry(lib._Weakrefable): + def get_function(self, name: str) -> Function: + """ + Look up a function by name in the registry. + + Parameters + ---------- + name : str + The name of the function to lookup + """ + + def list_functions(self) -> list[str]: + """ + Return all function names in the registry. + """ + +class HashAggregateFunction(Function): ... +class HashAggregateKernel(Kernel): ... 
+class ScalarAggregateFunction(Function): ... +class ScalarAggregateKernel(Kernel): ... +class ScalarFunction(Function): ... +class ScalarKernel(Kernel): ... +class VectorFunction(Function): ... +class VectorKernel(Kernel): ... + +# ==================== _compute.pyx Option classes ==================== +class ArraySortOptions(FunctionOptions): + """ + Options for the `array_sort_indices` function. + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + null_placement : str, default "at_end" + Where nulls in the input should be sorted. + Accepted values are "at_start", "at_end". + """ + def __init__( + self, + order: _Order = "ascending", + null_placement: _Placement = "at_end", + ) -> None: ... + +class AssumeTimezoneOptions(FunctionOptions): + """ + Options for the `assume_timezone` function. + + Parameters + ---------- + timezone : str + Timezone to assume for the input. + ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + """ + + def __init__( + self, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + ) -> None: ... + +class CastOptions(FunctionOptions): + """ + Options for the `cast` function. + + Parameters + ---------- + target_type : DataType, optional + The PyArrow type to cast to. + allow_int_overflow : bool, default False + Whether integer overflow is allowed when casting. + allow_time_truncate : bool, default False + Whether time precision truncation is allowed when casting. + allow_time_overflow : bool, default False + Whether date/time range overflow is allowed when casting. + allow_decimal_truncate : bool, default False + Whether decimal precision truncation is allowed when casting. + allow_float_truncate : bool, default False + Whether floating-point precision truncation is allowed when casting. + allow_invalid_utf8 : bool, default False + Whether producing invalid utf8 data is allowed when casting. + """ + + allow_int_overflow: bool + allow_time_truncate: bool + allow_time_overflow: bool + allow_decimal_truncate: bool + allow_float_truncate: bool + allow_invalid_utf8: bool + + def __init__( + self, + target_type: lib.DataType | None = None, + *, + allow_int_overflow: bool | None = None, + allow_time_truncate: bool | None = None, + allow_time_overflow: bool | None = None, + allow_decimal_truncate: bool | None = None, + allow_float_truncate: bool | None = None, + allow_invalid_utf8: bool | None = None, + ) -> None: ... + @staticmethod + def safe(target_type: lib.DataType | None = None) -> CastOptions: ... + @staticmethod + def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... + def is_safe(self) -> bool: ... + +class CountOptions(FunctionOptions): + """ + Options for the `count` function. + + Parameters + ---------- + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + """ + def __init__(self, mode: Literal["only_valid", "only_null", "all"] = "only_valid") -> None: ... + +class CumulativeOptions(FunctionOptions): + """ + Options for `cumulative_*` functions. 
+ + - cumulative_sum + - cumulative_sum_checked + - cumulative_prod + - cumulative_prod_checked + - cumulative_max + - cumulative_min + + Parameters + ---------- + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class CumulativeSumOptions(FunctionOptions): + """ + Options for `cumulative_sum` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for sum computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class DayOfWeekOptions(FunctionOptions): + """ + Options for the `day_of_week` function. + + Parameters + ---------- + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + """ + + def __init__(self, *, count_from_zero: bool = True, week_start: int = 1) -> None: ... + +class DictionaryEncodeOptions(FunctionOptions): + """ + Options for dictionary encoding. + + Parameters + ---------- + null_encoding : str, default "mask" + How to encode nulls in the input. + Accepted values are "mask" (null inputs emit a null in the indices + array), "encode" (null inputs emit a non-null index pointing to + a null value in the dictionary array). + """ + def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... + +class RunEndEncodeOptions(FunctionOptions): + """ + Options for run-end encoding. + + Parameters + ---------- + run_end_type : DataType, default pyarrow.int32() + The data type of the run_ends array. + + Accepted values are pyarrow.{int16(), int32(), int64()}. + """ + # TODO: default is DataType(int32) + def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + +class ElementWiseAggregateOptions(FunctionOptions): + """ + Options for element-wise aggregate functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + """ + def __init__(self, *, skip_nulls: bool = True) -> None: ... + +class ExtractRegexOptions(FunctionOptions): + """ + Options for the `extract_regex` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class ExtractRegexSpanOptions(FunctionOptions): + """ + Options for the `extract_regex_span` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class FilterOptions(FunctionOptions): + """ + Options for selecting with a boolean filter. + + Parameters + ---------- + null_selection_behavior : str, default "drop" + How to handle nulls in the selection filter. + Accepted values are "drop", "emit_null". + """ + + def __init__(self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... + +class IndexOptions(FunctionOptions): + """ + Options for the `index` function. 
+ + Parameters + ---------- + value : Scalar + The value to search for. + """ + def __init__(self, value: lib.Scalar) -> None: ... + +class JoinOptions(FunctionOptions): + """ + Options for the `binary_join_element_wise` function. + + Parameters + ---------- + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + """ + @overload + def __init__(self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + @overload + def __init__(self, null_handling: Literal["replace"], null_replacement: str = "") -> None: ... + +class ListSliceOptions(FunctionOptions): + """ + Options for list array slicing. + + Parameters + ---------- + start : int + Index to start slicing inner list elements (inclusive). + stop : Optional[int], default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. (NotImplemented) + step : int, default 1 + Slice step. + return_fixed_size_list : Optional[bool], default None + Whether to return a FixedSizeListArray. If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + """ + def __init__( + self, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + ) -> None: ... + +class ListFlattenOptions(FunctionOptions): + """ + Options for `list_flatten` function + + Parameters + ---------- + recursive : bool, default False + When True, the list array is flattened recursively until an array + of non-list values is formed. + """ + def __init__(self, recursive: bool = False) -> None: ... + +class MakeStructOptions(FunctionOptions): + """ + Options for the `make_struct` function. + + Parameters + ---------- + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + """ + def __init__( + self, + field_names: Sequence[str] = (), + *, + field_nullability: Sequence[bool] | None = None, + field_metadata: Sequence[lib.KeyValueMetadata] | None = None, + ) -> None: ... + +class MapLookupOptions(FunctionOptions): + """ + Options for the `map_lookup` function. + + Parameters + ---------- + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + """ + # TODO: query_key: Scalar or Object can be converted to Scalar + def __init__( + self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] + ) -> None: ... + +class MatchSubstringOptions(FunctionOptions): + """ + Options for looking for a substring. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + """ + + def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + +class ModeOptions(FunctionOptions): + """ + Options for the `mode` function. + + Parameters + ---------- + n : int, default 1 + Number of distinct most-common values to return. 
+ skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, n: int = 1, *, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class NullOptions(FunctionOptions): + """ + Options for the `is_null` function. + + Parameters + ---------- + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + """ + def __init__(self, *, nan_is_null: bool = False) -> None: ... + +class PadOptions(FunctionOptions): + """ + Options for padding strings. + + Parameters + ---------- + width : int + Desired string length. + padding : str, default " " + What to pad the string with. Should be one byte or codepoint. + lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). + """ + def __init__( + self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True + ) -> None: ... + +class PairwiseOptions(FunctionOptions): + """ + Options for `pairwise` functions. + + Parameters + ---------- + period : int, default 1 + Period for applying the period function. + """ + def __init__(self, period: int = 1) -> None: ... + +class PartitionNthOptions(FunctionOptions): + """ + Options for the `partition_nth_indices` function. + + Parameters + ---------- + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + """ + def __init__(self, pivot: int, *, null_placement: _Placement = "at_end") -> None: ... + +class WinsorizeOptions(FunctionOptions): + """ + Options for the `winsorize` function. + + Parameters + ---------- + lower_limit : float, between 0 and 1 + The quantile below which all values are replaced with the quantile's value. + upper_limit : float, between 0 and 1 + The quantile above which all values are replaced with the quantile's value. + """ + def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + +class QuantileOptions(FunctionOptions): + """ + Options for the `quantile` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. 
+ """ + def __init__( + self, + q: float | Sequence[float], + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class RandomOptions(FunctionOptions): + """ + Options for random generation. + + Parameters + ---------- + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + """ + def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + +class RankOptions(FunctionOptions): + """ + Options for the `rank` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + tiebreaker : str, default "first" + Configure how ties between equal values are handled. + Accepted values are: + + - "min": Ties get the smallest possible rank in sorted order. + - "max": Ties get the largest possible rank in sorted order. + - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + """ + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + ) -> None: ... + +class RankQuantileOptions(FunctionOptions): + """ + Options for the `rank_quantile` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + """ + + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + ) -> None: ... + +class PivotWiderOptions(FunctionOptions): + """ + Options for the `pivot_wider` function. + + Parameters + ---------- + key_names : sequence of str + The pivot key names expected in the pivot key column. + For each entry in `key_names`, a column with the same name is emitted + in the struct output. + unexpected_key_behavior : str, default "ignore" + The behavior when pivot keys not in `key_names` are encountered. + Accepted values are "ignore", "raise". + If "ignore", unexpected keys are silently ignored. + If "raise", unexpected keys raise a KeyError. 
+ """ + def __init__( + self, + key_names: Sequence[str], + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + ) -> None: ... + +class ReplaceSliceOptions(FunctionOptions): + """ + Options for replacing slices. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + """ + def __init__(self, start: int, stop: int, replacement: str) -> None: ... + +class ReplaceSubstringOptions(FunctionOptions): + """ + Options for replacing matched substrings. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + """ + def __init__( + self, pattern: str, replacement: str, *, max_replacements: int | None = None + ) -> None: ... + +_RoundMode: TypeAlias = Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", +] + +class RoundBinaryOptions(FunctionOptions): + """ + Options for rounding numbers when ndigits is provided by a second array + + Parameters + ---------- + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +class RoundOptions(FunctionOptions): + """ + Options for rounding numbers. + + Parameters + ---------- + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + ndigits: int = 0, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +_DateTimeUint: TypeAlias = Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", +] + +class RoundTemporalOptions(FunctionOptions): + """ + Options for rounding temporal values. + + Parameters + ---------- + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. 
+ + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. + """ + def __init__( + self, + multiple: int = 1, + unit: _DateTimeUint = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + ) -> None: ... + +class RoundToMultipleOptions(FunctionOptions): + """ + Options for rounding numbers to a multiple. + + Parameters + ---------- + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__(self, multiple: float = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... + +class ScalarAggregateOptions(FunctionOptions): + """ + Options for scalar aggregations. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... + +class SelectKOptions(FunctionOptions): + """ + Options for top/bottom k-selection. + + Parameters + ---------- + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + """ + + def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + +class SetLookupOptions(FunctionOptions): + """ + Options for the `is_in` and `index_in` functions. + + Parameters + ---------- + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + """ + def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... + +class SliceOptions(FunctionOptions): + """ + Options for slicing. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). 
+ stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + """ + + def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ... + +class SortOptions(FunctionOptions): + """ + Options for the `sort_indices` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". + """ + def __init__( + self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end" + ) -> None: ... + +class SplitOptions(FunctionOptions): + """ + Options for splitting on whitespace. + + Parameters + ---------- + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + + def __init__(self, *, max_splits: int | None = None, reverse: bool = False) -> None: ... + +class SplitPatternOptions(FunctionOptions): + """ + Options for splitting on a string pattern. + + Parameters + ---------- + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + def __init__( + self, pattern: str, *, max_splits: int | None = None, reverse: bool = False + ) -> None: ... + +class StrftimeOptions(FunctionOptions): + """ + Options for the `strftime` function. + + Parameters + ---------- + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + """ + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C") -> None: ... + +class StrptimeOptions(FunctionOptions): + """ + Options for the `strptime` function. + + Parameters + ---------- + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + """ + def __init__( + self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False + ) -> None: ... + +class StructFieldOptions(FunctionOptions): + """ + Options for the `struct_field` function. + + Parameters + ---------- + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + """ + def __init__( + self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int + ) -> None: ... 
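As a usage sketch for the option classes above (editorial illustration, not part of the patch): each FunctionOptions subclass is a small typed constructor whose instance is handed to a compute kernel, either explicitly through pc.call_function or implicitly via the keyword arguments of the pyarrow.compute wrappers. A standard pyarrow build with the compute kernels available is assumed.

import pyarrow as pa
import pyarrow.compute as pc

# A struct array with two fields; StructFieldOptions selects nested fields by index.
arr = pa.array([{"x": 2, "y": "b"}, {"x": 1, "y": "a"}])
xs = pc.call_function("struct_field", [arr], pc.StructFieldOptions(indices=[0]))

# The sorting kernels consume ArraySortOptions/SortOptions the same way.
idx = pc.call_function("array_sort_indices", [xs], pc.ArraySortOptions(order="descending"))
print(xs.take(idx))  # values of field "x" in descending order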
+ +class TakeOptions(FunctionOptions): + """ + Options for the `take` and `array_take` functions. + + Parameters + ---------- + boundscheck : boolean, default True + Whether to check indices are within bounds. If False and an + index is out of bounds, behavior is undefined (the process + may crash). + """ + def __init__(self, boundscheck: bool = True) -> None: ... + +class TDigestOptions(FunctionOptions): + """ + Options for the `tdigest` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class TrimOptions(FunctionOptions): + """ + Options for trimming characters from strings. + + Parameters + ---------- + characters : str + Individual characters to be trimmed from the string. + """ + def __init__(self, characters: str) -> None: ... + +class Utf8NormalizeOptions(FunctionOptions): + """ + Options for the `utf8_normalize` function. + + Parameters + ---------- + form : str + Unicode normalization form. + Accepted values are "NFC", "NFKC", "NFD", NFKD". + """ + + def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + +class VarianceOptions(FunctionOptions): + """ + Options for the `variance` and `stddev` functions. + + Parameters + ---------- + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class SkewOptions(FunctionOptions): + """ + Options for the `skew` and `kurtosis` functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + biased : bool, default True + Whether the calculated value is biased. + If False, the value computed includes a correction factor to reduce bias. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 + ) -> None: ... + +class WeekOptions(FunctionOptions): + """ + Options for the `week` function. + + Parameters + ---------- + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). 
+ first_week_is_fully_in_year : bool, default False + If True, week number 0 is fully in January. + If False, a week that begins on December 29, 30 or 31 is considered + to be week number 0 of the following year. + """ + def __init__( + self, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + ) -> None: ... + +# ==================== _compute.pyx Functions ==================== + +def call_function( + name: str, + args: list, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, +) -> Any: + """ + Call a named function. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to call. + args : list + The arguments to the function. + options : optional + options provided to the function. + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If not + passed, inferred from data. + """ + +def function_registry() -> FunctionRegistry: ... +def get_function(name: str) -> Function: + """ + Get a function by name. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to lookup + """ + +def list_functions() -> list[str]: + """ + Return all function names in the global registry. + """ + +# ==================== _compute.pyx Udf ==================== + +def call_tabular_function( + function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None +) -> lib.RecordBatchReader: + """ + Get a record batch iterator from a tabular function. + + Parameters + ---------- + function_name : str + Name of the function. + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. Currently, only an empty args is supported. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + """ + +class _FunctionDoc(TypedDict): + summary: str + description: str + +def register_scalar_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined scalar function. + + This API is EXPERIMENTAL. + + A scalar function is a function that executes elementwise + operations on arrays or scalars, i.e. a scalar function must + be computed row-by-row with no state where each output row + is computed only from its corresponding input row. + In other words, all argument arrays have the same length, + and the output array is of the same length as the arguments. + Scalar functions are the only functions allowed in query engine + expressions. + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return an Array or Scalar + matching the out_type. It must return a Scalar if + all arguments are scalar, else it must return an Array. + + To define a varargs function, pass a callable that takes + *args. The last in_type will be the type of all varargs + arguments. 
+    function_name : str
+        Name of the function. There should only be one function
+        registered with this name in the function registry.
+    function_doc : dict
+        A dictionary object with keys "summary" (str),
+        and "description" (str).
+    in_types : Dict[str, DataType]
+        A dictionary mapping function argument names to
+        their respective DataType.
+        The argument names will be used to generate
+        documentation for the function. The number of
+        arguments specified here determines the function
+        arity.
+    out_type : DataType
+        Output type of the function.
+    func_registry : FunctionRegistry
+        Optional function registry to use instead of the default global one.
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>>
+    >>> func_doc = {}
+    >>> func_doc["summary"] = "simple udf"
+    >>> func_doc["description"] = "add a constant to a scalar"
+    >>>
+    >>> def add_constant(ctx, array):
+    ...     return pc.add(array, 1, memory_pool=ctx.memory_pool)
+    >>>
+    >>> func_name = "py_add_func"
+    >>> in_types = {"array": pa.int64()}
+    >>> out_type = pa.int64()
+    >>> pc.register_scalar_function(add_constant, func_name, func_doc, in_types, out_type)
+    >>>
+    >>> func = pc.get_function(func_name)
+    >>> func.name
+    'py_add_func'
+    >>> answer = pc.call_function(func_name, [pa.array([20])])
+    >>> answer
+    <pyarrow.lib.Int64Array object at ...>
+    [
+    21
+    ]
+    """
+
+def register_tabular_function(
+    func: Callable,
+    function_name: str,
+    function_doc: _FunctionDoc,
+    in_types: dict[str, lib.DataType],
+    out_type: lib.DataType,
+    func_registry: FunctionRegistry | None = None,
+) -> None:
+    """
+    Register a user-defined tabular function.
+
+    This API is EXPERIMENTAL.
+
+    A tabular function is one accepting a context argument of type
+    UdfContext and returning a generator of struct arrays.
+    The in_types argument must be empty and the out_type argument
+    specifies a schema. Each struct array must have field types
+    corresponding to the schema.
+
+    Parameters
+    ----------
+    func : callable
+        A callable implementing the user-defined function.
+        The only argument is the context argument of type
+        UdfContext. It must return a callable that
+        returns on each invocation a StructArray matching
+        the out_type, where an empty array indicates end.
+    function_name : str
+        Name of the function. There should only be one function
+        registered with this name in the function registry.
+    function_doc : dict
+        A dictionary object with keys "summary" (str),
+        and "description" (str).
+    in_types : Dict[str, DataType]
+        Must be an empty dictionary (reserved for future use).
+    out_type : Union[Schema, DataType]
+        Schema of the function's output, or a corresponding flat struct type.
+    func_registry : FunctionRegistry
+        Optional function registry to use instead of the default global one.
+    """
+
+def register_aggregate_function(
+    func: Callable,
+    function_name: str,
+    function_doc: _FunctionDoc,
+    in_types: dict[str, lib.DataType],
+    out_type: lib.DataType,
+    func_registry: FunctionRegistry | None = None,
+) -> None:
+    """
+    Register a user-defined non-decomposable aggregate function.
+
+    This API is EXPERIMENTAL.
+
+    A non-decomposable aggregation function is a function that executes
+    aggregate operations on the whole data that it is aggregating.
+    In other words, a non-decomposable aggregate function cannot be
+    split into consume/merge/finalize steps.
+
+    This is often used with ordered or segmented aggregation where groups
+    can be emitted before accumulating all of the input data.
+
+    Note that currently the size of any input column cannot exceed 2 GB
+    for a single segment (all groups combined).
+
+    Parameters
+    ----------
+    func : callable
+        A callable implementing the user-defined function.
+        The first argument is the context argument of type
+        UdfContext.
+        Then, it must take arguments equal to the number of
+        in_types defined. It must return a Scalar matching the
+        out_type.
+        To define a varargs function, pass a callable that takes
+        *args. The in_types need to match the types of the inputs when
+        the function gets called.
+    function_name : str
+        Name of the function. This name must be unique, i.e.,
+        there should only be one function registered with
+        this name in the function registry.
+    function_doc : dict
+        A dictionary object with keys "summary" (str),
+        and "description" (str).
+    in_types : Dict[str, DataType]
+        A dictionary mapping function argument names to
+        their respective DataType.
+        The argument names will be used to generate
+        documentation for the function. The number of
+        arguments specified here determines the function
+        arity.
+    out_type : DataType
+        Output type of the function.
+    func_registry : FunctionRegistry
+        Optional function registry to use instead of the default global one.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>>
+    >>> func_doc = {}
+    >>> func_doc["summary"] = "simple median udf"
+    >>> func_doc["description"] = "compute median"
+    >>>
+    >>> def compute_median(ctx, array):
+    ...     return pa.scalar(np.median(array))
+    >>>
+    >>> func_name = "py_compute_median"
+    >>> in_types = {"array": pa.int64()}
+    >>> out_type = pa.float64()
+    >>> pc.register_aggregate_function(compute_median, func_name, func_doc, in_types, out_type)
+    >>>
+    >>> func = pc.get_function(func_name)
+    >>> func.name
+    'py_compute_median'
+    >>> answer = pc.call_function(func_name, [pa.array([20, 40])])
+    >>> answer
+    <pyarrow.DoubleScalar: 30.0>
+    >>> table = pa.table([pa.array([1, 1, 2, 2]), pa.array([10, 20, 30, 40])], names=["k", "v"])
+    >>> result = table.group_by("k").aggregate([("v", "py_compute_median")])
+    >>> result
+    pyarrow.Table
+    k: int64
+    v_py_compute_median: double
+    ----
+    k: [[1,2]]
+    v_py_compute_median: [[15,35]]
+    """
+
+def register_vector_function(
+    func: Callable,
+    function_name: str,
+    function_doc: _FunctionDoc,
+    in_types: dict[str, lib.DataType],
+    out_type: lib.DataType,
+    func_registry: FunctionRegistry | None = None,
+) -> None:
+    """
+    Register a user-defined vector function.
+
+    This API is EXPERIMENTAL.
+
+    A vector function is a function that executes vector
+    operations on arrays. A vector function is often used
+    when the computation doesn't fit other, more specific types of
+    functions (e.g., scalar and aggregate).
+
+    Parameters
+    ----------
+    func : callable
+        A callable implementing the user-defined function.
+        The first argument is the context argument of type
+        UdfContext.
+        Then, it must take arguments equal to the number of
+        in_types defined. It must return an Array or Scalar
+        matching the out_type. It must return a Scalar if
+        all arguments are scalar, else it must return an Array.
+
+        To define a varargs function, pass a callable that takes
+        *args. The last in_type will be the type of all varargs
+        arguments.
+    function_name : str
+        Name of the function. There should only be one function
+        registered with this name in the function registry.
+    function_doc : dict
+        A dictionary object with keys "summary" (str),
+        and "description" (str).
+    in_types : Dict[str, DataType]
+        A dictionary mapping function argument names to
+        their respective DataType.
+        The argument names will be used to generate
+        documentation for the function. The number of
+        arguments specified here determines the function
+        arity.
+    out_type : DataType
+        Output type of the function.
+    func_registry : FunctionRegistry
+        Optional function registry to use instead of the default global one.
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>>
+    >>> func_doc = {}
+    >>> func_doc["summary"] = "percent rank"
+    >>> func_doc["description"] = "compute percent rank"
+    >>>
+    >>> def list_flatten_udf(ctx, x):
+    ...     return pc.list_flatten(x)
+    >>>
+    >>> func_name = "list_flatten_udf"
+    >>> in_types = {"array": pa.list_(pa.int64())}
+    >>> out_type = pa.int64()
+    >>> pc.register_vector_function(list_flatten_udf, func_name, func_doc, in_types, out_type)
+    >>>
+    >>> answer = pc.call_function(func_name, [pa.array([[1, 2], [3, 4]])])
+    >>> answer
+    <pyarrow.lib.Int64Array object at ...>
+    [
+    1,
+    2,
+    3,
+    4
+    ]
+    """
+
+class UdfContext:
+    """
+    Per-invocation function context/state.
+
+    This object will always be the first argument to a user-defined
+    function. It should not be used outside of a call to the function.
+    """
+
+    @property
+    def batch_length(self) -> int:
+        """
+        The common length of all input arguments (int).
+
+        In the case that all arguments are scalars, this value
+        is used to pass the "actual length" of the arguments,
+        e.g. because the scalar values are encoding a column
+        with a constant value.
+        """
+    @property
+    def memory_pool(self) -> lib.MemoryPool:
+        """
+        A memory pool for allocations (:class:`MemoryPool`).
+
+        This is the memory pool supplied by the user when they invoked
+        the function and it should be used in any calls to arrow that the
+        UDF makes if that call accepts a memory_pool.
+        """
+
+# ==================== _compute.pyx Expression ====================
+class Expression(lib._Weakrefable):
+    """
+    A logical expression to be evaluated against some input.
+
+    To create an expression:
+
+    - Use the factory function ``pyarrow.compute.scalar()`` to create a
+      scalar (not necessary when combined, see example below).
+    - Use the factory function ``pyarrow.compute.field()`` to reference
+      a field (column in table).
+    - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``.
+    - Combine expressions using python operators ``&`` (logical and),
+      ``|`` (logical or) and ``~`` (logical not).
+      Note: python keywords ``and``, ``or`` and ``not`` cannot be used
+      to combine expressions.
+    - Create expression predicates using Expression methods such as
+      ``pyarrow.compute.Expression.isin()``.
+
+    Examples
+    --------
+
+    >>> import pyarrow.compute as pc
+    >>> (pc.field("a") < pc.scalar(3)) | (pc.field("b") > 7)
+    <pyarrow.compute.Expression ((a < 3) or (b > 7))>
+    >>> pc.field("a") != 3
+    <pyarrow.compute.Expression (a != 3)>
+    >>> pc.field("a").isin([1, 2, 3])
+    <pyarrow.compute.Expression is_in(a, {value_set=int64:[
+      1,
+      2,
+      3
+    ], null_matching_behavior=MATCH})>
+    """
+
+    @staticmethod
+    def from_substrait(buffer: bytes | lib.Buffer) -> Expression:
+        """
+        Deserialize an expression from Substrait
+
+        The serialized message must be an ExtendedExpression message that has
+        only a single expression. The name of the expression and the schema
+        the expression was bound to will be ignored. Use
+        pyarrow.substrait.deserialize_expressions if this information is needed
+        or if the message might contain multiple expressions.
+ + Parameters + ---------- + message : bytes or Buffer or a protobuf Message + The Substrait message to deserialize + + Returns + ------- + Expression + The deserialized expression + """ + def to_substrait(self, schema: lib.Schema, allow_arrow_extensions: bool = False) -> lib.Buffer: + """ + Serialize the expression using Substrait + + The expression will be serialized as an ExtendedExpression message that has a + single expression named "expression" + + Parameters + ---------- + schema : Schema + The input schema the expression will be bound to + allow_arrow_extensions : bool, default False + If False then only functions that are part of the core Substrait function + definitions will be allowed. Set this to True to allow pyarrow-specific functions + but the result may not be accepted by other compute libraries. + + Returns + ------- + Buffer + A buffer containing the serialized Protobuf plan. + """ + def __invert__(self) -> Expression: ... + def __and__(self, other) -> Expression: ... + def __or__(self, other) -> Expression: ... + def __add__(self, other) -> Expression: ... + def __mul__(self, other) -> Expression: ... + def __sub__(self, other) -> Expression: ... + def __eq__(self, value: object) -> Expression: ... # type: ignore[override] + def __ne__(self, value: object) -> Expression: ... # type: ignore[override] + def __gt__(self, value: object) -> Expression: ... # type: ignore[override] + def __lt__(self, value: object) -> Expression: ... # type: ignore[override] + def __ge__(self, value: object) -> Expression: ... # type: ignore[override] + def __le__(self, value: object) -> Expression: ... # type: ignore[override] + def __truediv__(self, other) -> Expression: ... + def is_valid(self) -> bool: + """ + Check whether the expression is not-null (valid). + + This creates a new expression equivalent to calling the + `is_valid` compute function on this expression. + + Returns + ------- + is_valid : Expression + """ + def is_null(self, nan_is_null: bool = False) -> Expression: + """ + Check whether the expression is null. + + This creates a new expression equivalent to calling the + `is_null` compute function on this expression. + + Parameters + ---------- + nan_is_null : boolean, default False + Whether floating-point NaNs are considered null. + + Returns + ------- + is_null : Expression + """ + def is_nan(self) -> Expression: + """ + Check whether the expression is NaN. + + This creates a new expression equivalent to calling the + `is_nan` compute function on this expression. + + Returns + ------- + is_nan : Expression + """ + def cast( + self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None + ) -> Expression: + """ + Explicitly set or change the expression's data type. + + This creates a new expression equivalent to calling the + `cast` compute function on this expression. + + Parameters + ---------- + type : DataType, default None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Expression + """ + def isin(self, values: lib.Array | Iterable) -> Expression: + """ + Check whether the expression is contained in values. + + This creates a new expression equivalent to calling the + `is_in` compute function on this expression. + + Parameters + ---------- + values : Array or iterable + The values to check for. 
+ + Returns + ------- + isin : Expression + A new expression that, when evaluated, checks whether + this expression's value is contained in `values`. + """ + +# ==================== _compute.py ==================== diff --git a/python/pyarrow/_csv.pyi b/python/pyarrow/_csv.pyi new file mode 100644 index 00000000000..2f49f8c9a6c --- /dev/null +++ b/python/pyarrow/_csv.pyi @@ -0,0 +1,641 @@ +from dataclasses import dataclass, field +from typing import IO, Any, Callable, Literal + +from _typeshed import StrPath + +from . import lib + +@dataclass(kw_only=True) +class ReadOptions(lib._Weakrefable): + """ + Options for reading CSV files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual record batches or table chunks. + Minimum valid value for block size is 1 + skip_rows : int, optional (default 0) + The number of rows to skip before the column names (if any) + and the CSV data. + skip_rows_after_names : int, optional (default 0) + The number of rows to skip after the column names. + This number can be larger than the number of rows in one + block, and empty rows are counted. + The order of application is as follows: + - `skip_rows` is applied (if non-zero); + - column names are read (unless `column_names` is set); + - `skip_rows_after_names` is applied (if non-zero). + column_names : list, optional + The column names of the target table. If empty, fall back on + `autogenerate_column_names`. + autogenerate_column_names : bool, optional (default False) + Whether to autogenerate column names if `column_names` is empty. + If true, column names will be of the form "f0", "f1"... + If false, column names will be read from the first CSV row + after `skip_rows`. + encoding : str, optional (default 'utf8') + The character encoding of the CSV data. Columns that cannot + decode using this encoding can still be read as Binary. 
+ + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = "1,2,3\\nFlamingo,2,2022-03-01\\nHorse,4,2022-03-02\\nBrittle stars,5,2022-03-03\\nCentipede,100,2022-03-04" + >>> print(s) + 1,2,3 + Flamingo,2,2022-03-01 + Horse,4,2022-03-02 + Brittle stars,5,2022-03-03 + Centipede,100,2022-03-04 + + Ignore the first numbered row and substitute it with defined + or autogenerated column names: + + >>> from pyarrow import csv + >>> read_options = csv.ReadOptions(column_names=["animals", "n_legs", "entry"], skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + >>> read_options = csv.ReadOptions(autogenerate_column_names=True, skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + f0: string + f1: int64 + f2: date32[day] + ---- + f0: [["Flamingo","Horse","Brittle stars","Centipede"]] + f1: [[2,4,5,100]] + f2: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + Remove the first 2 rows of the data: + + >>> read_options = csv.ReadOptions(skip_rows_after_names=2) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + 1: string + 2: int64 + 3: date32[day] + ---- + 1: [["Brittle stars","Centipede"]] + 2: [[5,100]] + 3: [[2022-03-03,2022-03-04]] + """ + + use_threads: bool = field(default=True, kw_only=False) + block_size: int | None = None + skip_rows: int = 0 + skip_rows_after_names: int = 0 + column_names: list[str] | None = None + autogenerate_column_names: bool = False + encoding: str = "utf8" + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ParseOptions(lib._Weakrefable): + """ + Options for parsing CSV files. + + Parameters + ---------- + delimiter : 1-character string, optional (default ',') + The character delimiting individual cells in the CSV data. + quote_char : 1-character string or False, optional (default '"') + The character used optionally for quoting CSV values + (False if quoting is not allowed). + double_quote : bool, optional (default True) + Whether two quotes in a quoted CSV value denote a single quote + in the data. + escape_char : 1-character string or False, optional (default False) + The character used optionally for escaping special characters + (False if escaping is not allowed). + newlines_in_values : bool, optional (default False) + Whether newline characters are allowed in CSV values. + Setting this to True reduces the performance of multi-threaded + CSV reading. + ignore_empty_lines : bool, optional (default True) + Whether empty lines are ignored in CSV input. + If False, an empty line is interpreted as containing a single empty + value (assuming a one-column CSV file). + invalid_row_handler : callable, optional (default None) + If not None, this object is called for each CSV row that fails + parsing (because of a mismatching number of columns). + It should accept a single InvalidRow argument and return either + "skip" or "error" depending on the desired outcome. + + Examples + -------- + + Defining an example file from bytes object: + + >>> import io + >>> s = ( + ... "animals;n_legs;entry\\n" + ... "Flamingo;2;2022-03-01\\n" + ... "# Comment here:\\n" + ... "Horse;4;2022-03-02\\n" + ... "Brittle stars;5;2022-03-03\\n" + ... "Centipede;100;2022-03-04" + ... 
) + >>> print(s) + animals;n_legs;entry + Flamingo;2;2022-03-01 + # Comment here: + Horse;4;2022-03-02 + Brittle stars;5;2022-03-03 + Centipede;100;2022-03-04 + >>> source = io.BytesIO(s.encode()) + + Read the data from a file skipping rows with comments + and defining the delimiter: + + >>> from pyarrow import csv + >>> def skip_comment(row): + ... if row.text.startswith("# "): + ... return "skip" + ... else: + ... return "error" + >>> parse_options = csv.ParseOptions(delimiter=";", invalid_row_handler=skip_comment) + >>> csv.read_csv(source, parse_options=parse_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + """ + + delimiter: str = field(default=",", kw_only=False) + quote_char: str | Literal[False] = '"' + double_quote: bool = True + escape_char: str | Literal[False] = False + newlines_in_values: bool = False + ignore_empty_lines: bool = True + invalid_row_handler: Callable[[InvalidRow], Literal["skip", "error"]] | None = None + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ConvertOptions(lib._Weakrefable): + """ + Options for converting CSV data. + + Parameters + ---------- + check_utf8 : bool, optional (default True) + Whether to check UTF8 validity of string columns. + column_types : pyarrow.Schema or dict, optional + Explicitly map column names to column types. Passing this argument + disables type inference on the defined columns. + null_values : list, optional + A sequence of strings that denote nulls in the data + (defaults are appropriate in most cases). Note that by default, + string columns are not checked for null values. To enable + null checking for those, specify ``strings_can_be_null=True``. + true_values : list, optional + A sequence of strings that denote true booleans in the data + (defaults are appropriate in most cases). + false_values : list, optional + A sequence of strings that denote false booleans in the data + (defaults are appropriate in most cases). + decimal_point : 1-character string, optional (default '.') + The character used as decimal point in floating-point and decimal + data. + strings_can_be_null : bool, optional (default False) + Whether string / binary columns can have null values. + If true, then strings in null_values are considered null for + string columns. + If false, then all strings are valid string values. + quoted_strings_can_be_null : bool, optional (default True) + Whether quoted values can be null. + If true, then strings in "null_values" are also considered null + when they appear quoted in the CSV file. Otherwise, quoted values + are never considered null. + include_columns : list, optional + The names of columns to include in the Table. + If empty, the Table will include all columns from the CSV file. + If not empty, only these columns will be included, in this order. + include_missing_columns : bool, optional (default False) + If false, columns in `include_columns` but not in the CSV file will + error out. + If true, columns in `include_columns` but not in the CSV file will + produce a column of nulls (whose type is selected using + `column_types`, or null by default). + This option is ignored if `include_columns` is empty. + auto_dict_encode : bool, optional (default False) + Whether to try to automatically dict-encode string / binary data. 
+ If true, then when type inference detects a string or binary column, + it it dict-encoded up to `auto_dict_max_cardinality` distinct values + (per chunk), after which it switches to regular encoding. + This setting is ignored for non-inferred columns (those in + `column_types`). + auto_dict_max_cardinality : int, optional + The maximum dictionary cardinality for `auto_dict_encode`. + This value is per chunk. + timestamp_parsers : list, optional + A sequence of strptime()-compatible format strings, tried in order + when attempting to infer or convert timestamp values (the special + value ISO8601() can also be given). By default, a fast built-in + ISO-8601 parser is used. + + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = ( + ... "animals,n_legs,entry,fast\\n" + ... "Flamingo,2,01/03/2022,Yes\\n" + ... "Horse,4,02/03/2022,Yes\\n" + ... "Brittle stars,5,03/03/2022,No\\n" + ... "Centipede,100,04/03/2022,No\\n" + ... ",6,05/03/2022," + ... ) + >>> print(s) + animals,n_legs,entry,fast + Flamingo,2,01/03/2022,Yes + Horse,4,02/03/2022,Yes + Brittle stars,5,03/03/2022,No + Centipede,100,04/03/2022,No + ,6,05/03/2022, + + Change the type of a column: + + >>> import pyarrow as pa + >>> from pyarrow import csv + >>> convert_options = csv.ConvertOptions(column_types={"n_legs": pa.float64()}) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: double + entry: string + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]] + fast: [["Yes","Yes","No","No",""]] + + Define a date parsing format to get a timestamp type column + (in case dates are not in ISO format and not converted by default): + + >>> convert_options = csv.ConvertOptions(timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: timestamp[s] + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [["Yes","Yes","No","No",""]] + + Specify a subset of columns to be read: + + >>> convert_options = csv.ConvertOptions(include_columns=["animals", "n_legs"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + + List additional column to be included as a null typed column: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs", "location"], include_missing_columns=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + location: null + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + location: [5 nulls] + + Define columns as dictionary type (by default only the + string/binary columns are dictionary encoded): + + >>> convert_options = csv.ConvertOptions( + ... timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"], auto_dict_encode=True + ... 
) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: dictionary + n_legs: int64 + entry: timestamp[s] + fast: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Horse","Brittle stars","Centipede",""] -- indices: + [0,1,2,3,4]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [ -- dictionary: + ["Yes","No",""] -- indices: + [0,0,1,1,2]] + + Set upper limit for the number of categories. If the categories + is more than the limit, the conversion to dictionary will not + happen: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals"], auto_dict_encode=True, auto_dict_max_cardinality=2 + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + + Set empty strings to missing values: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs"], strings_can_be_null=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",null]] + n_legs: [[2,4,5,100,6]] + + Define values to be True and False when converting a column + into a bool type: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["fast"], false_values=["No"], true_values=["Yes"] + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + fast: bool + ---- + fast: [[true,true,false,false,null]] + """ + + check_utf8: bool = field(default=True, kw_only=False) + column_types: lib.Schema | dict | None = None + null_values: list[str] | None = None + true_values: list[str] | None = None + false_values: list[str] | None = None + decimal_point: str = "." + strings_can_be_null: bool = False + quoted_strings_can_be_null: bool = True + include_columns: list[str] | None = None + include_missing_columns: bool = False + auto_dict_encode: bool = False + auto_dict_max_cardinality: int | None = None + timestamp_parsers: list[str] | None = None + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class WriteOptions(lib._Weakrefable): + """ + Options for writing CSV files. + + Parameters + ---------- + include_header : bool, optional (default True) + Whether to write an initial header line with column names + batch_size : int, optional (default 1024) + How many rows to process together when converting and writing + CSV data + delimiter : 1-character string, optional (default ",") + The character delimiting individual cells in the CSV data. + quoting_style : str, optional (default "needed") + Whether to quote values, and if so, which quoting style to use. + The following values are accepted: + + - "needed" (default): only enclose values in quotes when needed. + - "all_valid": enclose all valid values in quotes; nulls are not quoted. + - "none": do not enclose any values in quotes; values containing + special characters (such as quotes, cell delimiters or line endings) + will raise an error. + """ + + include_header: bool = field(default=True, kw_only=False) + batch_size: int = 1024 + delimiter: str = "," + quoting_style: Literal["needed", "all_valid", "none"] = "needed" + + def validate(self) -> None: ... 
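Taken together, the four option classes cover a full round trip. A short sketch (not part of the stubs; the table contents are made up for illustration) writing with WriteOptions and reading back through the streaming reader:

```python
# Sketch: CSV round trip with the option classes documented above.
import io

import pyarrow as pa
from pyarrow import csv

table = pa.table({"animals": ["Flamingo", "Horse"], "n_legs": [2, 4]})

# Write with a non-default delimiter.
sink = io.BytesIO()
csv.write_csv(table, sink, write_options=csv.WriteOptions(delimiter=";"))

# Read it back incrementally; ParseOptions must agree on the delimiter and
# ConvertOptions may override the inferred column types.
reader = csv.open_csv(
    io.BytesIO(sink.getvalue()),
    parse_options=csv.ParseOptions(delimiter=";"),
    convert_options=csv.ConvertOptions(column_types={"n_legs": pa.int32()}),
)
print(reader.read_all())  # pyarrow.Table: animals string, n_legs int32
```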
+ +@dataclass +class InvalidRow(lib._Weakrefable): + """ + Description of an invalid row in a CSV file. + + Parameters + ---------- + expected_columns : int + The expected number of columns in the row. + actual_columns : int + The actual number of columns in the row. + number : int or None + The physical row number if known, otherwise None. + text : str + The contents of the row. + """ + + expected_columns: int + actual_columns: int + number: int | None + text: str + +class CSVWriter(lib._CRecordBatchWriter): + """ + Writer to create a CSV file. + + Parameters + ---------- + sink : str, path, pyarrow.OutputStream or file-like object + The location where to write the CSV data. + schema : pyarrow.Schema + The schema of the data to be written. + write_options : pyarrow.csv.WriteOptions + Options to configure writing the CSV data. + memory_pool : MemoryPool, optional + Pool for temporary allocations. + """ + + def __init__( + self, + # TODO: OutputStream + sink: StrPath | IO[Any], + schema: lib.Schema, + write_options: WriteOptions | None = None, + *, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class CSVStreamingReader(lib.RecordBatchReader): ... + +ISO8601: lib._Weakrefable + +def open_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> CSVStreamingReader: + """ + Open a streaming reader of CSV data. + + Reading using this function is always single-threaded. + + Parameters + ---------- + input_file : string, path or file-like object + The location of CSV data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.csv.ReadOptions, optional + Options for the CSV reader (see pyarrow.csv.ReadOptions constructor + for defaults) + parse_options : pyarrow.csv.ParseOptions, optional + Options for the CSV parser + (see pyarrow.csv.ParseOptions constructor for defaults) + convert_options : pyarrow.csv.ConvertOptions, optional + Options for converting CSV data + (see pyarrow.csv.ConvertOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from + + Returns + ------- + :class:`pyarrow.csv.CSVStreamingReader` + """ + +def read_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Table: + """ + Read a Table from a stream of CSV data. + + Parameters + ---------- + input_file : string, path or file-like object + The location of CSV data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. 
+ read_options : pyarrow.csv.ReadOptions, optional + Options for the CSV reader (see pyarrow.csv.ReadOptions constructor + for defaults) + parse_options : pyarrow.csv.ParseOptions, optional + Options for the CSV parser + (see pyarrow.csv.ParseOptions constructor for defaults) + convert_options : pyarrow.csv.ConvertOptions, optional + Options for converting CSV data + (see pyarrow.csv.ConvertOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate Table memory from + + Returns + ------- + :class:`pyarrow.Table` + Contents of the CSV file as a in-memory table. + + Examples + -------- + + Defining an example file from bytes object: + + >>> import io + >>> s = ( + ... "animals,n_legs,entry\\n" + ... "Flamingo,2,2022-03-01\\n" + ... "Horse,4,2022-03-02\\n" + ... "Brittle stars,5,2022-03-03\\n" + ... "Centipede,100,2022-03-04" + ... ) + >>> print(s) + animals,n_legs,entry + Flamingo,2,2022-03-01 + Horse,4,2022-03-02 + Brittle stars,5,2022-03-03 + Centipede,100,2022-03-04 + >>> source = io.BytesIO(s.encode()) + + Reading from the file + + >>> from pyarrow import csv + >>> csv.read_csv(source) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + """ + +def write_csv( + data: lib.RecordBatch | lib.Table, + output_file: StrPath | lib.NativeFile | IO[Any], + write_options: WriteOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> None: + """ + Write record batch or table to a CSV file. + + Parameters + ---------- + data : pyarrow.RecordBatch or pyarrow.Table + The data to write. + output_file : string, path, pyarrow.NativeFile, or file-like object + The location where to write the CSV data. + write_options : pyarrow.csv.WriteOptions + Options to configure writing the CSV data. + memory_pool : MemoryPool, optional + Pool for temporary allocations. + + Examples + -------- + + >>> import pyarrow as pa + >>> from pyarrow import csv + + >>> legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> entry_date = pa.array(["01/03/2022", "02/03/2022", "03/03/2022", "04/03/2022"]) + >>> table = pa.table([animals, legs, entry_date], names=["animals", "n_legs", "entry"]) + + >>> csv.write_csv(table, "animals.csv") + + >>> write_options = csv.WriteOptions(include_header=False) + >>> csv.write_csv(table, "animals.csv", write_options=write_options) + + >>> write_options = csv.WriteOptions(delimiter=";") + >>> csv.write_csv(table, "animals.csv", write_options=write_options) + """ diff --git a/python/pyarrow/_cuda.pyi b/python/pyarrow/_cuda.pyi new file mode 100644 index 00000000000..ad52b2f380f --- /dev/null +++ b/python/pyarrow/_cuda.pyi @@ -0,0 +1,556 @@ +from typing import Any + +import cuda # type: ignore[import-not-found] + +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] + +from . import lib +from ._stubs_typing import ArrayLike + +class Context(lib._Weakrefable): + """ + CUDA driver context. + """ + + def __init__(self, device_number: int = 0, handle: int | None = None) -> None: + """ + Create a CUDA driver context for a particular device. + + If a CUDA context handle is passed, it is wrapped, otherwise + a default CUDA context for the given device is requested. 
+ + Parameters + ---------- + device_number : int (default 0) + Specify the GPU device for which the CUDA driver context is + requested. + handle : int, optional + Specify CUDA handle for a shared context that has been created + by another library. + """ + @staticmethod + def from_numba(context: _numba_driver.Context | None = None) -> Context: + """ + Create a Context instance from a Numba CUDA context. + + Parameters + ---------- + context : {numba.cuda.cudadrv.driver.Context, None} + A Numba CUDA context instance. + If None, the current Numba context is used. + + Returns + ------- + shared_context : pyarrow.cuda.Context + Context instance. + """ + def to_numba(self) -> _numba_driver.Context: + """ + Convert Context to a Numba CUDA context. + + Returns + ------- + context : numba.cuda.cudadrv.driver.Context + Numba CUDA context instance. + """ + @staticmethod + def get_num_devices() -> int: + """Return the number of GPU devices.""" + @property + def device_number(self) -> int: + """Return context device number.""" + @property + def handle(self) -> int: + """Return pointer to context handle.""" + def synchronize(self) -> None: + """Blocks until the device has completed all preceding requested + tasks. + """ + @property + def bytes_allocated(self) -> int: + """Return the number of allocated bytes.""" + def get_device_address(self, address: int) -> int: + """Return the device address that is reachable from kernels running in + the context + + Parameters + ---------- + address : int + Specify memory address value + + Returns + ------- + device_address : int + Device address accessible from device context + + Notes + ----- + The device address is defined as a memory address accessible + by device. While it is often a device memory address but it + can be also a host memory address, for instance, when the + memory is allocated as host memory (using cudaMallocHost or + cudaHostAlloc) or as managed memory (using cudaMallocManaged) + or the host memory is page-locked (using cudaHostRegister). + """ + def new_buffer(self, nbytes: int) -> CudaBuffer: + """Return new device buffer. + + Parameters + ---------- + nbytes : int + Specify the number of bytes to be allocated. + + Returns + ------- + buf : CudaBuffer + Allocated buffer. + """ + @property + def memory_manager(self) -> lib.MemoryManager: + """ + The default memory manager tied to this context's device. + + Returns + ------- + MemoryManager + """ + @property + def device(self) -> lib.Device: + """ + The device instance associated with this context. + + Returns + ------- + Device + """ + def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: + """ + Create device buffer from address and size as a view. + + The caller is responsible for allocating and freeing the + memory. When `address==size==0` then a new zero-sized buffer + is returned. + + Parameters + ---------- + address : int + Specify the starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + Specify the size of device buffer in bytes. + base : {None, object} + Specify object that owns the referenced memory. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device reachable memory. 
+ + """ + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: + """Open existing CUDA IPC memory handle + + Parameters + ---------- + ipc_handle : IpcMemHandle + Specify opaque pointer to CUipcMemHandle (driver API). + + Returns + ------- + buf : CudaBuffer + referencing device buffer + """ + def buffer_from_data( + self, + data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, + offset: int = 0, + size: int = -1, + ) -> CudaBuffer: + """Create device buffer and initialize with data. + + Parameters + ---------- + data : {CudaBuffer, HostBuffer, Buffer, array-like} + Specify data to be copied to device buffer. + offset : int + Specify the offset of input buffer for device data + buffering. Default: 0. + size : int + Specify the size of device buffer in bytes. Default: all + (starting from input offset) + + Returns + ------- + cbuf : CudaBuffer + Device buffer with copied data. + """ + def buffer_from_object(self, obj: Any) -> CudaBuffer: + """Create device buffer view of arbitrary object that references + device accessible memory. + + When the object contains a non-contiguous view of device + accessible memory then the returned device buffer will contain + contiguous view of the memory, that is, including the + intermediate data that is otherwise invisible to the input + object. + + Parameters + ---------- + obj : {object, Buffer, HostBuffer, CudaBuffer, ...} + Specify an object that holds (device or host) address that + can be accessed from device. This includes objects with + types defined in pyarrow.cuda as well as arbitrary objects + that implement the CUDA array interface as defined by numba. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device accessible memory. + + """ + +class IpcMemHandle(lib._Weakrefable): + """A serializable container for a CUDA IPC handle.""" + @staticmethod + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: + """Create IpcMemHandle from opaque buffer (e.g. from another + process) + + Parameters + ---------- + opaque_handle : + a CUipcMemHandle as a const void* + + Returns + ------- + ipc_handle : IpcMemHandle + """ + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: + """Write IpcMemHandle to a Buffer + + Parameters + ---------- + pool : {MemoryPool, None} + Specify a pool to allocate memory from + + Returns + ------- + buf : Buffer + The serialized buffer. + """ + +class CudaBuffer(lib.Buffer): + """An Arrow buffer with data located in a GPU device. + + To create a CudaBuffer instance, use Context.device_buffer(). + + The memory allocated in a CudaBuffer is freed when the buffer object + is deleted. + """ + + @staticmethod + def from_buffer(buf: lib.Buffer) -> CudaBuffer: + """Convert back generic buffer into CudaBuffer + + Parameters + ---------- + buf : Buffer + Specify buffer containing CudaBuffer + + Returns + ------- + dbuf : CudaBuffer + Resulting device buffer. + """ + @staticmethod + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: + """Create a CudaBuffer view from numba MemoryPointer instance. + + Parameters + ---------- + mem : numba.cuda.cudadrv.driver.MemoryPointer + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of numba MemoryPointer. 
+ """ + def to_numba(self) -> _numba_driver.MemoryPointer: + """Return numba memory pointer of CudaBuffer instance.""" + def copy_to_host( + self, + position: int = 0, + nbytes: int = -1, + buf: lib.Buffer | None = None, + memory_pool: lib.MemoryPool | None = None, + resizable: bool = False, + ) -> lib.Buffer: + """Copy memory from GPU device to CPU host + + Caller is responsible for ensuring that all tasks affecting + the memory are finished. Use + + `.context.synchronize()` + + when needed. + + Parameters + ---------- + position : int + Specify the starting position of the source data in GPU + device buffer. Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + the position until host buffer is full). + buf : Buffer + Specify a pre-allocated output buffer in host. Default: None + (allocate new output buffer). + memory_pool : MemoryPool + resizable : bool + Specify extra arguments to allocate_buffer. Used only when + buf is None. + + Returns + ------- + buf : Buffer + Output buffer in host. + + """ + def copy_from_host( + self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + ) -> int: + """Copy data from host to device. + + The device buffer must be pre-allocated. + + Parameters + ---------- + data : {Buffer, array-like} + Specify data in host. It can be array-like that is valid + argument to py_buffer + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + """ + def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: + """Copy data from device to device. + + Parameters + ---------- + buf : CudaBuffer + Specify source device buffer. + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + + """ + def export_for_ipc(self) -> IpcMemHandle: + """ + Expose this device buffer as IPC memory which can be used in other + processes. + + After calling this function, this device memory will not be + freed when the CudaBuffer is destructed. + + Returns + ------- + ipc_handle : IpcMemHandle + The exported IPC handle + + """ + @property + def context(self) -> Context: + """Returns the CUDA driver context of this buffer.""" + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: + """Return slice of device buffer + + Parameters + ---------- + offset : int, default 0 + Specify offset from the start of device buffer to slice + length : int, default None + Specify the length of slice (default is until end of device + buffer starting from offset). If the length is larger than + the data available, the returned slice will have a size of + the available data starting from the offset. + + Returns + ------- + sliced : CudaBuffer + Zero-copy slice of device buffer. + + """ + def to_pybytes(self) -> bytes: + """Return device buffer content as Python bytes.""" + +class HostBuffer(lib.Buffer): + """Device-accessible CPU memory created using cudaHostAlloc. + + To create a HostBuffer instance, use + + cuda.new_host_buffer() + """ + @property + def size(self) -> int: ... 
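A rough usage sketch for the buffer types above; it assumes a CUDA-capable GPU and a pyarrow build with CUDA enabled, so treat it as illustrative rather than something the stubs guarantee:

```python
# Sketch: host <-> device copies with Context and CudaBuffer.
# Requires a CUDA-capable GPU and a pyarrow build with CUDA support.
import numpy as np
from pyarrow import cuda

ctx = cuda.Context(0)              # driver context for device 0
host = np.arange(8, dtype=np.int64)

cbuf = ctx.buffer_from_data(host)  # allocate device memory and copy
assert cbuf.size == host.nbytes
assert cbuf.context.device_number == 0

back = cbuf.copy_to_host()         # returns a CPU pyarrow Buffer
print(np.frombuffer(back, dtype=np.int64))
```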
+ +class BufferReader(lib.NativeFile): + """File interface for zero-copy read from CUDA buffers. + + Note: Read methods return pointers to device memory. This means + you must be careful using this interface with any Arrow code which + may expect to be able to do anything other than pointer arithmetic + on the returned buffers. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: + """Return a slice view of the underlying device buffer. + + The slice will start at the current reader position and will + have specified size in bytes. + + Parameters + ---------- + nbytes : int, default None + Specify the number of bytes to read. Default: None (read all + remaining bytes). + + Returns + ------- + cbuf : CudaBuffer + New device buffer. + + """ + +class BufferWriter(lib.NativeFile): + """File interface for writing to CUDA buffers. + + By default writes are unbuffered. Use set_buffer_size to enable + buffering. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def writeat(self, position: int, data: ArrayLike) -> None: + """Write data to buffer starting from position. + + Parameters + ---------- + position : int + Specify device buffer position where the data will be + written. + data : array-like + Specify data, the data instance must implement buffer + protocol. + """ + @property + def buffer_size(self) -> int: + """Returns size of host (CPU) buffer, 0 for unbuffered""" + @buffer_size.setter + def buffer_size(self, buffer_size: int): + """Set CPU buffer size to limit calls to cudaMemcpy + + Parameters + ---------- + buffer_size : int + Specify the size of CPU buffer to allocate in bytes. + """ + @property + def num_bytes_buffered(self) -> int: + """Returns number of bytes buffered on host""" + +def new_host_buffer(size: int, device: int = 0) -> HostBuffer: + """Return buffer with CUDA-accessible memory on CPU host + + Parameters + ---------- + size : int + Specify the number of bytes to be allocated. + device : int + Specify GPU device number. + + Returns + ------- + dbuf : HostBuffer + Allocated host buffer + """ + +def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: + """Write record batch message to GPU device memory + + Parameters + ---------- + batch : RecordBatch + Record batch to write + ctx : Context + CUDA Context to allocate device memory from + + Returns + ------- + dbuf : CudaBuffer + device buffer which contains the record batch message + """ + +def read_message( + source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None +) -> lib.Message: + """Read Arrow IPC message located on GPU device + + Parameters + ---------- + source : {CudaBuffer, cuda.BufferReader} + Device buffer or reader of device buffer. + pool : MemoryPool (optional) + Pool to allocate CPU memory for the metadata + + Returns + ------- + message : Message + The deserialized message, body still on device + """ + +def read_record_batch( + buffer: lib.Buffer, + object: lib.Schema, + *, + dictionary_memo: lib.DictionaryMemo | None = None, + pool: lib.MemoryPool | None = None, +) -> lib.RecordBatch: + """Construct RecordBatch referencing IPC message located on CUDA device. + + While the metadata is copied to host memory for deserialization, + the record batch data remains on the device. 
+ + Parameters + ---------- + buffer : + Device buffer containing the complete IPC message + schema : Schema + The schema for the record batch + dictionary_memo : DictionaryMemo, optional + If message contains dictionaries, must pass a populated + DictionaryMemo + pool : MemoryPool (optional) + Pool to allocate metadata from + + Returns + ------- + batch : RecordBatch + Reconstructed record batch, with device pointers + + """ diff --git a/python/pyarrow/_dataset.pyi b/python/pyarrow/_dataset.pyi new file mode 100644 index 00000000000..114bf625983 --- /dev/null +++ b/python/pyarrow/_dataset.pyi @@ -0,0 +1,2301 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + IO, + Any, + Callable, + Generic, + Iterator, + Literal, + NamedTuple, + TypeVar, + overload, +) + +from _typeshed import StrPath + +from . import _csv, _json, _parquet, lib +from ._fs import FileSelector, FileSystem, SupportedFileSystem +from ._stubs_typing import Indices, JoinType, Order +from .acero import ExecNodeOptions +from .compute import Expression +from .ipc import IpcWriteOptions, RecordBatchReader + +class Dataset(lib._Weakrefable): + """ + Collection of data fragments and potentially child datasets. + + Arrow Datasets allow you to query against data that has been split across + multiple files. This sharding of data may indicate partitioning, which + can accelerate queries that only touch some partitions (files). + """ + + @property + def partition_expression(self) -> Expression: + """ + An Expression which evaluates to true for all data viewed by this + Dataset. + """ + def replace_schema(self, schema: lib.Schema) -> None: + """ + Return a copy of this Dataset with a different schema. + + The copy will view the same Fragments. If the new schema is not + compatible with the original dataset's schema then an error will + be raised. + + Parameters + ---------- + schema : Schema + The new dataset schema. + """ + def get_fragments(self, filter: Expression | None = None): + """Returns an iterator over the fragments in this dataset. + + Parameters + ---------- + filter : Expression, default None + Return fragments matching the optional filter, either using the + partition_expression or internal information like Parquet's + statistics. + + Returns + ------- + fragments : iterator of Fragment + """ + def scanner( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the dataset. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + See the :meth:`Scanner.from_dataset` method for further information. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "dataset_scanner.parquet") + + >>> import pyarrow.dataset as ds + >>> dataset = ds.dataset("dataset_scanner.parquet") + + Selecting a subset of the columns: + + >>> dataset.scanner(columns=["year", "n_legs"]).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2021,2022,2019,2021]] + n_legs: [[2,2,4,4,5,100]] + + Projecting selected columns using an expression: + + >>> dataset.scanner( + ... columns={ + ... "n_legs_uint": ds.field("n_legs").cast("uint8"), + ... } + ... 
).to_table() + pyarrow.Table + n_legs_uint: uint8 + ---- + n_legs_uint: [[2,2,4,4,5,100]] + + Filtering rows while scanning: + + >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2022,2021,2022,2021]] + n_legs: [[2,4,4,100]] + animal: [["Parrot","Dog","Horse","Centipede"]] + """ + def to_batches( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the dataset as materialized record batches. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Read the dataset to an Arrow table. + + Note that this method reads all the selected data from the dataset + into memory. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. 
+ + Parameters + ---------- + indices : Array or array-like + indices of rows to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the dataset. + + Parameters + ---------- + num_rows : int + The number of rows to load. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. 
+        fragment_scan_options : FragmentScanOptions, default None
+            Options specific to a particular scan and fragment type, which
+            can change between different scans of the same dataset.
+        use_threads : bool, default True
+            If enabled, then maximum parallelism will be used determined by
+            the number of available CPU cores.
+        cache_metadata : bool, default True
+            If enabled, metadata may be cached when scanning to speed up
+            repeated scans.
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required. If not specified, uses the
+            default pool.
+
+        Returns
+        -------
+        count : int
+        """
+    @property
+    def schema(self) -> lib.Schema:
+        """The common schema of the full Dataset"""
+    def filter(self, expression: Expression) -> Self:
+        """
+        Apply a row filter to the dataset.
+
+        Parameters
+        ----------
+        expression : Expression
+            The filter that should be applied to the dataset.
+
+        Returns
+        -------
+        Dataset
+        """
+    def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset:
+        """
+        Sort the Dataset by one or multiple columns.
+
+        Parameters
+        ----------
+        sorting : str or list[tuple(name, order)]
+            Name of the column to use to sort (ascending), or
+            a list of multiple sorting conditions where
+            each entry is a tuple with column name
+            and sorting order ("ascending" or "descending")
+        **kwargs : dict, optional
+            Additional sorting options.
+            As allowed by :class:`SortOptions`
+
+        Returns
+        -------
+        InMemoryDataset
+            A new dataset sorted according to the sort keys.
+        """
+    def join(
+        self,
+        right_dataset: Dataset,
+        keys: str | list[str],
+        right_keys: str | list[str] | None = None,
+        join_type: JoinType = "left outer",
+        left_suffix: str | None = None,
+        right_suffix: str | None = None,
+        coalesce_keys: bool = True,
+        use_threads: bool = True,
+    ) -> InMemoryDataset:
+        """
+        Perform a join between this dataset and another one.
+
+        Result of the join will be a new dataset, where further
+        operations can be applied.
+
+        Parameters
+        ----------
+        right_dataset : dataset
+            The dataset to join to the current one, acting as the right dataset
+            in the join operation.
+        keys : str or list[str]
+            The columns from current dataset that should be used as keys
+            of the join operation left side.
+        right_keys : str or list[str], default None
+            The columns from the right_dataset that should be used as keys
+            on the join operation right side.
+            When ``None`` use the same key names as the left dataset.
+        join_type : str, default "left outer"
+            The kind of join that should be performed, one of
+            ("left semi", "right semi", "left anti", "right anti",
+            "inner", "left outer", "right outer", "full outer")
+        left_suffix : str, default None
+            Which suffix to add to left column names. This prevents confusion
+            when the columns in left and right datasets have colliding names.
+        right_suffix : str, default None
+            Which suffix to add to the right column names. This prevents confusion
+            when the columns in left and right datasets have colliding names.
+        coalesce_keys : bool, default True
+            If the duplicated keys should be omitted from one of the sides
+            in the join result.
+        use_threads : bool, default True
+            Whether to use multithreading or not.
+
+        Returns
+        -------
+        InMemoryDataset
+        """
+    def join_asof(
+        self,
+        right_dataset: Dataset,
+        on: str,
+        by: str | list[str],
+        tolerance: int,
+        right_on: str | list[str] | None = None,
+        right_by: str | list[str] | None = None,
+    ) -> InMemoryDataset:
+        """
+        Perform an asof join between this dataset and another one.
+
+        This is similar to a left-join except that we match on nearest key rather
+        than equal keys. Both datasets must be sorted by the key. This type of join
+        is most useful for time series data that are not perfectly aligned.
+
+        Optionally match on equivalent keys with "by" before searching with "on".
+
+        Result of the join will be a new Dataset, where further
+        operations can be applied.
+
+        Parameters
+        ----------
+        right_dataset : dataset
+            The dataset to join to the current one, acting as the right dataset
+            in the join operation.
+        on : str
+            The column from current dataset that should be used as the "on" key
+            of the join operation left side.
+
+            An inexact match is used on the "on" key, i.e. a row is considered a
+            match if and only if left_on - tolerance <= right_on <= left_on.
+
+            The input table must be sorted by the "on" key. Must be a single
+            field of a common type.
+
+            Currently, the "on" key must be an integer, date, or timestamp type.
+        by : str or list[str]
+            The columns from current dataset that should be used as the keys
+            of the join operation left side. The join operation is then done
+            only for the matches in these columns.
+        tolerance : int
+            The tolerance for inexact "on" key matching. A right row is considered
+            a match with the left row if `right.on - left.on <= tolerance`. The
+            `tolerance` may be:
+
+            - negative, in which case a past-as-of-join occurs;
+            - or positive, in which case a future-as-of-join occurs;
+            - or zero, in which case an exact-as-of-join occurs.
+
+            The tolerance is interpreted in the same units as the "on" key.
+        right_on : str or list[str], default None
+            The columns from the right_dataset that should be used as the on key
+            on the join operation right side.
+            When ``None`` use the same key name as the left dataset.
+        right_by : str or list[str], default None
+            The columns from the right_dataset that should be used as by keys
+            on the join operation right side.
+            When ``None`` use the same key names as the left dataset.
+
+        Returns
+        -------
+        InMemoryDataset
+        """
+
+class InMemoryDataset(Dataset):
+    """
+    A Dataset wrapping in-memory data.
+
+    Parameters
+    ----------
+    source : RecordBatch, Table, list, tuple
+        The data for this dataset. Can be a RecordBatch, Table, list of
+        RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
+        If an iterable is provided, the schema must also be provided.
+    schema : Schema, optional
+        Only required if passing an iterable as the source.
+    """
+
+class UnionDataset(Dataset):
+    """
+    A Dataset wrapping child datasets.
+
+    Children's schemas must agree with the provided schema.
+
+    Parameters
+    ----------
+    schema : Schema
+        A known schema to conform to.
+    children : list of Dataset
+        One or more input children.
+    """
+
+    @property
+    def children(self) -> list[Dataset]: ...
+
+class FileSystemDataset(Dataset):
+    """
+    A Dataset of file fragments.
+
+    A FileSystemDataset is composed of one or more FileFragment.
+
+    Parameters
+    ----------
+    fragments : list[Fragment]
+        List of fragments to consume.
+    schema : Schema
+        The top-level schema of the Dataset.
+    format : FileFormat
+        File format of the fragments, currently only ParquetFileFormat,
+        IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported.
+    filesystem : FileSystem
+        FileSystem of the fragments.
+    root_partition : Expression, optional
+        The top-level partition of the Dataset.
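+
+    Examples
+    --------
+    A FileSystemDataset is normally obtained from the ``dataset()`` factory
+    function rather than constructed directly. A minimal sketch, assuming
+    Parquet files under a local ``data/`` directory:
+
+    >>> import pyarrow.dataset as ds  # doctest: +SKIP
+    >>> dataset = ds.dataset("data/", format="parquet")  # doctest: +SKIP
+    >>> dataset.files  # doctest: +SKIP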
+    """
+
+    def __init__(
+        self,
+        fragments: list[Fragment],
+        schema: lib.Schema,
+        format: FileFormat,
+        filesystem: SupportedFileSystem | None = None,
+        root_partition: Expression | None = None,
+    ) -> None: ...
+    @classmethod
+    def from_paths(
+        cls,
+        paths: list[str],
+        schema: lib.Schema | None = None,
+        format: FileFormat | None = None,
+        filesystem: SupportedFileSystem | None = None,
+        partitions: list[Expression] | None = None,
+        root_partition: Expression | None = None,
+    ) -> FileSystemDataset:
+        """
+        A Dataset created from a list of paths on a particular filesystem.
+
+        Parameters
+        ----------
+        paths : list of str
+            List of file paths to create the fragments from.
+        schema : Schema
+            The top-level schema of the Dataset.
+        format : FileFormat
+            File format to create fragments from, currently only
+            ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported.
+        filesystem : FileSystem
+            The filesystem which files are from.
+        partitions : list[Expression], optional
+            Attach additional partition information for the file paths.
+        root_partition : Expression, optional
+            The top-level partition of the Dataset.
+        """
+    @property
+    def filesystem(self) -> FileSystem: ...
+    @property
+    def partitioning(self) -> Partitioning | None:
+        """
+        The partitioning of the Dataset source, if discovered.
+
+        If the FileSystemDataset is created using the ``dataset()`` factory
+        function with a partitioning specified, this will return the
+        finalized Partitioning object from the dataset discovery. In all
+        other cases, this returns None.
+        """
+    @property
+    def files(self) -> list[str]:
+        """List of the files"""
+    @property
+    def format(self) -> FileFormat:
+        """The FileFormat of this source."""
+
+class FileWriteOptions(lib._Weakrefable):
+    @property
+    def format(self) -> FileFormat: ...
+
+class FileFormat(lib._Weakrefable):
+    def inspect(
+        self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None
+    ) -> lib.Schema:
+        """
+        Infer the schema of a file.
+
+        Parameters
+        ----------
+        file : file-like object, path-like or str
+            The file or file path to infer a schema from.
+        filesystem : Filesystem, optional
+            If `filesystem` is given, `file` must be a string and specifies
+            the path of the file to read from the filesystem.
+
+        Returns
+        -------
+        schema : Schema
+            The schema inferred from the file
+        """
+    def make_fragment(
+        self,
+        file: StrPath | IO,
+        filesystem: SupportedFileSystem | None = None,
+        partition_expression: Expression | None = None,
+        *,
+        file_size: int | None = None,
+    ) -> Fragment:
+        """
+        Make a FileFragment from a given file.
+
+        Parameters
+        ----------
+        file : file-like object, path-like or str
+            The file or file path to make a fragment from.
+        filesystem : Filesystem, optional
+            If `filesystem` is given, `file` must be a string and specifies
+            the path of the file to read from the filesystem.
+        partition_expression : Expression, optional
+            An expression that is guaranteed true for all rows in the fragment. Allows
+            fragment to be potentially skipped while scanning with a filter.
+        file_size : int, optional
+            The size of the file in bytes. Can improve performance with high-latency filesystems
+            when file size needs to be known before reading.
+
+        Returns
+        -------
+        fragment : Fragment
+            The file fragment
+        """
+    def make_write_options(self) -> FileWriteOptions: ...
+    @property
+    def default_extname(self) -> str: ...
+    @property
+    def default_fragment_scan_options(self) -> FragmentScanOptions: ...
+ @default_fragment_scan_options.setter + def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... + +class Fragment(lib._Weakrefable): + """Fragment of data from a Dataset.""" + @property + def physical_schema(self) -> lib.Schema: + """Return the physical schema of this Fragment. This schema can be + different from the dataset read schema.""" + @property + def partition_expression(self) -> Expression: + """An Expression which evaluates to true for all data viewed by this + Fragment. + """ + def scanner( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the fragment. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + Parameters + ---------- + schema : Schema + Schema to use for scanning. This is used to unify a Fragment to + its Dataset's schema. If not specified this will use the + Fragment's physical schema which might differ for each Fragment. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. 
+ cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + """ + def to_batches( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the fragment as materialized record batches. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Convert this Fragment into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. 
+ + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. + + Parameters + ---------- + indices : Array or array-like + The indices of row to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the fragment. + + Parameters + ---------- + num_rows : int + The number of rows to load. 
+ columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. 
Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + count : int + """ + +class FileFragment(Fragment): + """A Fragment representing a data file.""" + + def open(self) -> lib.NativeFile: + """ + Open a NativeFile of the buffer or file viewed by this fragment. + """ + @property + def path(self) -> str: + """ + The path of the data file viewed by this fragment, if it views a + file. If instead it views a buffer, this will be "". + """ + @property + def filesystem(self) -> FileSystem: + """ + The FileSystem containing the data file viewed by this fragment, if + it views a file. If instead it views a buffer, this will be None. + """ + @property + def buffer(self) -> lib.Buffer: + """ + The buffer viewed by this fragment, if it views a buffer. If + instead it views a file, this will be None. + """ + @property + def format(self) -> FileFormat: + """ + The format of the data file viewed by this fragment. + """ + +class FragmentScanOptions(lib._Weakrefable): + """Scan options specific to a particular fragment and scan operation.""" + + @property + def type_name(self) -> str: ... + +class IpcFileWriteOptions(FileWriteOptions): + @property + def write_options(self) -> IpcWriteOptions: ... + @write_options.setter + def write_options(self, write_options: IpcWriteOptions) -> None: ... + +class IpcFileFormat(FileFormat): + def equals(self, other: IpcFileFormat) -> bool: ... + def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... + @property + def default_extname(self) -> str: ... + +class FeatherFileFormat(IpcFileFormat): ... + +class CsvFileFormat(FileFormat): + """ + FileFormat for CSV files. + + Parameters + ---------- + parse_options : pyarrow.csv.ParseOptions + Options regarding CSV parsing. + default_fragment_scan_options : CsvFragmentScanOptions + Default options for fragments scan. + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. + read_options : pyarrow.csv.ReadOptions + General read options. + """ + def __init__( + self, + parse_options: _csv.ParseOptions | None = None, + default_fragment_scan_options: CsvFragmentScanOptions | None = None, + convert_options: _csv.ConvertOptions | None = None, + read_options: _csv.ReadOptions | None = None, + ) -> None: ... + def make_write_options(self) -> _csv.WriteOptions: ... # type: ignore[override] + @property + def parse_options(self) -> _csv.ParseOptions: ... + @parse_options.setter + def parse_options(self, parse_options: _csv.ParseOptions) -> None: ... + def equals(self, other: CsvFileFormat) -> bool: ... + +class CsvFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for CSV fragments. + + Parameters + ---------- + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. 
+    read_options : pyarrow.csv.ReadOptions
+        General read options.
+    """
+
+    convert_options: _csv.ConvertOptions
+    read_options: _csv.ReadOptions
+
+    def __init__(
+        self, convert_options: _csv.ConvertOptions, read_options: _csv.ReadOptions
+    ) -> None: ...
+    def equals(self, other: CsvFragmentScanOptions) -> bool: ...
+
+class CsvFileWriteOptions(FileWriteOptions):
+    write_options: _csv.WriteOptions
+
+class JsonFileFormat(FileFormat):
+    """
+    FileFormat for JSON files.
+
+    Parameters
+    ----------
+    default_fragment_scan_options : JsonFragmentScanOptions
+        Default options for fragments scan.
+    parse_options : pyarrow.json.ParseOptions
+        Options regarding JSON parsing.
+    read_options : pyarrow.json.ReadOptions
+        General read options.
+    """
+    def __init__(
+        self,
+        default_fragment_scan_options: JsonFragmentScanOptions | None = None,
+        parse_options: _json.ParseOptions | None = None,
+        read_options: _json.ReadOptions | None = None,
+    ) -> None: ...
+    def equals(self, other: JsonFileFormat) -> bool: ...
+
+class JsonFragmentScanOptions(FragmentScanOptions):
+    """
+    Scan-specific options for JSON fragments.
+
+    Parameters
+    ----------
+    parse_options : pyarrow.json.ParseOptions
+        Options regarding JSON parsing.
+    read_options : pyarrow.json.ReadOptions
+        General read options.
+    """
+
+    parse_options: _json.ParseOptions
+    read_options: _json.ReadOptions
+    def __init__(
+        self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions
+    ) -> None: ...
+    def equals(self, other: JsonFragmentScanOptions) -> bool: ...
+
+class Partitioning(lib._Weakrefable):
+    def parse(self, path: str) -> Expression:
+        """
+        Parse a path into a partition expression.
+
+        Parameters
+        ----------
+        path : str
+
+        Returns
+        -------
+        pyarrow.dataset.Expression
+        """
+    def format(self, expr: Expression) -> tuple[str, str]:
+        """
+        Convert a filter expression into a tuple of (directory, filename) using
+        the current partitioning scheme.
+
+        Parameters
+        ----------
+        expr : pyarrow.dataset.Expression
+
+        Returns
+        -------
+        tuple[str, str]
+
+        Examples
+        --------
+
+        Specify the Schema for paths like "/2009/June":
+
+        >>> import pyarrow as pa
+        >>> import pyarrow.dataset as ds
+        >>> import pyarrow.compute as pc
+        >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())]))
+        >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan"))
+        ('1862/Jan', '')
+        """
+    @property
+    def schema(self) -> lib.Schema:
+        """The arrow Schema attached to the partitioning."""
+
+class PartitioningFactory(lib._Weakrefable):
+    @property
+    def type_name(self) -> str: ...
+
+class KeyValuePartitioning(Partitioning):
+    @property
+    def dictionaries(self) -> list[lib.Array | None]:
+        """
+        The unique values for each partition field, if available.
+
+        Those values are only available if the Partitioning object was
+        created through dataset discovery from a PartitioningFactory, or
+        if the dictionaries were manually specified in the constructor.
+        If no dictionary field is available, this returns an empty list.
+        """
+
+class DirectoryPartitioning(KeyValuePartitioning):
+    """
+    A Partitioning based on a specified Schema.
+
+    The DirectoryPartitioning expects one segment in the file path for each
+    field in the schema (all fields are required to be present).
+    For example given schema<year:int16, month:int8> the path "/2009/11" would
+    be parsed to ("year"_ == 2009 and "month"_ == 11).
+
+    Parameters
+    ----------
+    schema : Schema
+        The schema that describes the partitions present in the file path.
+    dictionaries : dict[str, Array]
+        If the type of any field of `schema` is a dictionary type, the
+        corresponding entry of `dictionaries` must be an array containing
+        every value which may be taken by the corresponding column or an
+        error will be raised in parsing.
+    segment_encoding : str, default "uri"
+        After splitting paths into segments, decode the segments. Valid
+        values are "uri" (URI-decode segments) and "none" (leave as-is).
+
+    Returns
+    -------
+    DirectoryPartitioning
+
+    Examples
+    --------
+    >>> from pyarrow.dataset import DirectoryPartitioning
+    >>> partitioning = DirectoryPartitioning(
+    ...     pa.schema([("year", pa.int16()), ("month", pa.int8())])
+    ... )
+    >>> print(partitioning.parse("/2009/11/"))
+    ((year == 2009) and (month == 11))
+    """
+
+    @staticmethod
+    def discover(
+        field_names: list[str] | None = None,
+        infer_dictionary: bool = False,
+        max_partition_dictionary_size: int = 0,
+        schema: lib.Schema | None = None,
+        segment_encoding: Literal["uri", "none"] = "uri",
+    ) -> PartitioningFactory:
+        """
+        Discover a DirectoryPartitioning.
+
+        Parameters
+        ----------
+        field_names : list of str
+            The names to associate with the values from the subdirectory names.
+            If schema is given, will be populated from the schema.
+        infer_dictionary : bool, default False
+            When inferring a schema for partition fields, yield dictionary
+            encoded types instead of plain types. This can be more efficient
+            when materializing virtual columns, and Expressions parsed by the
+            finished Partitioning will include dictionaries of all unique
+            inspected values for each field.
+        max_partition_dictionary_size : int, default 0
+            Synonymous with infer_dictionary for backwards compatibility with
+            1.0: setting this to -1 or None is equivalent to passing
+            infer_dictionary=True.
+        schema : Schema, default None
+            Use this schema instead of inferring a schema from partition
+            values. Partition values will be validated against this schema
+            before accumulation into the Partitioning's dictionary.
+        segment_encoding : str, default "uri"
+            After splitting paths into segments, decode the segments. Valid
+            values are "uri" (URI-decode segments) and "none" (leave as-is).
+
+        Returns
+        -------
+        PartitioningFactory
+            To be used in the FileSystemFactoryOptions.
+        """
+    def __init__(
+        self,
+        schema: lib.Schema,
+        dictionaries: dict[str, lib.Array] | None = None,
+        segment_encoding: Literal["uri", "none"] = "uri",
+    ) -> None: ...
+
+class HivePartitioning(KeyValuePartitioning):
+    """
+    A Partitioning for "/$key=$value/" nested directories as found in
+    Apache Hive.
+
+    Multi-level, directory based partitioning scheme originating from
+    Apache Hive with all data files stored in the leaf directories. Data is
+    partitioned by static values of a particular column in the schema.
+    Partition keys are represented in the form $key=$value in directory names.
+    Field order is ignored, as are missing or unrecognized field names.
+
+    For example, given schema<year:int16, month:int8, day:int8>, a possible
+    path would be "/year=2009/month=11/day=15".
+
+    Parameters
+    ----------
+    schema : Schema
+        The schema that describes the partitions present in the file path.
+    dictionaries : dict[str, Array]
+        If the type of any field of `schema` is a dictionary type, the
+        corresponding entry of `dictionaries` must be an array containing
+        every value which may be taken by the corresponding column or an
+        error will be raised in parsing.
+    null_fallback : str, default "__HIVE_DEFAULT_PARTITION__"
+        If any field is None then this fallback will be used as a label.
+    segment_encoding : str, default "uri"
+        After splitting paths into segments, decode the segments. Valid
+        values are "uri" (URI-decode segments) and "none" (leave as-is).
+
+    Returns
+    -------
+    HivePartitioning
+
+    Examples
+    --------
+    >>> from pyarrow.dataset import HivePartitioning
+    >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())]))
+    >>> print(partitioning.parse("/year=2009/month=11/"))
+    ((year == 2009) and (month == 11))
+
+    """
+    def __init__(
+        self,
+        schema: lib.Schema,
+        dictionaries: dict[str, lib.Array] | None = None,
+        null_fallback: str = "__HIVE_DEFAULT_PARTITION__",
+        segment_encoding: Literal["uri", "none"] = "uri",
+    ) -> None: ...
+    @staticmethod
+    def discover(
+        infer_dictionary: bool = False,
+        max_partition_dictionary_size: int = 0,
+        null_fallback: str = "__HIVE_DEFAULT_PARTITION__",
+        schema: lib.Schema | None = None,
+        segment_encoding: Literal["uri", "none"] = "uri",
+    ) -> PartitioningFactory:
+        """
+        Discover a HivePartitioning.
+
+        Parameters
+        ----------
+        infer_dictionary : bool, default False
+            When inferring a schema for partition fields, yield dictionary
+            encoded types instead of plain types. This can be more efficient when
+            materializing virtual columns, and Expressions parsed by the
+            finished Partitioning will include dictionaries of all unique
+            inspected values for each field.
+        max_partition_dictionary_size : int, default 0
+            Synonymous with infer_dictionary for backwards compatibility with
+            1.0: setting this to -1 or None is equivalent to passing
+            infer_dictionary=True.
+        null_fallback : str, default "__HIVE_DEFAULT_PARTITION__"
+            When inferring a schema for partition fields this value will be
+            replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__
+            for compatibility with Spark.
+        schema : Schema, default None
+            Use this schema instead of inferring a schema from partition
+            values. Partition values will be validated against this schema
+            before accumulation into the Partitioning's dictionary.
+        segment_encoding : str, default "uri"
+            After splitting paths into segments, decode the segments. Valid
+            values are "uri" (URI-decode segments) and "none" (leave as-is).
+
+        Returns
+        -------
+        PartitioningFactory
+            To be used in the FileSystemFactoryOptions.
+        """
+
+class FilenamePartitioning(KeyValuePartitioning):
+    """
+    A Partitioning based on a specified Schema.
+
+    The FilenamePartitioning expects one segment in the file name for each
+    field in the schema (all fields are required to be present) separated
+    by '_'. For example given schema<year:int16, month:int8> the name
+    ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11).
+
+    Parameters
+    ----------
+    schema : Schema
+        The schema that describes the partitions present in the file path.
+    dictionaries : dict[str, Array]
+        If the type of any field of `schema` is a dictionary type, the
+        corresponding entry of `dictionaries` must be an array containing
+        every value which may be taken by the corresponding column or an
+        error will be raised in parsing.
+    segment_encoding : str, default "uri"
+        After splitting paths into segments, decode the segments. Valid
+        values are "uri" (URI-decode segments) and "none" (leave as-is).
+
+    Returns
+    -------
+    FilenamePartitioning
+
+    Examples
+    --------
+    >>> from pyarrow.dataset import FilenamePartitioning
+    >>> partitioning = FilenamePartitioning(
+    ...
pa.schema([("year", pa.int16()), ("month", pa.int8())]) + ... ) + >>> print(partitioning.parse("2009_11_data.parquet")) + ((year == 2009) and (month == 11)) + """ + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a FilenamePartitioning. + + Parameters + ---------- + field_names : list of str + The names to associate with the values from the subdirectory names. + If schema is given, will be populated from the schema. + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain types. This can be more efficient + when materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + +class DatasetFactory(lib._Weakrefable): + """ + DatasetFactory is used to create a Dataset, inspect the Schema + of the fragments contained in it, and declare a partitioning. + """ + + root_partition: Expression + def finish(self, schema: lib.Schema | None = None) -> Dataset: + """ + Create a Dataset using the inspected schema or an explicit schema + (if given). + + Parameters + ---------- + schema : Schema, default None + The schema to conform the source to. If None, the inspected + schema is used. + + Returns + ------- + Dataset + """ + def inspect(self) -> lib.Schema: + """ + Inspect all data fragments and return a common Schema. + + Returns + ------- + Schema + """ + def inspect_schemas(self) -> list[lib.Schema]: ... + +class FileSystemFactoryOptions(lib._Weakrefable): + """ + Influences the discovery of filesystem paths. + + Parameters + ---------- + partition_base_dir : str, optional + For the purposes of applying the partitioning, paths will be + stripped of the partition_base_dir. Files not matching the + partition_base_dir prefix will be skipped for partitioning discovery. + The ignored files will still be part of the Dataset, but will not + have partition information. + partitioning : Partitioning/PartitioningFactory, optional + Apply the Partitioning to every discovered Fragment. See Partitioning or + PartitioningFactory documentation. + exclude_invalid_files : bool, optional (default True) + If True, invalid files will be excluded (file format specific check). + This will incur IO for each files in a serial and single threaded + fashion. Disabling this feature will skip the IO, but unsupported + files may be present in the Dataset (resulting in an error at scan + time). + selector_ignore_prefixes : list, optional + When discovering from a Selector (and not from an explicit file list), + ignore files and directories matching any of these prefixes. + By default this is ['.', '_']. 
+    """
+
+    partitioning: Partitioning
+    partitioning_factory: PartitioningFactory
+    partition_base_dir: str
+    exclude_invalid_files: bool
+    selector_ignore_prefixes: list[str]
+
+    def __init__(
+        self,
+        partition_base_dir: str | None = None,
+        partitioning: Partitioning | PartitioningFactory | None = None,
+        exclude_invalid_files: bool = True,
+        selector_ignore_prefixes: list[str] | None = None,
+    ) -> None: ...
+
+class FileSystemDatasetFactory(DatasetFactory):
+    """
+    Create a DatasetFactory from a list of paths with schema inspection.
+
+    Parameters
+    ----------
+    filesystem : pyarrow.fs.FileSystem
+        Filesystem to discover.
+    paths_or_selector : pyarrow.fs.FileSelector or list of path-likes
+        Either a Selector object or a list of path-like objects.
+    format : FileFormat
+        Currently only ParquetFileFormat and IpcFileFormat are supported.
+    options : FileSystemFactoryOptions, optional
+        Various flags influencing the discovery of filesystem paths.
+    """
+
+    def __init__(
+        self,
+        filesystem: SupportedFileSystem,
+        paths_or_selector: FileSelector,
+        format: FileFormat,
+        options: FileSystemFactoryOptions | None = None,
+    ) -> None: ...
+
+class UnionDatasetFactory(DatasetFactory):
+    """
+    Provides a way to inspect/discover a Dataset's expected schema before
+    materialization.
+
+    Parameters
+    ----------
+    factories : list of DatasetFactory
+    """
+    def __init__(self, factories: list[DatasetFactory]) -> None: ...
+
+_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch)
+
+class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]):
+    """An iterator over a sequence of record batches."""
+    def __iter__(self) -> Self: ...
+    def __next__(self) -> _RecordBatchT: ...
+
+class TaggedRecordBatch(NamedTuple):
+    """
+    A combination of a record batch and the fragment it came from.
+
+    Parameters
+    ----------
+    record_batch : RecordBatch
+        The record batch.
+    fragment : Fragment
+        Fragment of the record batch.
+    """
+
+    record_batch: lib.RecordBatch
+    fragment: Fragment
+
+class TaggedRecordBatchIterator(lib._Weakrefable):
+    """An iterator over a sequence of record batches with fragments."""
+    def __iter__(self) -> Self: ...
+    def __next__(self) -> TaggedRecordBatch: ...
+
+class Scanner(lib._Weakrefable):
+    """A materialized scan operation with context and options bound.
+
+    A scanner is the class that glues the scan tasks, data fragments and data
+    sources together.
+    """
+    @staticmethod
+    def from_dataset(
+        dataset: Dataset,
+        *,
+        columns: list[str] | dict[str, Expression] | None = None,
+        filter: Expression | None = None,
+        batch_size: int = ...,
+        batch_readahead: int = 16,
+        fragment_readahead: int = 4,
+        fragment_scan_options: FragmentScanOptions | None = None,
+        use_threads: bool = True,
+        cache_metadata: bool = True,
+        memory_pool: lib.MemoryPool | None = None,
+    ) -> Scanner:
+        """
+        Create Scanner from Dataset.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            Dataset to scan.
+        columns : list[str] or dict[str, Expression], default None
+            The columns to project. This can be a list of column names to
+            include (order and duplicates will be preserved), or a dictionary
+            with {new_column_name: expression} values for more advanced
+            projections.
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @staticmethod + def from_fragment( + fragment: Fragment, + *, + schema: lib.Schema | None = None, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create Scanner from Fragment, + + Parameters + ---------- + fragment : Fragment + fragment to scan. + schema : Schema, optional + The schema of the fragment. + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). 
+ + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @overload + @staticmethod + def from_batches( + source: Iterator[lib.RecordBatch], + *, + schema: lib.Schema, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + @overload + @staticmethod + def from_batches( + source: RecordBatchReader, + *, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: ... + @staticmethod + def from_batches(*args, **kwargs): + """ + Create a Scanner from an iterator of batches. + + This creates a scanner which can be used only once. It is + intended to support writing a dataset (which takes a scanner) + from a source which can be read only once (e.g. a + RecordBatchReader or generator). + + Parameters + ---------- + source : Iterator or Arrow-compatible stream object + The iterator of Batches. This can be a pyarrow RecordBatchReader, + any object that implements the Arrow PyCapsule Protocol for + streams, or an actual Python iterator of RecordBatches. + schema : Schema + The schema of the batches (required when passing a Python + iterator). + columns : list[str] or dict[str, Expression], default None + The columns to project. 
This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @property + def dataset_schema(self) -> lib.Schema: + """The schema with which batches will be read from fragments.""" + @property + def projected_schema(self) -> lib.Schema: + """ + The materialized schema of the data, accounting for projections. + + This is the schema of any data returned from the scanner. + """ + def to_batches(self) -> Iterator[lib.RecordBatch]: + """ + Consume a Scanner in record batches. + + Returns + ------- + record_batches : iterator of RecordBatch + """ + def scan_batches(self) -> TaggedRecordBatchIterator: + """ + Consume a Scanner in record batches with corresponding fragments. + + Returns + ------- + record_batches : iterator of TaggedRecordBatch + """ + def to_table(self) -> lib.Table: + """ + Convert a Scanner into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Returns + ------- + Table + """ + def take(self, indices: Indices) -> lib.Table: + """ + Select rows of data by index. + + Will only consume as many batches of the underlying dataset as + needed. Otherwise, this is equivalent to + ``to_table().take(indices)``. 
+
+        Parameters
+        ----------
+        indices : Array or array-like
+            indices of rows to select in the dataset.
+
+        Returns
+        -------
+        Table
+        """
+    def head(self, num_rows: int) -> lib.Table:
+        """
+        Load the first N rows of the dataset.
+
+        Parameters
+        ----------
+        num_rows : int
+            The number of rows to load.
+
+        Returns
+        -------
+        Table
+        """
+    def count_rows(self) -> int:
+        """
+        Count rows matching the scanner filter.
+
+        Returns
+        -------
+        count : int
+        """
+    def to_reader(self) -> RecordBatchReader:
+        """Consume this scanner as a RecordBatchReader.
+
+        Returns
+        -------
+        RecordBatchReader
+        """
+
+def get_partition_keys(partition_expression: Expression) -> dict[str, Any]:
+    """
+    Extract partition keys (equality constraints between a field and a scalar)
+    from an expression as a dict mapping the field's name to its value.
+
+    NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning
+    will be conjunctions of equality conditions and are accessible through this
+    function. Other subexpressions will be ignored.
+
+    Parameters
+    ----------
+    partition_expression : pyarrow.dataset.Expression
+
+    Returns
+    -------
+    dict
+
+    Examples
+    --------
+
+    For example, an expression of
+    ``((part == "A") and (year == 2016))``
+    is converted to {'part': 'A', 'year': 2016}
+    """
+
+class WrittenFile(lib._Weakrefable):
+    """
+    Metadata information about files written as
+    part of a dataset write operation.
+
+    Parameters
+    ----------
+    path : str
+        Path to the file.
+    metadata : pyarrow.parquet.FileMetaData, optional
+        For Parquet files, the Parquet file metadata.
+    size : int
+        The size of the file in bytes.
+    """
+    def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ...
+
+def _filesystemdataset_write(
+    data: Scanner,
+    base_dir: StrPath,
+    basename_template: str,
+    filesystem: SupportedFileSystem,
+    partitioning: Partitioning,
+    file_options: FileWriteOptions,
+    max_partitions: int,
+    file_visitor: Callable[[str], None],
+    existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"],
+    max_open_files: int,
+    max_rows_per_file: int,
+    min_rows_per_group: int,
+    max_rows_per_group: int,
+    create_dir: bool,
+): ...
+
+class _ScanNodeOptions(ExecNodeOptions):
+    def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ...
+
+class ScanNodeOptions(_ScanNodeOptions):
+    """
+    A Source node which yields batches from a Dataset scan.
+
+    This is the option class for the "scan" node factory.
+
+    This node is capable of applying pushdown projections or filters
+    to the file readers which reduce the amount of data that needs to
+    be read (if supported by the file format). But note that this does not
+    construct associated filter or project nodes to perform the final
+    filtering or projection. Rather, you may supply the same filter
+    expression or projection to the scan node that you also supply
+    to the filter or project node.
+
+    Yielded batches will be augmented with fragment/batch indices when
+    implicit_ordering=True to enable stable ordering for simple ExecPlans.
+
+    Parameters
+    ----------
+    dataset : pyarrow.dataset.Dataset
+        The dataset which acts as the data source.
+    **kwargs : dict, optional
+        Scan options. See `Scanner.from_dataset` for possible arguments.
+    require_sequenced_output : bool, default False
+        Batches are yielded sequentially, as in a single-threaded scan.
+    implicit_ordering : bool, default False
+        Preserve implicit ordering of data.
+    """
+
+    def __init__(
+        self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs
+    ) -> None: ...
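To make the partitioning, discovery and Scanner stubs above easier to follow, here is a minimal usage sketch. It assumes a hive-partitioned Parquet directory at /data/events with year/month partition fields and a value column; that path, those column names, and the filter are illustrative only and are not part of the stubs.

import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.dataset as ds

# Hive-style layout, e.g. /data/events/year=2024/month=11/part-0.parquet
part = ds.partitioning(
    pa.schema([("year", pa.int16()), ("month", pa.int8())]), flavor="hive"
)
dataset = ds.dataset("/data/events", format="parquet", partitioning=part)

# Scanner.from_dataset: project a column plus a derived expression and push
# a filter down to the partition / Parquet-statistics level where possible.
scanner = ds.Scanner.from_dataset(
    dataset,
    columns={
        "value": ds.field("value"),
        "value_x2": pc.multiply(ds.field("value"), 2),
    },
    filter=(ds.field("year") == 2024) & (ds.field("month") >= 6),
)
table = scanner.to_table()      # materialize the matching rows
n = scanner.count_rows()        # dataset-backed scanners can be re-scanned

# get_partition_keys() recovers {'year': ..., 'month': ...} from the
# partition expression attached to each discovered fragment.
for fragment in dataset.get_fragments():
    print(ds.get_partition_keys(fragment.partition_expression))

Note that, per the from_batches docstring above, a scanner built from an iterator of batches can be consumed only once, unlike the dataset-backed scanner in this sketch.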
diff --git a/python/pyarrow/_dataset_orc.pyi b/python/pyarrow/_dataset_orc.pyi new file mode 100644 index 00000000000..9c4ac04198f --- /dev/null +++ b/python/pyarrow/_dataset_orc.pyi @@ -0,0 +1,6 @@ +from ._dataset import FileFormat + +class OrcFileFormat(FileFormat): + def equals(self, other: OrcFileFormat) -> bool: ... + @property + def default_extname(self): ... diff --git a/python/pyarrow/_dataset_parquet.pyi b/python/pyarrow/_dataset_parquet.pyi new file mode 100644 index 00000000000..cbcc17235f1 --- /dev/null +++ b/python/pyarrow/_dataset_parquet.pyi @@ -0,0 +1,314 @@ +from dataclasses import dataclass +from typing import IO, Any, Iterable, TypedDict + +from _typeshed import StrPath + +from ._compute import Expression +from ._dataset import ( + DatasetFactory, + FileFormat, + FileFragment, + FileWriteOptions, + Fragment, + FragmentScanOptions, + Partitioning, + PartitioningFactory, +) +from ._dataset_parquet_encryption import ParquetDecryptionConfig +from ._fs import SupportedFileSystem +from ._parquet import FileDecryptionProperties, FileMetaData +from .lib import CacheOptions, Schema, _Weakrefable + +parquet_encryption_enabled: bool + +class ParquetFileFormat(FileFormat): + """ + FileFormat for Parquet + + Parameters + ---------- + read_options : ParquetReadOptions + Read options for the file. + default_fragment_scan_options : ParquetFragmentScanOptions + Scan Options for the file. + **kwargs : dict + Additional options for read option or scan option + """ + def __init__( + self, + read_options: ParquetReadOptions | None = None, + default_fragment_scan_options: ParquetFragmentScanOptions | None = None, + **kwargs, + ) -> None: ... + @property + def read_options(self) -> ParquetReadOptions: ... + def make_write_options(self) -> ParquetFileWriteOptions: ... # type: ignore[override] + def equals(self, other: ParquetFileFormat) -> bool: ... + @property + def default_extname(self) -> str: ... + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + row_groups: Iterable[int] | None = None, + *, + file_size: int | None = None, + ) -> Fragment: + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. + row_groups : Iterable, optional + The indices of the row groups to include + file_size : int, optional + The size of the file in bytes. Can improve performance with high-latency filesystems + when file size needs to be known before reading. + + Returns + ------- + fragment : Fragment + The file fragment + """ + +class _NameStats(TypedDict): + min: Any + max: Any + +class RowGroupInfo: + """ + A wrapper class for RowGroup information + + Parameters + ---------- + id : integer + The group ID. + metadata : FileMetaData + The rowgroup metadata. + schema : Schema + Schema of the rows. + """ + + id: int + metadata: FileMetaData + schema: Schema + + def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... 
+    @property
+    def statistics(self) -> dict[str, _NameStats]: ...
+
+class ParquetFileFragment(FileFragment):
+    """A Fragment representing a parquet file."""
+
+    def ensure_complete_metadata(self) -> None: ...
+    @property
+    def row_groups(self) -> list[RowGroupInfo]: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def num_row_groups(self) -> int:
+        """
+        Return the number of row groups viewed by this fragment (not the
+        number of row groups in the origin file).
+        """
+    def split_by_row_group(
+        self, filter: Expression | None = None, schema: Schema | None = None
+    ) -> list[Fragment]:
+        """
+        Split the fragment into multiple fragments.
+
+        Yield a Fragment wrapping each row group in this ParquetFileFragment.
+        Row groups will be excluded whose metadata contradicts the optional
+        filter.
+
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+
+        Returns
+        -------
+        A list of Fragments
+        """
+    def subset(
+        self,
+        filter: Expression | None = None,
+        schema: Schema | None = None,
+        row_group_ids: list[int] | None = None,
+    ) -> ParquetFileFragment:
+        """
+        Create a subset of the fragment (viewing a subset of the row groups).
+
+        Subset can be specified by either a filter predicate (with optional
+        schema) or by a list of row group IDs. Note that when using a filter,
+        the resulting fragment can be empty (viewing no row groups).
+
+        Parameters
+        ----------
+        filter : Expression, default None
+            Only include the row groups which satisfy this predicate (using
+            the Parquet RowGroup statistics).
+        schema : Schema, default None
+            Schema to use when filtering row groups. Defaults to the
+            Fragment's physical schema
+        row_group_ids : list of ints
+            The row group IDs to include in the subset. Can only be specified
+            if `filter` is None.
+
+        Returns
+        -------
+        ParquetFileFragment
+        """
+
+class ParquetReadOptions(_Weakrefable):
+    """
+    Parquet format specific options for reading.
+
+    Parameters
+    ----------
+    dictionary_columns : list of string, default None
+        Names of columns which should be dictionary encoded as
+        they are read
+    coerce_int96_timestamp_unit : str, default None
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be inferred as timestamps
+        in nanoseconds
+    """
+    def __init__(
+        self,
+        dictionary_columns: list[str] | None = None,
+        coerce_int96_timestamp_unit: str | None = None,
+    ) -> None: ...
+    @property
+    def coerce_int96_timestamp_unit(self) -> str: ...
+    @coerce_int96_timestamp_unit.setter
+    def coerce_int96_timestamp_unit(self, unit: str) -> None: ...
+    def equals(self, other: ParquetReadOptions) -> bool: ...
+
+class ParquetFileWriteOptions(FileWriteOptions):
+    def update(self, **kwargs) -> None: ...
+    def _set_properties(self) -> None: ...
+    def _set_arrow_properties(self) -> None: ...
+    def _set_encryption_config(self) -> None: ...
+
+@dataclass(kw_only=True)
+class ParquetFragmentScanOptions(FragmentScanOptions):
+    """
+    Scan-specific options for Parquet fragments.
+
+    Parameters
+    ----------
+    use_buffered_stream : bool, default False
+        Read files through buffered input streams rather than loading entire
+        row groups at once. This may be enabled to reduce memory overhead.
+        Disabled by default.
+ buffer_size : int, default 8192 + Size of buffered stream, if enabled. Default is 8KB. + pre_buffer : bool, default True + If enabled, pre-buffer the raw Parquet data instead of issuing one + read per column chunk. This can improve performance on high-latency + filesystems (e.g. S3, GCS) by coalescing and issuing file reads in + parallel using a background I/O thread pool. + Set to False if you want to prioritize minimal memory usage + over maximum speed. + cache_options : pyarrow.CacheOptions, default None + Cache options used when pre_buffer is enabled. The default values should + be good for most use cases. You may want to adjust these for example if + you have exceptionally high latency to the file system. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None + If not None, use the provided ParquetDecryptionConfig to decrypt the + Parquet file. + decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None + If not None, use the provided FileDecryptionProperties to decrypt encrypted + Parquet file. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file. + """ + + use_buffered_stream: bool = False + buffer_size: int = 8192 + pre_buffer: bool = True + cache_options: CacheOptions | None = None + thrift_string_size_limit: int | None = None + thrift_container_size_limit: int | None = None + decryption_config: ParquetDecryptionConfig | None = None + decryption_properties: FileDecryptionProperties | None = None + page_checksum_verification: bool = False + + def equals(self, other: ParquetFragmentScanOptions) -> bool: ... + +@dataclass +class ParquetFactoryOptions(_Weakrefable): + """ + Influences the discovery of parquet dataset. + + Parameters + ---------- + partition_base_dir : str, optional + For the purposes of applying the partitioning, paths will be + stripped of the partition_base_dir. Files not matching the + partition_base_dir prefix will be skipped for partitioning discovery. + The ignored files will still be part of the Dataset, but will not + have partition information. + partitioning : Partitioning, PartitioningFactory, optional + The partitioning scheme applied to fragments, see ``Partitioning``. + validate_column_chunk_paths : bool, default False + Assert that all ColumnChunk paths are consistent. The parquet spec + allows for ColumnChunk data to be stored in multiple files, but + ParquetDatasetFactory supports only a single file with all ColumnChunk + data. If this flag is set construction of a ParquetDatasetFactory will + raise an error if ColumnChunk data is not resident in a single file. + """ + + partition_base_dir: str | None = None + partitioning: Partitioning | PartitioningFactory | None = None + validate_column_chunk_paths: bool = False + +class ParquetDatasetFactory(DatasetFactory): + """ + Create a ParquetDatasetFactory from a Parquet `_metadata` file. + + Parameters + ---------- + metadata_path : str + Path to the `_metadata` parquet metadata-only file generated with + `pyarrow.parquet.write_metadata`. 
+ filesystem : pyarrow.fs.FileSystem + Filesystem to read the metadata_path from, and subsequent parquet + files. + format : ParquetFileFormat + Parquet format options. + options : ParquetFactoryOptions, optional + Various flags influencing the discovery of filesystem paths. + """ + def __init__( + self, + metadata_path: str, + filesystem: SupportedFileSystem, + format: FileFormat, + options: ParquetFactoryOptions | None = None, + ) -> None: ... diff --git a/python/pyarrow/_dataset_parquet_encryption.pyi b/python/pyarrow/_dataset_parquet_encryption.pyi new file mode 100644 index 00000000000..7623275b865 --- /dev/null +++ b/python/pyarrow/_dataset_parquet_encryption.pyi @@ -0,0 +1,85 @@ +from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions +from ._parquet import FileDecryptionProperties +from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig +from .lib import _Weakrefable + +class ParquetEncryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level encryption + within the Parquet framework. + + The ParquetEncryptionConfig class serves as a bridge for passing encryption-related + parameters to the appropriate components within the Parquet library. It maintains references + to objects that define the encryption strategy, Key Management Service (KMS) configuration, + and specific encryption configurations for Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object. The `CryptoFactory` is responsible for + creating cryptographic components, such as encryptors and decryptors. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object. This object holds the configuration + parameters necessary for connecting to a Key Management Service (KMS). + encryption_config : pyarrow.parquet.encryption.EncryptionConfiguration + Shared pointer to an `EncryptionConfiguration` object. This object defines specific + encryption settings for Parquet data, including the keys assigned to different columns. + + Raises + ------ + ValueError + Raised if `encryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +class ParquetDecryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level decryption + within the Parquet framework. + + ParquetDecryptionConfig is designed to pass decryption-related parameters to + the appropriate decryption components within the Parquet library. It holds references to + objects that define the decryption strategy, Key Management Service (KMS) configuration, + and specific decryption configurations for reading encrypted Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object, pivotal in creating cryptographic + components for the decryption process. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object, containing parameters necessary + for connecting to a Key Management Service (KMS) during decryption. 
+ decryption_config : pyarrow.parquet.encryption.DecryptionConfiguration + Shared pointer to a `DecryptionConfiguration` object, specifying decryption settings + for reading encrypted Parquet data. + + Raises + ------ + ValueError + Raised if `decryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +def set_encryption_config( + opts: ParquetFileWriteOptions, + config: ParquetEncryptionConfig, +) -> None: ... +def set_decryption_properties( + opts: ParquetFragmentScanOptions, + config: FileDecryptionProperties, +): ... +def set_decryption_config( + opts: ParquetFragmentScanOptions, + config: ParquetDecryptionConfig, +): ... diff --git a/python/pyarrow/_feather.pyi b/python/pyarrow/_feather.pyi new file mode 100644 index 00000000000..8bb914ba45d --- /dev/null +++ b/python/pyarrow/_feather.pyi @@ -0,0 +1,29 @@ +from typing import IO + +from _typeshed import StrPath + +from .lib import Buffer, NativeFile, Table, _Weakrefable + +class FeatherError(Exception): ... + +def write_feather( + table: Table, + dest: StrPath | IO | NativeFile, + compression: str | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: int = 2, +): ... + +class FeatherReader(_Weakrefable): + def __init__( + self, + source: StrPath | IO | NativeFile | Buffer, + use_memory_map: bool, + use_threads: bool, + ) -> None: ... + @property + def version(self) -> str: ... + def read(self) -> Table: ... + def read_indices(self, indices: list[int]) -> Table: ... + def read_names(self, names: list[str]) -> Table: ... diff --git a/python/pyarrow/_flight.pyi b/python/pyarrow/_flight.pyi new file mode 100644 index 00000000000..4450c42df49 --- /dev/null +++ b/python/pyarrow/_flight.pyi @@ -0,0 +1,1380 @@ +import asyncio +import enum +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Generator, Generic, Iterable, Iterator, NamedTuple, TypeVar + +from typing_extensions import deprecated + +from .ipc import _ReadPandasMixin +from .lib import ( + ArrowCancelled, + ArrowException, + ArrowInvalid, + Buffer, + IpcReadOptions, + IpcWriteOptions, + RecordBatch, + RecordBatchReader, + Schema, + Table, + TimestampScalar, + _CRecordBatchWriter, + _Weakrefable, +) + +_T = TypeVar("_T") + +class FlightCallOptions(_Weakrefable): + """RPC-layer options for a Flight call.""" + + def __init__( + self, + timeout: float | None = None, + write_options: IpcWriteOptions | None = None, + headers: list[tuple[str, str]] | None = None, + read_options: IpcReadOptions | None = None, + ) -> None: + """Create call options. + + Parameters + ---------- + timeout : float, None + A timeout for the call, in seconds. None means that the + timeout defaults to an implementation-specific value. + write_options : pyarrow.ipc.IpcWriteOptions, optional + IPC write options. The default options can be controlled + by environment variables (see pyarrow.ipc). + headers : List[Tuple[str, str]], optional + A list of arbitrary headers as key, value tuples + read_options : pyarrow.ipc.IpcReadOptions, optional + Serialization options for reading IPC format. + """ + +class CertKeyPair(NamedTuple): + """A TLS certificate and key for use in Flight.""" + + cert: str + key: str + +class FlightError(Exception): + """ + The base class for Flight-specific errors. 
+ + A server may raise this class or one of its subclasses to provide + a more detailed error to clients. + + Parameters + ---------- + message : str, optional + The error message. + extra_info : bytes, optional + Extra binary error details that were provided by the + server/will be sent to the client. + + Attributes + ---------- + extra_info : bytes + Extra binary error details that were provided by the + server/will be sent to the client. + """ + + extra_info: bytes + +class FlightInternalError(FlightError, ArrowException): + """An error internal to the Flight server occurred.""" + +class FlightTimedOutError(FlightError, ArrowException): + """The Flight RPC call timed out.""" + +class FlightCancelledError(FlightError, ArrowCancelled): + """The operation was cancelled.""" + +class FlightServerError(FlightError, ArrowException): + """A server error occurred.""" + +class FlightUnauthenticatedError(FlightError, ArrowException): + """The client is not authenticated.""" + +class FlightUnauthorizedError(FlightError, ArrowException): + """The client is not authorized to perform the given operation.""" + +class FlightUnavailableError(FlightError, ArrowException): + """The server is not reachable or available.""" + +class FlightWriteSizeExceededError(ArrowInvalid): + """A write operation exceeded the client-configured limit.""" + + limit: int + actual: int + +class Action(_Weakrefable): + """An action executable on a Flight service.""" + + def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: + """Create an action from a type and a buffer. + + Parameters + ---------- + action_type : bytes or str + buf : Buffer or bytes-like object + """ + @property + def type(self) -> str: + """The action type.""" + @property + def body(self) -> Buffer: + """The action body (arguments for the action).""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + +class ActionType(NamedTuple): + """A type of action that is executable on a Flight service.""" + + type: str + description: str + + def make_action(self, buf: Buffer | bytes) -> Action: + """Create an Action with this type. + + Parameters + ---------- + buf : obj + An Arrow buffer or Python bytes or bytes-like object. + """ + +class Result(_Weakrefable): + """A result from executing an Action.""" + def __init__(self, buf: Buffer | bytes) -> None: + """Create a new result. + + Parameters + ---------- + buf : Buffer or bytes-like object + """ + @property + def body(self) -> Buffer: + """Get the Buffer containing the result.""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. 
+ + """ + +class BasicAuth(_Weakrefable): + """A container for basic auth.""" + def __init__( + self, username: str | bytes | None = None, password: str | bytes | None = None + ) -> None: + """Create a new basic auth object. + + Parameters + ---------- + username : string + password : string + """ + @property + def username(self) -> bytes: ... + @property + def password(self) -> bytes: ... + def serialize(self) -> str: ... + @staticmethod + def deserialize(serialized: str | bytes) -> BasicAuth: ... + +class DescriptorType(enum.Enum): + """ + The type of a FlightDescriptor. + + Attributes + ---------- + + UNKNOWN + An unknown descriptor type. + + PATH + A Flight stream represented by a path. + + CMD + A Flight stream represented by an application-defined command. + + """ + + UNKNOWN = 0 + PATH = 1 + CMD = 2 + +class FlightMethod(enum.Enum): + """The implemented methods in Flight.""" + + INVALID = 0 + HANDSHAKE = 1 + LIST_FLIGHTS = 2 + GET_FLIGHT_INFO = 3 + GET_SCHEMA = 4 + DO_GET = 5 + DO_PUT = 6 + DO_ACTION = 7 + LIST_ACTIONS = 8 + DO_EXCHANGE = 9 + +class FlightDescriptor(_Weakrefable): + """A description of a data stream available from a Flight service.""" + @staticmethod + def for_path(*path: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for a resource path.""" + + @staticmethod + def for_command(command: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for an opaque command.""" + @property + def descriptor_type(self) -> DescriptorType: + """Get the type of this descriptor.""" + @property + def path(self) -> list[bytes] | None: + """Get the path for this descriptor.""" + @property + def command(self) -> bytes | None: + """Get the command for this descriptor.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Ticket(_Weakrefable): + """A ticket for requesting a Flight stream.""" + def __init__(self, ticket: str | bytes) -> None: ... + @property + def ticket(self) -> bytes: ... + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Location(_Weakrefable): + """The location of a Flight service.""" + def __init__(self, uri: str | bytes) -> None: ... + @property + def uri(self) -> bytes: ... + def equals(self, other: Location) -> bool: ... + @staticmethod + def for_grpc_tcp(host: str | bytes, port: int) -> Location: + """Create a Location for a TCP-based gRPC service.""" + @staticmethod + def for_grpc_tls(host: str | bytes, port: int) -> Location: + """Create a Location for a TLS-based gRPC service.""" + @staticmethod + def for_grpc_unix(path: str | bytes) -> Location: + """Create a Location for a domain socket-based gRPC service.""" + +class FlightEndpoint(_Weakrefable): + """A Flight stream, along with the ticket and locations to access it.""" + def __init__( + self, + ticket: Ticket | str | bytes, + locations: list[str | Location], + expiration_time: TimestampScalar | None = ..., + app_metadata: bytes | str = ..., + ): + """Create a FlightEndpoint from a ticket and list of locations. + + Parameters + ---------- + ticket : Ticket or bytes + the ticket needed to access this flight + locations : list of string URIs + locations where this flight is available + expiration_time : TimestampScalar, default None + Expiration time of this stream. If present, clients may assume + they can retry DoGet requests. Otherwise, clients should avoid + retrying DoGet requests. 
+ app_metadata : bytes or str, default "" + Application-defined opaque metadata. + + Raises + ------ + ArrowException + If one of the location URIs is not a valid URI. + """ + @property + def ticket(self) -> Ticket: + """Get the ticket in this endpoint.""" + @property + def locations(self) -> list[Location]: + """Get locations where this flight is available.""" + def serialize(self) -> bytes: ... + @property + def expiration_time(self) -> TimestampScalar | None: + """Get the expiration time of this stream. + + If present, clients may assume they can retry DoGet requests. + Otherwise, clients should avoid retrying DoGet requests. + + """ + @property + def app_metadata(self) -> bytes | str: + """Get application-defined opaque metadata.""" + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class SchemaResult(_Weakrefable): + """The serialized schema returned from a GetSchema request.""" + def __init__(self, schema: Schema) -> None: + """Create a SchemaResult from a schema. + + Parameters + ---------- + schema: Schema + the schema of the data in this flight. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightInfo(_Weakrefable): + """A description of a Flight stream.""" + def __init__( + self, + schema: Schema, + descriptor: FlightDescriptor, + endpoints: list[FlightEndpoint], + total_records: int = ..., + total_bytes: int = ..., + ordered: bool = ..., + app_metadata: bytes | str = ..., + ) -> None: + """Create a FlightInfo object from a schema, descriptor, and endpoints. + + Parameters + ---------- + schema : Schema + the schema of the data in this flight. + descriptor : FlightDescriptor + the descriptor for this flight. + endpoints : list of FlightEndpoint + a list of endpoints where this flight is available. + total_records : int, default None + the total records in this flight, -1 or None if unknown. + total_bytes : int, default None + the total bytes in this flight, -1 or None if unknown. + ordered : boolean, default False + Whether endpoints are in the same order as the data. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + @property + def descriptor(self) -> FlightDescriptor: + """The descriptor of the data in this flight.""" + @property + def endpoints(self) -> list[FlightEndpoint]: + """The endpoints where this flight is available.""" + @property + def total_records(self) -> int: + """The total record count of this flight, or -1 if unknown.""" + @property + def total_bytes(self) -> int: + """The size in bytes of the data in this flight, or -1 if unknown.""" + @property + def ordered(self) -> bool: + """Whether endpoints are in the same order as the data.""" + @property + def app_metadata(self) -> bytes | str: + """ + Application-defined opaque metadata. + + There is no inherent or required relationship between this and the + app_metadata fields in the FlightEndpoints or resulting FlightData + messages. Since this metadata is application-defined, a given + application could define there to be a relationship, but there is + none required by the spec. + + """ + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... 
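The descriptor, ticket, endpoint and info types stubbed above are mostly consumed through FlightClient (stubbed further below). The sketch below shows that typical client-side flow; the server address grpc://localhost:8815 and the JSON command payload are illustrative assumptions only.

import pyarrow.flight as flight

client = flight.FlightClient("grpc://localhost:8815")  # assumed server location

# Describe the desired stream by an opaque, application-defined command.
descriptor = flight.FlightDescriptor.for_command(b'{"query": "recent_events"}')
info = client.get_flight_info(descriptor)   # FlightInfo
print(info.schema)                          # schema of the stream
print(info.total_records)                   # -1 when the server does not know

# A FlightInfo carries one FlightEndpoint per retrievable chunk of the
# stream; each endpoint holds the Ticket to pass to do_get().
tables = [client.do_get(endpoint.ticket).read_all() for endpoint in info.endpoints]

# serialize()/deserialize() round-trip the wire format, e.g. when relaying
# Flight metadata through a non-Flight channel such as REST.
restored = flight.FlightInfo.deserialize(info.serialize())

If an endpoint lists explicit locations, a client would normally connect to one of those before calling do_get; an empty list means the data can be fetched from the server that returned the FlightInfo.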
+ +class FlightStreamChunk(_Weakrefable): + """A RecordBatch with application metadata on the side.""" + @property + def data(self) -> RecordBatch | None: ... + @property + def app_metadata(self) -> Buffer | None: ... + def __iter__(self): ... + +class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): + """A reader for Flight streams.""" + + # Needs to be separate class so the "real" class can subclass the + # pure-Python mixin class + + def __iter__(self) -> Self: ... + def __next__(self) -> FlightStreamChunk: ... + @property + def schema(self) -> Schema: + """Get the schema for this reader.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + def read_chunk(self) -> FlightStreamChunk: + """Read the next FlightStreamChunk along with any metadata. + + Returns + ------- + chunk : FlightStreamChunk + The next FlightStreamChunk in the stream. + + Raises + ------ + StopIteration + when the stream is finished + """ + def to_reader(self) -> RecordBatchReader: + """Convert this reader into a regular RecordBatchReader. + + This may fail if the schema cannot be read from the remote end. + + Returns + ------- + RecordBatchReader + """ + +class MetadataRecordBatchReader(_MetadataRecordBatchReader): + """The base class for readers for Flight streams. + + See Also + -------- + FlightStreamReader + """ + +class FlightStreamReader(MetadataRecordBatchReader): + """A reader that can also be canceled.""" + def cancel(self) -> None: + """Cancel the read operation.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + +class MetadataRecordBatchWriter(_CRecordBatchWriter): + """A RecordBatchWriter that also allows writing application metadata. + + This class is a context manager; on exit, close() will be called. + """ + + def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: + """Prepare to write data to this stream with the given schema.""" + def write_metadata(self, buf: Buffer) -> None: + """Write Flight metadata by itself.""" + def write_batch(self, batch: RecordBatch) -> None: # type: ignore[override] + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + """ + def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: + """Write a RecordBatch along with Flight metadata. + + Parameters + ---------- + batch : RecordBatch + The next RecordBatch in the stream. + buf : Buffer + Application-specific metadata for the batch as defined by + Flight. 
+ """ + +class FlightStreamWriter(MetadataRecordBatchWriter): + """A writer that also allows closing the write side of a stream.""" + def done_writing(self) -> None: + """Indicate that the client is done writing, but not done reading.""" + +class FlightMetadataReader(_Weakrefable): + """A reader for Flight metadata messages sent during a DoPut.""" + def read(self) -> Buffer | None: + """Read the next metadata message.""" + +class FlightMetadataWriter(_Weakrefable): + """A sender for Flight metadata messages during a DoPut.""" + def write(self, message: Buffer) -> None: + """Write the next metadata message. + + Parameters + ---------- + message : Buffer + """ + +class AsyncioCall(Generic[_T]): + """State for an async RPC using asyncio.""" + + _future: asyncio.Future[_T] + + def as_awaitable(self) -> asyncio.Future[_T]: ... + def wakeup(self, result_or_exception: BaseException | _T) -> None: ... + +class AsyncioFlightClient: + """ + A FlightClient with an asyncio-based async interface. + + This interface is EXPERIMENTAL. + """ + + def __init__(self, client: FlightClient) -> None: ... + async def get_flight_info( + self, + descriptor: FlightDescriptor, + *, + options: FlightCallOptions | None = None, + ): ... + +class FlightClient(_Weakrefable): + """A client to a Flight service. + + Connect to a Flight service on the given host and port. + + Parameters + ---------- + location : str, tuple or Location + Location to connect to. Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. + tls_root_certs : bytes or None + PEM-encoded + cert_chain: bytes or None + Client certificate if using mutual TLS + private_key: bytes or None + Client private key for cert_chain is using mutual TLS + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list optional, default None + A list of ClientMiddlewareFactory instances. + write_size_limit_bytes : int optional, default None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean optional, default False + A flag that indicates that, if the client is connecting + with TLS, that it skips server verification. If this is + enabled, all other TLS settings are overridden. + generic_options : list optional, default None + A list of generic (string, int or string) option tuples passed + to the underlying transport. Effect is implementation + dependent. + """ + def __init__( + self, + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, + ): ... + @property + def supports_async(self) -> bool: ... + def as_async(self) -> AsyncioFlightClient: ... + def wait_for_available(self, timeout: int = 5) -> None: + """Block until the server can be contacted. + + Parameters + ---------- + timeout : int, default 5 + The maximum seconds to wait. + """ + @deprecated( + "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." 
+ ) + @classmethod + def connect( + cls, + location: str | tuple[str, int] | Location, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + disable_server_verification: bool = False, + ) -> FlightClient: + """Connect to a Flight server. + + .. deprecated:: 0.15.0 + Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead. + """ + def authenticate( + self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None + ) -> None: + """Authenticate to the server. + + Parameters + ---------- + auth_handler : ClientAuthHandler + The authentication mechanism to use. + options : FlightCallOptions + Options for this call. + """ + def authenticate_basic_token( + self, username: str, password: str, options: FlightCallOptions | None = None + ) -> tuple[str, str]: + """Authenticate to the server with HTTP basic authentication. + + Parameters + ---------- + username : string + Username to authenticate with + password : string + Password to authenticate with + options : FlightCallOptions + Options for this call + + Returns + ------- + tuple : Tuple[str, str] + A tuple representing the FlightCallOptions authorization + header entry of a bearer token. + """ + def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: + """List the actions available on a service.""" + def do_action( + self, action: Action, options: FlightCallOptions | None = None + ) -> Iterator[Result]: + """ + Execute an action on a service. + + Parameters + ---------- + action : str, tuple, or Action + Can be action type name (no body), type and body, or any Action + object + options : FlightCallOptions + RPC options + + Returns + ------- + results : iterator of Result values + """ + def list_flights( + self, criteria: str | None = None, options: FlightCallOptions | None = None + ) -> Generator[FlightInfo, None, None]: + """List the flights available on a service.""" + def get_flight_info( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> FlightInfo: + """Request information about an available flight.""" + def get_schema( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> Schema: + """Request schema for an available flight.""" + def do_get( + self, ticket: Ticket, options: FlightCallOptions | None = None + ) -> FlightStreamReader: + """Request the data for a flight. + + Returns + ------- + reader : FlightStreamReader + """ + def do_put( + self, + descriptor: FlightDescriptor, + schema: Schema, + options: FlightCallOptions | None = None, + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Upload data to a flight. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightMetadataReader + """ + def do_exchange( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Start a bidirectional data exchange with a server. + + Parameters + ---------- + descriptor : FlightDescriptor + A descriptor for the flight. + options : FlightCallOptions + RPC options. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightStreamReader + """ + def close(self) -> None: + """Close the client and disconnect.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... + +class FlightDataStream(_Weakrefable): + """ + Abstract base class for Flight data streams. 
+ + See Also + -------- + RecordBatchStream + GeneratorStream + """ + +class RecordBatchStream(FlightDataStream): + """A Flight data stream backed by RecordBatches. + + The remainder of this DoGet request will be handled in C++, + without having to acquire the GIL. + + """ + def __init__( + self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None + ) -> None: + """Create a RecordBatchStream from a data source. + + Parameters + ---------- + data_source : RecordBatchReader or Table + The data to stream to the client. + options : pyarrow.ipc.IpcWriteOptions, optional + Optional IPC options to control how to write the data. + """ + +class GeneratorStream(FlightDataStream): + """A Flight data stream backed by a Python generator.""" + def __init__( + self, + schema: Schema, + generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], + options: IpcWriteOptions | None = None, + ) -> None: + """Create a GeneratorStream from a Python generator. + + Parameters + ---------- + schema : Schema + The schema for the data to be returned. + + generator : iterator or iterable + The generator should yield other FlightDataStream objects, + Tables, RecordBatches, or RecordBatchReaders. + + options : pyarrow.ipc.IpcWriteOptions, optional + """ + +class ServerCallContext(_Weakrefable): + """Per-call state/context.""" + def peer_identity(self) -> bytes: + """Get the identity of the authenticated peer. + + May be the empty string. + """ + def peer(self) -> str: + """Get the address of the peer.""" + # Set safe=True as gRPC on Windows sometimes gives garbage bytes + def is_cancelled(self) -> bool: + """Check if the current RPC call has been canceled by the client.""" + def add_header(self, key: str, value: str) -> None: + """Add a response header.""" + def add_trailer(self, key: str, value: str) -> None: + """Add a response trailer.""" + def get_middleware(self, key: str) -> ServerMiddleware | None: + """ + Get a middleware instance by key. + + Returns None if the middleware was not found. + """ + +class ServerAuthReader(_Weakrefable): + """A reader for messages from the client during an auth handshake.""" + def read(self) -> str: ... + +class ServerAuthSender(_Weakrefable): + """A writer for messages to the client during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ClientAuthReader(_Weakrefable): + """A reader for messages from the server during an auth handshake.""" + def read(self) -> str: ... + +class ClientAuthSender(_Weakrefable): + """A writer for messages to the server during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ServerAuthHandler(_Weakrefable): + """Authentication middleware for a server. + + To implement an authentication mechanism, subclass this class and + override its methods. + + """ + def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): + """Conduct the handshake with the client. + + May raise an error if the client cannot authenticate. + + Parameters + ---------- + outgoing : ServerAuthSender + A channel to send messages to the client. + incoming : ServerAuthReader + A channel to read messages from the client. + """ + def is_valid(self, token: str) -> bool: + """Validate a client token, returning their identity. + + May return an empty string (if the auth mechanism does not + name the peer) or raise an exception (if the token is + invalid). + + Parameters + ---------- + token : bytes + The authentication token from the client. 
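[Editor's sketch, not part of the patch] A minimal ServerAuthHandler subclass built only from the methods typed above; the fixed token value is an assumption, and real handlers should validate credentials properly.

import pyarrow.flight as flight

class StaticTokenAuthHandler(flight.ServerAuthHandler):
    """Accept every client and hand out one hypothetical token."""

    def authenticate(self, outgoing, incoming):
        # Read whatever the client sent, then reply with a static token.
        incoming.read()
        outgoing.write("example-token")

    def is_valid(self, token):
        # Return the peer identity for a known token; raise otherwise.
        if token != "example-token":
            raise flight.FlightUnauthenticatedError("invalid token")
        return ""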
+ + """ + +class ClientAuthHandler(_Weakrefable): + """Authentication plugin for a client.""" + def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): + """Conduct the handshake with the server. + + Parameters + ---------- + outgoing : ClientAuthSender + A channel to send messages to the server. + incoming : ClientAuthReader + A channel to read messages from the server. + """ + def get_token(self) -> str: + """Get the auth token for a call.""" + +class CallInfo(NamedTuple): + """Information about a particular RPC for Flight middleware.""" + + method: FlightMethod + +class ClientMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + client are accessible from the middleware itself. + + """ + def start_call(self, info: CallInfo) -> ClientMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe and must not raise exceptions. + + Parameters + ---------- + info : CallInfo + Information about the call. + + Returns + ------- + instance : ClientMiddleware + An instance of ClientMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + """ + +class ClientMiddleware(_Weakrefable): + """Client-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. + + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the request, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + + def received_headers(self, headers: dict[str, list[str] | list[bytes]]): + """A callback when headers are received. + + The default implementation does nothing. + + Parameters + ---------- + headers : dict + A dictionary of headers from the server. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). + + """ + + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + The default implementation does nothing. + + Parameters + ---------- + exception : ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class ServerMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + middleware are accessible from the method itself. + + """ + + def start_call( + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> ServerMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe. + + Parameters + ---------- + info : CallInfo + Information about the call. + headers : dict + A dictionary of headers from the client. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). 
+ + Returns + ------- + instance : ServerMiddleware + An instance of ServerMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + Raises + ------ + exception : pyarrow.ArrowException + If an exception is raised, the call will be rejected with + the given error. + + """ + +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): + """A factory for tracing middleware instances. + + This enables OpenTelemetry support in Arrow (if Arrow was compiled + with OpenTelemetry support enabled). A new span will be started on + each RPC call. The TracingServerMiddleware instance can then be + retrieved within an RPC handler to get the propagated context, + which can be used to start a new span on the Python side. + + Because the Python/C++ OpenTelemetry libraries do not + interoperate, spans on the C++ side are not directly visible to + the Python side and vice versa. + + """ + +class ServerMiddleware(_Weakrefable): + """Server-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. + + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the response, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + Parameters + ---------- + exception : pyarrow.ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class TracingServerMiddleware(ServerMiddleware): + trace_context: dict + def __init__(self, trace_context: dict) -> None: ... + +class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory): + """Wrapper to bundle server middleware into a single C++ one.""" + + def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ... + def start_call( # type: ignore[override] + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> _ServerMiddlewareFactoryWrapper | None: ... + +class _ServerMiddlewareWrapper(ServerMiddleware): + def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ... + def send_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ... + def call_completed(self, exception: ArrowException) -> None: ... + +class _FlightServerFinalizer(_Weakrefable): + """ + A finalizer that shuts down the server on destruction. + + See ARROW-16597. If the server is still active at interpreter + exit, the process may segfault. + """ + + def finalize(self) -> None: ... + +class FlightServerBase(_Weakrefable): + """A Flight service definition. + + To start the server, create an instance of this class with an + appropriate location. The server will be running as soon as the + instance is created; it is not required to call :meth:`serve`. + + Override methods to define your Flight service. + + Parameters + ---------- + location : str, tuple or Location optional, default None + Location to serve on. Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. 
+ If None is passed then the server will be started on localhost with a + system provided random port. + auth_handler : ServerAuthHandler optional, default None + An authentication mechanism to use. May be None. + tls_certificates : list optional, default None + A list of (certificate, key) pairs. + verify_client : boolean optional, default False + If True, then enable mutual TLS: require the client to present + a client certificate, and validate the certificate. + root_certificates : bytes optional, default None + If enabling mutual TLS, this specifies the PEM-encoded root + certificate used to validate client certificates. + middleware : dict optional, default None + A dictionary of :class:`ServerMiddlewareFactory` instances. The + string keys can be used to retrieve the middleware instance within + RPC handlers (see :meth:`ServerCallContext.get_middleware`). + + """ + def __init__( + self, + location: str | tuple[str, int] | Location | None = None, + auth_handler: ServerAuthHandler | None = None, + tls_certificates: list[tuple[str, str]] | None = None, + verify_client: bool = False, + root_certificates: str | None = None, + middleware: dict[str, ServerMiddlewareFactory] | None = None, + ): ... + @property + def port(self) -> int: + """ + Get the port that this server is listening on. + + Returns a non-positive value if the operation is invalid + (e.g. init() was not called or server is listening on a domain + socket). + """ + def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: + """List flights available on this service. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + criteria : bytes + Filter criteria provided by the client. + + Returns + ------- + iterator of FlightInfo + + """ + def get_flight_info( + self, context: ServerCallContext, descriptor: FlightDescriptor + ) -> FlightInfo: + """Get information about a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + FlightInfo + + """ + def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: + """Get the schema of a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + Schema + + """ + def do_put( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: FlightMetadataWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : FlightMetadataWriter + A writer to send responses to the client. 
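[Editor's sketch, not part of the patch] A small service built on the base class typed above, overriding do_put and do_get; the in-memory dict, the location, and the "ticket equals descriptor path" convention are assumptions.

import pyarrow.flight as flight

class InMemoryFlightServer(flight.FlightServerBase):
    """Store uploaded tables in a dict keyed by descriptor path."""

    def __init__(self, location="grpc://0.0.0.0:8815"):
        super().__init__(location)
        self._tables = {}

    def do_put(self, context, descriptor, reader, writer):
        # Drain the client's upload into a Table and remember it.
        self._tables[descriptor.path[0]] = reader.read_all()

    def do_get(self, context, ticket):
        # The ticket body is assumed to match a previously uploaded path.
        table = self._tables[ticket.ticket]
        return flight.RecordBatchStream(table)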
+ + """ + def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + ticket : Ticket + The ticket for the flight. + + Returns + ------- + FlightDataStream + A stream of data to send back to the client. + + """ + def do_exchange( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: MetadataRecordBatchWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : MetadataRecordBatchWriter + A writer to send responses to the client. + + """ + def list_actions(self, context: ServerCallContext) -> Iterable[Action]: + """List custom actions available on this server. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + + Returns + ------- + iterator of ActionType or tuple + + """ + def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: + """Execute a custom action. + + This method should return an iterator, or it should be a + generator. Applications should override this method to + implement their own behavior. The default method raises a + NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + action : Action + The action to execute. + + Returns + ------- + iterator of bytes + + """ + def serve(self) -> None: + """Block until the server shuts down. + + This method only returns if shutdown() is called or a signal is + received. + """ + def run(self) -> None: + """Block until the server shuts down. + + .. deprecated:: 0.15.0 + Use the ``FlightServer.serve`` method instead + """ + def shutdown(self) -> None: + """Shut down the server, blocking until current requests finish. + + Do not call this directly from the implementation of a Flight + method, as then the server will block forever waiting for that + request to finish. Instead, call this method from a background + thread. + + This method should only be called once. + """ + def wait(self) -> None: + """Block until server is terminated with shutdown.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback): ... + +def connect( + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, +) -> FlightClient: + """ + Connect to a Flight server. + + Parameters + ---------- + location : str, tuple, or Location + Location to connect to. 
Either a URI like "grpc://localhost:port", + a tuple of (host, port), or a Location instance. + tls_root_certs : bytes or None + PEM-encoded. + cert_chain: str or None + If provided, enables TLS mutual authentication. + private_key: str or None + If provided, enables TLS mutual authentication. + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list or None + A list of ClientMiddlewareFactory instances to apply. + write_size_limit_bytes : int or None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean or None + Disable verifying the server when using TLS. + Insecure, use with caution. + generic_options : list or None + A list of generic (string, int or string) options to pass to + the underlying transport. + + Returns + ------- + client : FlightClient + """ diff --git a/python/pyarrow/_fs.pyi b/python/pyarrow/_fs.pyi new file mode 100644 index 00000000000..7670ef5230d --- /dev/null +++ b/python/pyarrow/_fs.pyi @@ -0,0 +1,1005 @@ +import datetime as dt +import enum +import sys + +from abc import ABC, abstractmethod + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Union, overload + +from fsspec import AbstractFileSystem # type: ignore[import-untyped] + +from .lib import NativeFile, _Weakrefable + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + +class FileType(enum.IntFlag): + NotFound = enum.auto() + Unknown = enum.auto() + File = enum.auto() + Directory = enum.auto() + +class FileInfo(_Weakrefable): + """ + FileSystem entry info. + + Parameters + ---------- + path : str + The full path to the filesystem entry. + type : FileType + The type of the filesystem entry. + mtime : datetime or float, default None + If given, the modification time of the filesystem entry. + If a float is given, it is the number of seconds since the + Unix epoch. + mtime_ns : int, default None + If given, the modification time of the filesystem entry, + in nanoseconds since the Unix epoch. + `mtime` and `mtime_ns` are mutually exclusive. + size : int, default None + If given, the filesystem entry size in bytes. This should only + be given if `type` is `FileType.File`. + + Examples + -------- + Generate a file: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> path_fs = local_path + "/pyarrow-fs-example.dat" + >>> with local.open_output_stream(path_fs) as stream: + ... 
stream.write(b"data") + 4 + + Get FileInfo object using ``get_file_info()``: + + >>> file_info = local.get_file_info(path_fs) + >>> file_info + + + Inspect FileInfo attributes: + + >>> file_info.type + + + >>> file_info.is_file + True + + >>> file_info.path + '/.../pyarrow-fs-example.dat' + + >>> file_info.base_name + 'pyarrow-fs-example.dat' + + >>> file_info.size + 4 + + >>> file_info.extension + 'dat' + + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + + def __init__( + self, + path: str, + type: FileType = FileType.Unknown, + *, + mtime: dt.datetime | float | None = None, + mtime_ns: int | None = None, + size: int | None = None, + ): ... + @property + def type(self) -> FileType: + """ + Type of the file. + + The returned enum values can be the following: + + - FileType.NotFound: target does not exist + - FileType.Unknown: target exists but its type is unknown (could be a + special file such as a Unix socket or character device, or + Windows NUL / CON / ...) + - FileType.File: target is a regular file + - FileType.Directory: target is a regular directory + + Returns + ------- + type : FileType + """ + @property + def is_file(self) -> bool: ... + @property + def path(self) -> str: + """ + The full file path in the filesystem. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.path + '/.../pyarrow-fs-example.dat' + """ + @property + def base_name(self) -> str: + """ + The file base name. + + Component after the last directory separator. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.base_name + 'pyarrow-fs-example.dat' + """ + @property + def size(self) -> int: + """ + The size in bytes, if available. + + Only regular files are guaranteed to have a size. + + Returns + ------- + size : int or None + """ + @property + def extension(self) -> str: + """ + The file extension. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.extension + 'dat' + """ + @property + def mtime(self) -> dt.datetime | None: + """ + The time of last modification, if available. + + Returns + ------- + mtime : datetime.datetime or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + """ + @property + def mtime_ns(self) -> int | None: + """ + The time of last modification, if available, expressed in nanoseconds + since the Unix epoch. + + Returns + ------- + mtime_ns : int or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + +class FileSelector(_Weakrefable): + """ + File and directory selector. + + It contains a set of options that describes how to search for files and + directories. + + Parameters + ---------- + base_dir : str + The directory in which to select files. Relative paths also work, use + '.' for the current directory and '..' for the parent. + allow_not_found : bool, default False + The behavior if `base_dir` doesn't exist in the filesystem. + If false, an error is returned. + If true, an empty selection is returned. + recursive : bool, default False + Whether to recurse into subdirectories. 
+ + Examples + -------- + List the contents of a directory and subdirectories: + + >>> selector_1 = fs.FileSelector(local_path, recursive=True) + >>> local.get_file_info(selector_1) # doctest: +SKIP + [, + , + ] + + List only the contents of the base directory: + + >>> selector_2 = fs.FileSelector(local_path) + >>> local.get_file_info(selector_2) # doctest: +SKIP + [, + ] + + Return empty selection if the directory doesn't exist: + + >>> selector_not_found = fs.FileSelector( + ... local_path + "/missing", recursive=True, allow_not_found=True + ... ) + >>> local.get_file_info(selector_not_found) + [] + """ + + base_dir: str + allow_not_found: bool + recursive: bool + def __init__(self, base_dir: str, allow_not_found: bool = False, recursive: bool = False): ... + +class FileSystem(_Weakrefable): + """ + Abstract file system API. + """ + + @classmethod + def from_uri(cls, uri: str) -> tuple[Self, str]: + """ + Create a new FileSystem from URI or Path. + + Recognized URI schemes are "file", "mock", "s3fs", "gs", "gcs", "hdfs" and "viewfs". + In addition, the argument can be a pathlib.Path object, or a string + describing an absolute local path. + + Parameters + ---------- + uri : string + URI-based path, for example: file:///some/local/path. + + Returns + ------- + tuple of (FileSystem, str path) + With (filesystem, path) tuple where path is the abstract path + inside the FileSystem instance. + + Examples + -------- + Create a new FileSystem subclass from a URI: + + >>> uri = "file:///{}/pyarrow-fs-example.dat".format(local_path) + >>> local_new, path_new = fs.FileSystem.from_uri(uri) + >>> local_new + >> path_new + '/.../pyarrow-fs-example.dat' + + Or from a s3 bucket: + + >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/") + (, 'usgs-landsat/collection02') + """ + def equals(self, other: FileSystem) -> bool: + """ + Parameters + ---------- + other : pyarrow.fs.FileSystem + + Returns + ------- + bool + """ + @property + def type_name(self) -> str: + """ + The filesystem's type name. + """ + @overload + def get_file_info(self, paths_or_selector: str) -> FileInfo: ... + @overload + def get_file_info(self, paths_or_selector: FileSelector | list[str]) -> list[FileInfo]: ... + def get_file_info(self, paths_or_selector): + """ + Get info for the given files. + + Any symlink is automatically dereferenced, recursively. A non-existing + or unreachable file returns a FileStat object and has a FileType of + value NotFound. An exception indicates a truly exceptional condition + (low-level I/O error, etc.). + + Parameters + ---------- + paths_or_selector : FileSelector, path-like or list of path-likes + Either a selector object, a path-like object or a list of + path-like objects. The selector's base directory will not be + part of the results, even if it exists. If it doesn't exist, + use `allow_not_found`. + + Returns + ------- + FileInfo or list of FileInfo + Single FileInfo object is returned for a single path, otherwise + a list of FileInfo objects is returned. + + Examples + -------- + >>> local + + >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) + + """ + def create_dir(self, path: str, *, recursive: bool = True) -> None: + """ + Create a directory and subdirectories. + + This function succeeds if the directory already exists. + + Parameters + ---------- + path : str + The path of the new directory. + recursive : bool, default True + Create nested directories as well. 
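[Editor's sketch, not part of the patch] Since this is a stubs patch, it may help to show how the get_file_info overloads above resolve for a type checker; the paths are hypothetical.

from pyarrow import fs

local = fs.LocalFileSystem()

# A single path resolves to one FileInfo...
info: fs.FileInfo = local.get_file_info("/tmp/example.dat")

# ...while a FileSelector (or a list of paths) resolves to list[FileInfo].
infos: list[fs.FileInfo] = local.get_file_info(
    fs.FileSelector("/tmp", recursive=True, allow_not_found=True)
)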
+ """ + def delete_dir(self, path: str) -> None: + """ + Delete a directory and its contents, recursively. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + """ + def delete_dir_contents( + self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False + ) -> None: + """ + Delete a directory's contents, recursively. + + Like delete_dir, but doesn't delete the directory itself. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + accept_root_dir : boolean, default False + Allow deleting the root directory's contents + (if path is empty or "/") + missing_dir_ok : boolean, default False + If False then an error is raised if path does + not exist + """ + def move(self, src: str, dest: str) -> None: + """ + Move / rename a file or directory. + + If the destination exists: + - if it is a non-empty directory, an error is returned + - otherwise, if it has the same type as the source, it is replaced + - otherwise, behavior is unspecified (implementation-dependent). + + Parameters + ---------- + src : str + The path of the file or the directory to be moved. + dest : str + The destination path where the file or directory is moved to. + + Examples + -------- + Create a new folder with a file: + + >>> local.create_dir("/tmp/other_dir") + >>> local.copy_file(path, "/tmp/move_example.dat") + + Move the file: + + >>> local.move("/tmp/move_example.dat", "/tmp/other_dir/move_example_2.dat") + + Inspect the file info: + + >>> local.get_file_info("/tmp/other_dir/move_example_2.dat") + + >>> local.get_file_info("/tmp/move_example.dat") + + + Delete the folder: + >>> local.delete_dir("/tmp/other_dir") + """ + def copy_file(self, src: str, dest: str) -> None: + """ + Copy a file. + + If the destination exists and is a directory, an error is returned. + Otherwise, it is replaced. + + Parameters + ---------- + src : str + The path of the file to be copied from. + dest : str + The destination path where the file is copied to. + + Examples + -------- + >>> local.copy_file(path, local_path + "/pyarrow-fs-example_copy.dat") + + Inspect the file info: + + >>> local.get_file_info(local_path + "/pyarrow-fs-example_copy.dat") + + >>> local.get_file_info(path) + + """ + def delete_file(self, path: str) -> None: + """ + Delete a file. + + Parameters + ---------- + path : str + The path of the file to be deleted. + """ + def open_input_file(self, path: str) -> NativeFile: + """ + Open an input file for random access reading. + + Parameters + ---------- + path : str + The source to open for reading. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_file()`: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data' + """ + def open_input_stream( + self, path: str, compression: str | None = "detect", buffer_size: int | None = None + ) -> NativeFile: + """ + Open an input stream for sequential reading. + + Parameters + ---------- + path : str + The source to open for reading. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly decompression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. 
+ + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_stream()`: + + >>> with local.open_input_stream(path) as f: + ... print(f.readall()) + b'data' + """ + def open_output_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ) -> NativeFile: + """ + Open an output stream for sequential writing. + + If the target already exists, existing data is truncated. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream(path) as stream: + ... stream.write(b"data") + 4 + """ + def open_append_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ): + """ + Open an output stream for appending. + + If the target doesn't exist, a new empty file is created. + + .. note:: + Some filesystem implementations do not support efficient + appending to an existing file, in which case this method will + raise NotImplementedError. + Consider writing to multiple files (using e.g. the dataset layer) + instead. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Append new data to a FileSystem subclass with nonempty file: + + >>> with local.open_append_stream(path) as f: + ... f.write(b"+newly added") + 12 + + Print out the content to the file: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data+newly added' + """ + def normalize_path(self, path: str) -> str: + """ + Normalize filesystem path. + + Parameters + ---------- + path : str + The path to normalize + + Returns + ------- + normalized_path : str + The normalized path + """ + +class LocalFileSystem(FileSystem): + """ + A FileSystem implementation accessing files on the local machine. 
+ + Details such as symlinks are abstracted away (symlinks are always followed, + except when deleting an entry). + + Parameters + ---------- + use_mmap : bool, default False + Whether open_input_stream and open_input_file should return + a mmap'ed file or a regular file. + + Examples + -------- + Create a FileSystem object with LocalFileSystem constructor: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> local + + + and write data on to the file: + + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + >>> with local.open_input_stream("/tmp/local_fs.dat") as stream: + ... print(stream.readall()) + b'data' + + Create a FileSystem object inferred from a URI of the saved file: + + >>> local_new, path = fs.LocalFileSystem().from_uri("/tmp/local_fs.dat") + >>> local_new + >> path + '/tmp/local_fs.dat' + + Check if FileSystems `local` and `local_new` are equal: + + >>> local.equals(local_new) + True + + Compare two different FileSystems: + + >>> local2 = fs.LocalFileSystem(use_mmap=True) + >>> local.equals(local2) + False + + Copy a file and print out the data: + + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/local_fs-copy.dat") + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as stream: + ... print(stream.readall()) + b'data' + + Open an output stream for appending, add text and print the new data: + + >>> with local.open_append_stream("/tmp/local_fs-copy.dat") as f: + ... f.write(b"+newly added") + 12 + + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as f: + ... print(f.readall()) + b'data+newly added' + + Create a directory, copy a file into it and then delete the whole directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.delete_dir("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + + Create a directory, copy a file into it and then delete + the content of the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.delete_dir_contents("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + + Create a directory, copy a file into it and then delete + the file from the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.delete_file("/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.get_file_info("/tmp/new_folder") + + + Move the file: + + >>> local.move("/tmp/local_fs-copy.dat", "/tmp/new_folder/local_fs-copy.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs-copy.dat") + + >>> local.get_file_info("/tmp/local_fs-copy.dat") + + + To finish delete the file left: + >>> local.delete_file("/tmp/local_fs.dat") + """ + + def __init__(self, *, use_mmap: bool = False) -> None: ... + +class SubTreeFileSystem(FileSystem): + """ + Delegates to another implementation after prepending a fixed base path. + + This is useful to expose a logical view of a subtree of a filesystem, + for example a directory in a LocalFileSystem. + + Note, that this makes no security guarantee. For example, symlinks may + allow to "escape" the subtree and access other parts of the underlying + filesystem. 
+ + Parameters + ---------- + base_path : str + The root of the subtree. + base_fs : FileSystem + FileSystem object the operations delegated to. + + Examples + -------- + Create a LocalFileSystem instance: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + + Create a directory and a SubTreeFileSystem instance: + + >>> local.create_dir("/tmp/sub_tree") + >>> subtree = fs.SubTreeFileSystem("/tmp/sub_tree", local) + + Write data into the existing file: + + >>> with subtree.open_append_stream("sub_tree_fs.dat") as f: + ... f.write(b"+newly added") + 12 + + Print out the attributes: + + >>> subtree.base_fs + + >>> subtree.base_path + '/tmp/sub_tree/' + + Get info for the given directory or given file: + + >>> subtree.get_file_info("") + + >>> subtree.get_file_info("sub_tree_fs.dat") + + + Delete the file and directory: + + >>> subtree.delete_file("sub_tree_fs.dat") + >>> local.delete_dir("/tmp/sub_tree") + >>> local.delete_file("/tmp/local_fs.dat") + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__(self, base_path: str, base_fs: FileSystem): ... + @property + def base_path(self) -> str: ... + @property + def base_fs(self) -> FileSystem: ... + +class _MockFileSystem(FileSystem): + def __init__(self, current_time: dt.datetime | None = None) -> None: ... + +class PyFileSystem(FileSystem): + """ + A FileSystem with behavior implemented in Python. + + Parameters + ---------- + handler : FileSystemHandler + The handler object implementing custom filesystem behavior. + + Examples + -------- + Create an fsspec-based filesystem object for GitHub: + + >>> from fsspec.implementations import github + >>> gfs = github.GithubFileSystem("apache", "arrow") # doctest: +SKIP + + Get a PyArrow FileSystem object: + + >>> from pyarrow.fs import PyFileSystem, FSSpecHandler + >>> pa_fs = PyFileSystem(FSSpecHandler(gfs)) # doctest: +SKIP + + Use :func:`~pyarrow.fs.FileSystem` functionality ``get_file_info()``: + + >>> pa_fs.get_file_info("README.md") # doctest: +SKIP + + """ + def __init__(self, handler: FileSystemHandler) -> None: ... + @property + def handler(self) -> FileSystemHandler: + """ + The filesystem's underlying handler. + + Returns + ------- + handler : FileSystemHandler + """ + +class FileSystemHandler(ABC): + """ + An abstract class exposing methods to implement PyFileSystem's behavior. + """ + @abstractmethod + def get_type_name(self) -> str: + """ + Implement PyFileSystem.type_name. + """ + @abstractmethod + def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(paths). + + Parameters + ---------- + paths : list of str + paths for which we want to retrieve the info. + """ + @abstractmethod + def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(selector). + + Parameters + ---------- + selector : FileSelector + selector for which we want to retrieve the info. + """ + + @abstractmethod + def create_dir(self, path: str, recursive: bool) -> None: + """ + Implement PyFileSystem.create_dir(...). + + Parameters + ---------- + path : str + path of the directory. + recursive : bool + if the parent directories should be created too. + """ + @abstractmethod + def delete_dir(self, path: str) -> None: + """ + Implement PyFileSystem.delete_dir(...). + + Parameters + ---------- + path : str + path of the directory. 
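[Editor's sketch, not part of the patch] A variation on the PyFileSystem example above, wrapping an fsspec in-memory filesystem instead of GitHub; assumes fsspec is installed, and the path is hypothetical.

import fsspec
from pyarrow.fs import FSSpecHandler, PyFileSystem

# Expose an fsspec filesystem through the Arrow FileSystem API.
pa_fs = PyFileSystem(FSSpecHandler(fsspec.filesystem("memory")))

with pa_fs.open_output_stream("data.bin") as out:
    out.write(b"data")

print(pa_fs.get_file_info("data.bin").size)  # 4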
+ """ + @abstractmethod + def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: + """ + Implement PyFileSystem.delete_dir_contents(...). + + Parameters + ---------- + path : str + path of the directory. + missing_dir_ok : bool + if False an error should be raised if path does not exist + """ + @abstractmethod + def delete_root_dir_contents(self) -> None: + """ + Implement PyFileSystem.delete_dir_contents("/", accept_root_dir=True). + """ + @abstractmethod + def delete_file(self, path: str) -> None: + """ + Implement PyFileSystem.delete_file(...). + + Parameters + ---------- + path : str + path of the file. + """ + @abstractmethod + def move(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.move(...). + + Parameters + ---------- + src : str + path of what should be moved. + dest : str + path of where it should be moved to. + """ + + @abstractmethod + def copy_file(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.copy_file(...). + + Parameters + ---------- + src : str + path of what should be copied. + dest : str + path of where it should be copied to. + """ + @abstractmethod + def open_input_stream(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_input_file(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_file(...). + + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_output_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + + @abstractmethod + def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_append_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + @abstractmethod + def normalize_path(self, path: str) -> str: + """ + Implement PyFileSystem.normalize_path(...). + + Parameters + ---------- + path : str + path of what should be normalized. + """ diff --git a/python/pyarrow/_gcsfs.pyi b/python/pyarrow/_gcsfs.pyi new file mode 100644 index 00000000000..4fc7ea68e48 --- /dev/null +++ b/python/pyarrow/_gcsfs.pyi @@ -0,0 +1,83 @@ +import datetime as dt + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class GcsFileSystem(FileSystem): + """ + Google Cloud Storage (GCS) backed FileSystem implementation + + By default uses the process described in https://google.aip.dev/auth/4110 + to resolve credentials. If not running on Google Cloud Platform (GCP), + this generally requires the environment variable + GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file + containing credentials. + + Note: GCS buckets are special and the operations available on them may be + limited or more expensive than expected compared to local file systems. + + Note: When pickling a GcsFileSystem that uses default credentials, resolution + credentials are not stored in the serialized data. 
Therefore, when unpickling + it is assumed that the necessary credentials are in place for the target + process. + + Parameters + ---------- + anonymous : boolean, default False + Whether to connect anonymously. + If true, will not attempt to look up credentials using standard GCP + configuration methods. + access_token : str, default None + GCP access token. If provided, temporary credentials will be fetched by + assuming this role; also, a `credential_token_expiration` must be + specified as well. + target_service_account : str, default None + An optional service account to try to impersonate when accessing GCS. This + requires the specified credential user or service account to have the necessary + permissions. + credential_token_expiration : datetime, default None + Expiration for credential generated with an access token. Must be specified + if `access_token` is specified. + default_bucket_location : str, default 'US' + GCP region to create buckets in. + scheme : str, default 'https' + GCS connection transport scheme. + endpoint_override : str, default None + Override endpoint with a connect string such as "localhost:9000" + default_metadata : mapping or pyarrow.KeyValueMetadata, default None + Default metadata for `open_output_stream`. This will be ignored if + non-empty metadata is passed to `open_output_stream`. + retry_time_limit : timedelta, default None + Set the maximum amount of time the GCS client will attempt to retry + transient errors. Subsecond granularity is ignored. + project_id : str, default None + The GCP project identifier to use for creating buckets. + If not set, the library uses the GOOGLE_CLOUD_PROJECT environment + variable. Most I/O operations do not need a project id, only applications + that create new buckets need a project id. + """ + + def __init__( + self, + *, + anonymous: bool = False, + access_token: str | None = None, + target_service_account: str | None = None, + credential_token_expiration: dt.datetime | None = None, + default_bucket_location: str = "US", + scheme: str = "https", + endpoint_override: str | None = None, + default_metadata: dict | KeyValueMetadata | None = None, + retry_time_limit: dt.timedelta | None = None, + project_id: str | None = None, + ): ... + @property + def default_bucket_location(self) -> str: + """ + The GCP location this filesystem will write to. + """ + @property + def project_id(self) -> str: + """ + The GCP project id this filesystem will use. + """ diff --git a/python/pyarrow/_hdfs.pyi b/python/pyarrow/_hdfs.pyi new file mode 100644 index 00000000000..200f669379b --- /dev/null +++ b/python/pyarrow/_hdfs.pyi @@ -0,0 +1,75 @@ +from _typeshed import StrPath + +from ._fs import FileSystem + +class HadoopFileSystem(FileSystem): + """ + HDFS backed FileSystem implementation + + Parameters + ---------- + host : str + HDFS host to connect to. Set to "default" for fs.defaultFS from + core-site.xml. + port : int, default 8020 + HDFS port to connect to. Set to 0 for default or logical (HA) nodes. + user : str, default None + Username when connecting to HDFS; None implies login user. + replication : int, default 3 + Number of copies each block will have. + buffer_size : int, default 0 + If 0, no buffering will happen otherwise the size of the temporary read + and write buffer. + default_block_size : int, default None + None means the default configuration for HDFS, a typical block size is + 128 MB. + kerb_ticket : string or path, default None + If not None, the path to the Kerberos ticket cache. 
+ extra_conf : dict, default None + Extra key/value pairs for configuration; will override any + hdfs-site.xml properties. + + Examples + -------- + >>> from pyarrow import fs + >>> hdfs = fs.HadoopFileSystem( + ... host, port, user=user, kerb_ticket=ticket_cache_path + ... ) # doctest: +SKIP + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__( + self, + host: str, + port: int = 8020, + *, + user: str | None = None, + replication: int = 3, + buffer_size: int = 0, + default_block_size: int | None = None, + kerb_ticket: StrPath | None = None, + extra_conf: dict | None = None, + ): ... + @staticmethod + def from_uri(uri: str) -> HadoopFileSystem: # type: ignore[override] + """ + Instantiate HadoopFileSystem object from an URI string. + + The following two calls are equivalent + + * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\ +&replication=1')`` + * ``HadoopFileSystem('localhost', port=8020, user='test', \ +replication=1)`` + + Parameters + ---------- + uri : str + A string URI describing the connection to HDFS. + In order to change the user, replication, buffer_size or + default_block_size pass the values as query parts. + + Returns + ------- + HadoopFileSystem + """ diff --git a/python/pyarrow/_json.pyi b/python/pyarrow/_json.pyi new file mode 100644 index 00000000000..43d2ae83cd8 --- /dev/null +++ b/python/pyarrow/_json.pyi @@ -0,0 +1,169 @@ +from typing import IO, Any, Literal + +from _typeshed import StrPath + +from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable + +class ReadOptions(_Weakrefable): + """ + Options for reading JSON files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual chunks in the Table. + """ + + use_threads: bool + """ + Whether to use multiple threads to accelerate reading. + """ + block_size: int + """ + How much bytes to process at a time from the input stream. + + This will determine multi-threading granularity as well as the size of + individual chunks in the Table. + """ + def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... + def equals(self, other: ReadOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ReadOptions + + Returns + ------- + bool + """ + +class ParseOptions(_Weakrefable): + """ + Options for parsing JSON files. + + Parameters + ---------- + explicit_schema : Schema, optional (default None) + Optional explicit schema (no type inference, ignores other fields). + newlines_in_values : bool, optional (default False) + Whether objects may be printed across multiple lines (for example + pretty printed). If false, input must end with an empty line. + unexpected_field_behavior : str, default "infer" + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + """ + + explicit_schema: Schema + """ + Optional explicit schema (no type inference, ignores other fields) + """ + newlines_in_values: bool + """ + Whether newline characters are allowed in JSON values. + Setting this to True reduces the performance of multi-threaded + JSON reading. 
+ """ + unexpected_field_behavior: Literal["ignore", "error", "infer"] + """ + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + + Set to "infer" by default. + """ + def __init__( + self, + explicit_schema: Schema | None = None, + newlines_in_values: bool | None = None, + unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", + ): ... + def equals(self, other: ParseOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ParseOptions + + Returns + ------- + bool + """ + +class JSONStreamingReader(RecordBatchReader): + """An object that reads record batches incrementally from a JSON file. + + Should not be instantiated directly by user code. + """ + +def read_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> Table: + """ + Read a Table from a stream of JSON data. + + Parameters + ---------- + input_file : str, path or file-like object + The location of JSON data. Currently only the line-delimited JSON + format is supported. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see ReadOptions constructor for defaults). + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see ParseOptions constructor for defaults). + memory_pool : MemoryPool, optional + Pool to allocate Table memory from. + + Returns + ------- + :class:`pyarrow.Table` + Contents of the JSON file as a in-memory table. + """ + +def open_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> JSONStreamingReader: + """ + Open a streaming reader of JSON data. + + Reading using this function is always single-threaded. + + Parameters + ---------- + input_file : string, path or file-like object + The location of JSON data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see pyarrow.json.ReadOptions constructor + for defaults) + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see pyarrow.json.ParseOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from + + Returns + ------- + :class:`pyarrow.json.JSONStreamingReader` + """ diff --git a/python/pyarrow/_orc.pyi b/python/pyarrow/_orc.pyi new file mode 100644 index 00000000000..71bf0dde9ba --- /dev/null +++ b/python/pyarrow/_orc.pyi @@ -0,0 +1,56 @@ +from typing import IO, Literal + +from .lib import ( + Buffer, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +class ORCReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ... + def metadata(self) -> KeyValueMetadata: ... + def schema(self) -> Schema: ... + def nrows(self) -> int: ... + def nstripes(self) -> int: ... + def file_version(self) -> str: ... + def software_version(self) -> str: ... 
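[Editor's sketch, not part of the patch] A short sketch of the public pyarrow.json wrappers around the options typed above; the sample data and schema are hypothetical.

import io
import pyarrow as pa
from pyarrow import json

data = io.BytesIO(b'{"id": 1, "name": "a"}\n{"id": 2, "name": "b"}\n')

# Pin the schema instead of inferring it, and error on unexpected fields.
opts = json.ParseOptions(
    explicit_schema=pa.schema([("id", pa.int64()), ("name", pa.string())]),
    unexpected_field_behavior="error",
)
table = json.read_json(data, parse_options=opts)
print(table.num_rows)  # 2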
+ def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + def compression_size(self) -> int: ... + def row_index_stride(self) -> int: ... + def writer(self) -> str: ... + def writer_version(self) -> str: ... + def nstripe_statistics(self) -> int: ... + def content_length(self) -> int: ... + def stripe_statistics_length(self) -> int: ... + def file_footer_length(self) -> int: ... + def file_postscript_length(self) -> int: ... + def file_length(self) -> int: ... + def serialized_file_tail(self) -> int: ... + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + def read(self, columns: list[str] | None = None) -> Table: ... + +class ORCWriter(_Weakrefable): + def open( + self, + where: str | NativeFile | IO, + *, + file_version: str | None = None, + batch_size: int | None = None, + stripe_size: int | None = None, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] | None = None, + compression_block_size: int | None = None, + compression_strategy: Literal["COMPRESSION", "SPEED"] | None = None, + row_index_stride: int | None = None, + padding_tolerance: float | None = None, + dictionary_key_size_threshold: float | None = None, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float | None = None, + ) -> None: ... + def write(self, table: Table) -> None: ... + def close(self) -> None: ... diff --git a/python/pyarrow/_parquet.pyi b/python/pyarrow/_parquet.pyi new file mode 100644 index 00000000000..a9187df0428 --- /dev/null +++ b/python/pyarrow/_parquet.pyi @@ -0,0 +1,445 @@ +from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict + +from _typeshed import StrPath + +from ._stubs_typing import Order +from .lib import ( + Buffer, + ChunkedArray, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +_PhysicalType: TypeAlias = Literal[ + "BOOLEAN", + "INT32", + "INT64", + "INT96", + "FLOAT", + "DOUBLE", + "BYTE_ARRAY", + "FIXED_LEN_BYTE_ARRAY", + "UNKNOWN", +] +_LogicTypeName: TypeAlias = Literal[ + "UNDEFINED", + "STRING", + "MAP", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME", + "TIMESTAMP", + "INT", + "FLOAT16", + "JSON", + "BSON", + "UUID", + "NONE", + "UNKNOWN", +] +_ConvertedType: TypeAlias = Literal[ + "NONE", + "UTF8", + "MAP", + "MAP_KEY_VALUE", + "LIST", + "ENUM", + "DECIMAL", + "DATE", + "TIME_MILLIS", + "TIME_MICROS", + "TIMESTAMP_MILLIS", + "TIMESTAMP_MICROS", + "UINT_8", + "UINT_16", + "UINT_32", + "UINT_64", + "INT_8", + "INT_16", + "INT_32", + "INT_64", + "JSON", + "BSON", + "INTERVAL", + "UNKNOWN", +] +_Encoding: TypeAlias = Literal[ + "PLAIN", + "PLAIN_DICTIONARY", + "RLE", + "BIT_PACKED", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "RLE_DICTIONARY", + "BYTE_STREAM_SPLIT", + "UNKNOWN", +] +_Compression: TypeAlias = Literal[ + "UNCOMPRESSED", + "SNAPPY", + "GZIP", + "LZO", + "BROTLI", + "LZ4", + "ZSTD", + "UNKNOWN", +] + +class _Statistics(TypedDict): + has_min_max: bool + min: Any | None + max: Any | None + null_count: int | None + distinct_count: int | None + num_values: int + physical_type: _PhysicalType + +class Statistics(_Weakrefable): + def to_dict(self) -> _Statistics: ... + def equals(self, other: Statistics) -> bool: ... + @property + def has_min_max(self) -> bool: ... + @property + def hash_null_count(self) -> bool: ... + @property + def has_distinct_count(self) -> bool: ... + @property + def min_raw(self) -> Any | None: ... 
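[Editor's sketch, not part of the patch] ORCReader and ORCWriter above appear to be the low-level classes behind the public pyarrow.orc helpers; a usage sketch of those wrappers, assuming pyarrow was built with ORC support and using a hypothetical path.

import pyarrow as pa
import pyarrow.orc as orc

table = pa.table({"x": [1, 2, 3]})
orc.write_table(table, "/tmp/example.orc", compression="ZSTD")

f = orc.ORCFile("/tmp/example.orc")
print(f.nrows, f.compression)   # 3 ZSTD
print(f.read(columns=["x"]))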
+ @property + def max_raw(self) -> Any | None: ... + @property + def min(self) -> Any | None: ... + @property + def max(self) -> Any | None: ... + @property + def null_count(self) -> int | None: ... + @property + def distinct_count(self) -> int | None: ... + @property + def num_values(self) -> int: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def logical_type(self) -> ParquetLogicalType: ... + @property + def converted_type(self) -> _ConvertedType | None: ... + +class ParquetLogicalType(_Weakrefable): + def to_json(self) -> str: ... + @property + def type(self) -> _LogicTypeName: ... + +class _ColumnChunkMetaData(TypedDict): + file_offset: int + file_path: str | None + physical_type: _PhysicalType + num_values: int + path_in_schema: str + is_stats_set: bool + statistics: Statistics | None + compression: _Compression + encodings: tuple[_Encoding, ...] + has_dictionary_page: bool + dictionary_page_offset: int | None + data_page_offset: int + total_compressed_size: int + total_uncompressed_size: int + +class ColumnChunkMetaData(_Weakrefable): + def to_dict(self) -> _ColumnChunkMetaData: ... + def equals(self, other: ColumnChunkMetaData) -> bool: ... + @property + def file_offset(self) -> int: ... + @property + def file_path(self) -> str | None: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def num_values(self) -> int: ... + @property + def path_in_schema(self) -> str: ... + @property + def is_stats_set(self) -> bool: ... + @property + def statistics(self) -> Statistics | None: ... + @property + def compression(self) -> _Compression: ... + @property + def encodings(self) -> tuple[_Encoding, ...]: ... + @property + def has_dictionary_page(self) -> bool: ... + @property + def dictionary_page_offset(self) -> int | None: ... + @property + def data_page_offset(self) -> int: ... + @property + def has_index_page(self) -> bool: ... + @property + def index_page_offset(self) -> int: ... + @property + def total_compressed_size(self) -> int: ... + @property + def total_uncompressed_size(self) -> int: ... + @property + def has_offset_index(self) -> bool: ... + @property + def has_column_index(self) -> bool: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + +class _SortingColumn(TypedDict): + column_index: int + descending: bool + nulls_first: bool + +class SortingColumn: + def __init__( + self, column_index: int, descending: bool = False, nulls_first: bool = False + ) -> None: ... + @classmethod + def from_ordering( + cls, + schema: Schema, + sort_keys: Sequence[tuple[str, Order]], + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> tuple[SortingColumn, ...]: ... + @staticmethod + def to_ordering( + schema: Schema, sorting_columns: tuple[SortingColumn, ...] + ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ... + def __hash__(self) -> int: ... + @property + def column_index(self) -> int: ... + @property + def descending(self) -> bool: ... + @property + def nulls_first(self) -> bool: ... + def to_dict(self) -> _SortingColumn: ... + +class _RowGroupMetaData(TypedDict): + num_columns: int + num_rows: int + total_byte_size: int + columns: list[ColumnChunkMetaData] + sorting_columns: list[SortingColumn] + +class RowGroupMetaData(_Weakrefable): + def __init__(self, parent: FileMetaData, index: int) -> None: ... + def equals(self, other: RowGroupMetaData) -> bool: ... + def column(self, i: int) -> ColumnChunkMetaData: ... + def to_dict(self) -> _RowGroupMetaData: ... 
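# Usage sketch: walking the Parquet metadata classes typed above through the
# public pyarrow.parquet API; "data.parquet" is an illustrative path.
import pyarrow as pa
import pyarrow.parquet as pq

pq.write_table(pa.table({"x": [1, 2, None, 4]}), "data.parquet")

meta = pq.ParquetFile("data.parquet").metadata  # FileMetaData
rg = meta.row_group(0)                          # RowGroupMetaData
col = rg.column(0)                              # ColumnChunkMetaData
stats = col.statistics                          # Statistics | None
if stats is not None and stats.has_min_max:
    print(stats.min, stats.max, stats.null_count, stats.physical_type)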
+ @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... + @property + def sorting_columns(self) -> list[SortingColumn]: ... + +class _FileMetaData(TypedDict): + created_by: str + num_columns: int + num_rows: int + num_row_groups: int + format_version: str + serialized_size: int + +class FileMetaData(_Weakrefable): + def __hash__(self) -> int: ... + def to_dict(self) -> _FileMetaData: ... + def equals(self, other: FileMetaData) -> bool: ... + @property + def schema(self) -> ParquetSchema: ... + @property + def serialized_size(self) -> int: ... + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def num_row_groups(self) -> int: ... + @property + def format_version(self) -> str: ... + @property + def created_by(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + def row_group(self, i: int) -> RowGroupMetaData: ... + def set_file_path(self, path: str) -> None: ... + def append_row_groups(self, other: FileMetaData) -> None: ... + def write_metadata_file(self, where: StrPath | Buffer | NativeFile | IO) -> None: ... + +class ParquetSchema(_Weakrefable): + def __init__(self, container: FileMetaData) -> None: ... + def __getitem__(self, i: int) -> ColumnChunkMetaData: ... + def __hash__(self) -> int: ... + def __len__(self) -> int: ... + @property + def names(self) -> list[str]: ... + def to_arrow_schema(self) -> Schema: ... + def equals(self, other: ParquetSchema) -> bool: ... + def column(self, i: int) -> ColumnSchema: ... + +class ColumnSchema(_Weakrefable): + def __init__(self, schema: ParquetSchema, index: int) -> None: ... + def equals(self, other: ColumnSchema) -> bool: ... + @property + def name(self) -> str: ... + @property + def path(self) -> str: ... + @property + def max_definition_level(self) -> int: ... + @property + def max_repetition_level(self) -> int: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def logical_type(self) -> ParquetLogicalType: ... + @property + def converted_type(self) -> _ConvertedType | None: ... + @property + def length(self) -> int | None: ... + @property + def precision(self) -> int | None: ... + @property + def scale(self) -> int | None: ... + +class ParquetReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open( + self, + source: StrPath | NativeFile | IO, + *, + use_memory_map: bool = False, + read_dictionary: Iterable[int] | Iterable[str] | None = None, + metadata: FileMetaData | None = None, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + @property + def column_paths(self) -> list[str]: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def schema_arrow(self) -> Schema: ... + @property + def num_row_groups(self) -> int: ... + def set_use_threads(self, use_threads: bool) -> None: ... + def set_batch_size(self, batch_size: int) -> None: ... + def iter_batches( + self, + batch_size: int, + row_groups: list[int], + column_indices: list[int] | None = None, + use_threads: bool = True, + ) -> Iterator[RecordBatch]: ... 
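# Usage sketch: batch-wise reading through pyarrow.parquet.ParquetFile, the
# public wrapper over the ParquetReader stub above; the path and the column
# selection are illustrative assumptions.
import pyarrow.parquet as pq

pf = pq.ParquetFile("data.parquet")
print(pf.schema_arrow)                                   # Arrow Schema
for batch in pf.iter_batches(batch_size=1024, columns=["x"]):
    ...                                                  # pyarrow.RecordBatch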
+ def read_row_group( + self, i: int, column_indices: list[int] | None = None, use_threads: bool = True + ) -> Table: ... + def read_row_groups( + self, + row_groups: list[int], + column_indices: list[int] | None = None, + use_threads: bool = True, + ) -> Table: ... + def read_all( + self, column_indices: list[int] | None = None, use_threads: bool = True + ) -> Table: ... + def scan_contents(self, column_indices: list[int] | None = None, batch_size: int = 65536): ... + def column_name_idx(self, column_name: str) -> int: ... + def read_column(self, column_index: int) -> ChunkedArray: ... + def close(self) -> None: ... + @property + def closed(self) -> bool: ... + +class ParquetWriter(_Weakrefable): + def __init__( + self, + where: StrPath | NativeFile | IO, + schema: Schema, + use_dictionary: bool | list[str] | None = None, + compression: _Compression | dict[str, _Compression] | None = None, + version: str | None = None, + write_statistics: bool | list[str] | None = None, + memory_pool: MemoryPool | None = None, + use_deprecated_int96_timestamps: bool = False, + coerce_timestamps: Literal["ms", "us"] | None = None, + data_page_size: int | None = None, + allow_truncated_timestamps: bool = False, + compression_level: int | dict[str, int] | None = None, + use_byte_stream_split: bool | list[str] = False, + column_encoding: _Encoding | dict[str, _Encoding] | None = None, + writer_engine_version: str | None = None, + data_page_version: str | None = None, + use_compliant_nested_type: bool = True, + encryption_properties: FileDecryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: tuple[SortingColumn, ...] | None = None, + store_decimal_as_integer: bool = False, + ): ... + def close(self) -> None: ... + def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... + def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def use_dictionary(self) -> bool | list[str] | None: ... + @property + def use_deprecated_int96_timestamps(self) -> bool: ... + @property + def use_byte_stream_split(self) -> bool | list[str]: ... + @property + def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ... + @property + def coerce_timestamps(self) -> Literal["ms", "us"] | None: ... + @property + def allow_truncated_timestamps(self) -> bool: ... + @property + def compression(self) -> _Compression | dict[str, _Compression] | None: ... + @property + def compression_level(self) -> int | dict[str, int] | None: ... + @property + def data_page_version(self) -> str | None: ... + @property + def use_compliant_nested_type(self) -> bool: ... + @property + def version(self) -> str | None: ... + @property + def write_statistics(self) -> bool | list[str] | None: ... + @property + def writer_engine_version(self) -> str: ... + @property + def row_group_size(self) -> int: ... + @property + def data_page_size(self) -> int: ... + @property + def encryption_properties(self) -> FileDecryptionProperties: ... + @property + def write_batch_size(self) -> int: ... + @property + def dictionary_pagesize_limit(self) -> int: ... + @property + def store_schema(self) -> bool: ... + @property + def store_decimal_as_integer(self) -> bool: ... + +class FileEncryptionProperties: ... +class FileDecryptionProperties: ... 
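# Usage sketch for the writer options typed above, through the public
# pyarrow.parquet.ParquetWriter; the file name and settings are illustrative
# assumptions.
import pyarrow as pa
import pyarrow.parquet as pq

schema = pa.schema([("ts", pa.timestamp("us")), ("v", pa.float64())])
with pq.ParquetWriter(
    "out.parquet",
    schema,
    compression="zstd",
    compression_level=3,
    write_statistics=True,
) as writer:
    writer.write_table(pa.table({"ts": [None, None], "v": [1.0, 2.0]}, schema=schema))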
diff --git a/python/pyarrow/_parquet_encryption.pyi b/python/pyarrow/_parquet_encryption.pyi new file mode 100644 index 00000000000..c707edb844a --- /dev/null +++ b/python/pyarrow/_parquet_encryption.pyi @@ -0,0 +1,67 @@ +import datetime as dt + +from typing import Callable + +from ._parquet import FileDecryptionProperties, FileEncryptionProperties +from .lib import _Weakrefable + +class EncryptionConfiguration(_Weakrefable): + footer_key: str + column_keys: dict[str, list[str]] + encryption_algorithm: str + plaintext_footer: bool + double_wrapping: bool + cache_lifetime: dt.timedelta + internal_key_material: bool + data_key_length_bits: int + + def __init__( + self, + footer_key: str, + *, + column_keys: dict[str, str | list[str]] | None = None, + encryption_algorithm: str | None = None, + plaintext_footer: bool | None = None, + double_wrapping: bool | None = None, + cache_lifetime: dt.timedelta | None = None, + internal_key_material: bool | None = None, + data_key_length_bits: int | None = None, + ) -> None: ... + +class DecryptionConfiguration(_Weakrefable): + cache_lifetime: dt.timedelta + def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... + +class KmsConnectionConfig(_Weakrefable): + kms_instance_id: str + kms_instance_url: str + key_access_token: str + custom_kms_conf: dict[str, str] + def __init__( + self, + *, + kms_instance_id: str | None = None, + kms_instance_url: str | None = None, + key_access_token: str | None = None, + custom_kms_conf: dict[str, str] | None = None, + ) -> None: ... + def refresh_key_access_token(self, value: str) -> None: ... + +class KmsClient(_Weakrefable): + def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... + def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... + +class CryptoFactory(_Weakrefable): + def __init__(self, kms_client_factory: Callable[[KmsConnectionConfig], KmsClient]): ... + def file_encryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> FileEncryptionProperties: ... + def file_decryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + decryption_config: DecryptionConfiguration | None = None, + ) -> FileDecryptionProperties: ... + def remove_cache_entries_for_token(self, access_token: str) -> None: ... + def remove_cache_entries_for_all_tokens(self) -> None: ... diff --git a/python/pyarrow/_s3fs.pyi b/python/pyarrow/_s3fs.pyi new file mode 100644 index 00000000000..fc13c498bd9 --- /dev/null +++ b/python/pyarrow/_s3fs.pyi @@ -0,0 +1,74 @@ +import enum + +from typing import Literal, NotRequired, Required, TypedDict + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class _ProxyOptions(TypedDict): + schema: Required[Literal["http", "https"]] + host: Required[str] + port: Required[int] + username: NotRequired[str] + password: NotRequired[str] + +class S3LogLevel(enum.IntEnum): + Off = enum.auto() + Fatal = enum.auto() + Error = enum.auto() + Warn = enum.auto() + Info = enum.auto() + Debug = enum.auto() + Trace = enum.auto() + +Off = S3LogLevel.Off +Fatal = S3LogLevel.Fatal +Error = S3LogLevel.Error +Warn = S3LogLevel.Warn +Info = S3LogLevel.Info +Debug = S3LogLevel.Debug +Trace = S3LogLevel.Trace + +def initialize_s3( + log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1 +) -> None: ... +def ensure_s3_initialized() -> None: ... +def finalize_s3() -> None: ... +def ensure_s3_finalized() -> None: ... 
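# Usage sketch of the KMS-client-factory pattern described by the
# CryptoFactory/KmsClient stubs above, assuming the public
# pyarrow.parquet.encryption module; the base64 "KMS" below is a toy stand-in
# for a real key service, and all key names are illustrative assumptions.
import base64
import pyarrow.parquet.encryption as pe

class ToyKmsClient(pe.KmsClient):
    def __init__(self, config: pe.KmsConnectionConfig):
        super().__init__()
        self._conf = config.custom_kms_conf  # unused by the toy wrap/unwrap

    def wrap_key(self, key_bytes, master_key_identifier):
        return base64.b64encode(key_bytes).decode()  # "wrap" = encode only

    def unwrap_key(self, wrapped_key, master_key_identifier):
        return base64.b64decode(wrapped_key)

factory = pe.CryptoFactory(lambda config: ToyKmsClient(config))
encryption_properties = factory.file_encryption_properties(
    pe.KmsConnectionConfig(custom_kms_conf={"footer_key": "0" * 16}),
    pe.EncryptionConfiguration(footer_key="footer_key",
                               column_keys={"col_key": ["v"]}),
)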
+def resolve_s3_region(bucket: str) -> str: ... + +class S3RetryStrategy: + max_attempts: int + def __init__(self, max_attempts=3) -> None: ... + +class AwsStandardS3RetryStrategy(S3RetryStrategy): ... +class AwsDefaultS3RetryStrategy(S3RetryStrategy): ... + +class S3FileSystem(FileSystem): + def __init__( + self, + *, + access_key: str | None = None, + secret_key: str | None = None, + session_token: str | None = None, + anonymous: bool = False, + region: str | None = None, + request_timeout: float | None = None, + connect_timeout: float | None = None, + scheme: Literal["http", "https"] = "https", + endpoint_override: str | None = None, + background_writes: bool = True, + default_metadata: dict | KeyValueMetadata | None = None, + role_arn: str | None = None, + session_name: str | None = None, + external_id: str | None = None, + load_frequency: int = 900, + proxy_options: _ProxyOptions | str | None = None, + allow_bucket_creation: bool = False, + allow_bucket_deletion: bool = False, + check_directory_existence_before_creation: bool = False, + retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3), + force_virtual_addressing: bool = False, + ): ... + @property + def region(self) -> str: ... diff --git a/python/pyarrow/_stubs_typing.pyi b/python/pyarrow/_stubs_typing.pyi new file mode 100644 index 00000000000..c259513f1ea --- /dev/null +++ b/python/pyarrow/_stubs_typing.pyi @@ -0,0 +1,80 @@ +import datetime as dt + +from collections.abc import Sequence +from decimal import Decimal +from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from .compute import BooleanArray, IntegerArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +Mask: TypeAlias = Sequence[bool | None] | NDArray[np.bool_] | BooleanArray +Indices: TypeAlias = Sequence[int] | NDArray[np.integer[Any]] | IntegerArray +PyScalar: TypeAlias = ( + bool | int | float | Decimal | str | bytes | dt.date | dt.datetime | dt.time | dt.timedelta +) + +_T = TypeVar("_T") +SingleOrList: TypeAlias = list[_T] | _T + +class SupportEq(Protocol): + def __eq__(self, other) -> bool: ... + +class SupportLt(Protocol): + def __lt__(self, other) -> bool: ... + +class SupportGt(Protocol): + def __gt__(self, other) -> bool: ... + +class SupportLe(Protocol): + def __le__(self, other) -> bool: ... + +class SupportGe(Protocol): + def __ge__(self, other) -> bool: ... + +FilterTuple: TypeAlias = ( + tuple[str, Literal["=", "==", "!="], SupportEq] + | tuple[str, Literal["<"], SupportLt] + | tuple[str, Literal[">"], SupportGt] + | tuple[str, Literal["<="], SupportLe] + | tuple[str, Literal[">="], SupportGe] + | tuple[str, Literal["in", "not in"], Collection] +) + +class Buffer(Protocol): + def __buffer__(self, flags: int, /) -> memoryview: ... + +class SupportPyBuffer(Protocol): + def __buffer__(self, flags: int, /) -> memoryview: ... + +class SupportArrowStream(Protocol): + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... 
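# Usage sketch: constructing the S3FileSystem typed above via pyarrow.fs.
# Bucket, key and region are illustrative assumptions; anonymous access is used
# so no credentials are needed.
from pyarrow.fs import S3FileSystem

fs = S3FileSystem(
    region="us-east-1",
    anonymous=True,        # unsigned requests; public buckets only
    request_timeout=10,
    connect_timeout=5,
)
info = fs.get_file_info("some-public-bucket/some/key.parquet")
print(info.type, info.size)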
+
+class SupportArrowArray(Protocol):
+    def __arrow_c_array__(self, requested_schema=None) -> Any: ...
+
+class SupportArrowDeviceArray(Protocol):
+    def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ...
+
+class SupportArrowSchema(Protocol):
+    def __arrow_c_schema__(self) -> Any: ...
diff --git a/python/pyarrow/_substrait.pyi b/python/pyarrow/_substrait.pyi
new file mode 100644
index 00000000000..ff226e9521b
--- /dev/null
+++ b/python/pyarrow/_substrait.pyi
@@ -0,0 +1,39 @@
+from typing import Any, Callable
+
+from ._compute import Expression
+from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable
+
+def run_query(
+    plan: Buffer | int,
+    *,
+    table_provider: Callable[[list[str], Schema], Table] | None = None,
+    use_threads: bool = True,
+) -> RecordBatchReader: ...
+def _parse_json_plan(plan: bytes) -> Buffer: ...
+
+class SubstraitSchema:
+    schema: Schema
+    expression: Expression
+    def __init__(self, schema: Schema, expression: Expression) -> None: ...
+    def to_pysubstrait(self) -> Any: ...
+
+def serialize_schema(schema: Schema) -> SubstraitSchema: ...
+def deserialize_schema(buf: Buffer | bytes) -> Schema: ...
+def serialize_expressions(
+    exprs: list[Expression],
+    names: list[str],
+    schema: Schema,
+    *,
+    allow_arrow_extensions: bool = False,
+) -> Buffer: ...
+
+class BoundExpressions(_Weakrefable):
+    @property
+    def schema(self) -> Schema: ...
+    @property
+    def expressions(self) -> dict[str, Expression]: ...
+    @classmethod
+    def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ...
+
+def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ...
+def get_supported_functions() -> list[str]: ...
diff --git a/python/pyarrow/acero.pyi b/python/pyarrow/acero.pyi
new file mode 100644
index 00000000000..8a520bdc24a
--- /dev/null
+++ b/python/pyarrow/acero.pyi
@@ -0,0 +1,85 @@
+import sys
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+from typing import Literal
+
+from . import lib
+from .compute import Expression, FunctionOptions
+
+_StrOrExpr: TypeAlias = str | Expression
+
+class Declaration(lib._Weakrefable):
+    def __init__(
+        self,
+        factory_name: str,
+        options: ExecNodeOptions,
+        inputs: list[Declaration] | None = None,
+    ) -> None: ...
+    @classmethod
+    def from_sequence(cls, decls: list[Declaration]) -> Self: ...
+    def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ...
+    def to_table(self, use_threads: bool = True) -> lib.Table: ...
+
+class ExecNodeOptions(lib._Weakrefable): ...
+
+class TableSourceNodeOptions(ExecNodeOptions):
+    def __init__(self, table: lib.Table) -> None: ...
+
+class FilterNodeOptions(ExecNodeOptions):
+    def __init__(self, filter_expression: Expression) -> None: ...
+
+class ProjectNodeOptions(ExecNodeOptions):
+    def __init__(self, expressions: list[Expression], names: list[str] | None = None) -> None: ...
+
+class AggregateNodeOptions(ExecNodeOptions):
+    def __init__(
+        self,
+        aggregates: list[tuple[list[str], str, FunctionOptions, str]],
+        keys: list[_StrOrExpr] | None = None,
+    ) -> None: ...
+
+class OrderByNodeOptions(ExecNodeOptions):
+    def __init__(
+        self,
+        sort_keys: tuple[tuple[str, Literal["ascending", "descending"]], ...] = (),
+        *,
+        null_placement: Literal["at_start", "at_end"] = "at_end",
+    ) -> None: ...
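# Usage sketch: composing the Declaration and *NodeOptions classes typed above
# into a small Acero plan. The table, the filter expression and the "hash_sum"
# grouped aggregate are illustrative assumptions.
import pyarrow as pa
import pyarrow.acero as acero
import pyarrow.compute as pc

table = pa.table({"k": ["a", "a", "b"], "v": [1, 2, 3]})
plan = acero.Declaration.from_sequence([
    acero.Declaration("table_source", acero.TableSourceNodeOptions(table)),
    acero.Declaration("filter", acero.FilterNodeOptions(pc.field("v") > 1)),
    acero.Declaration(
        "aggregate",
        acero.AggregateNodeOptions([("v", "hash_sum", None, "v_sum")], keys=["k"]),
    ),
])
print(plan.to_table())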
+ +class HashJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + join_type: Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", + ], + left_keys: _StrOrExpr | list[_StrOrExpr], + right_keys: _StrOrExpr | list[_StrOrExpr], + left_output: list[_StrOrExpr] | None = None, + right_output: list[_StrOrExpr] | None = None, + output_suffix_for_left: str = "", + output_suffix_for_right: str = "", + ) -> None: ... + +class AsofJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + left_on: _StrOrExpr, + left_by: _StrOrExpr | list[_StrOrExpr], + right_on: _StrOrExpr, + right_by: _StrOrExpr | list[_StrOrExpr], + tolerance: int, + ) -> None: ... diff --git a/python/pyarrow/benchmark.pyi b/python/pyarrow/benchmark.pyi new file mode 100644 index 00000000000..048973301dc --- /dev/null +++ b/python/pyarrow/benchmark.pyi @@ -0,0 +1,3 @@ +from pyarrow.lib import benchmark_PandasObjectIsNull + +__all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/pyarrow/cffi.pyi b/python/pyarrow/cffi.pyi new file mode 100644 index 00000000000..2ae945c5974 --- /dev/null +++ b/python/pyarrow/cffi.pyi @@ -0,0 +1,4 @@ +import cffi + +c_source: str +ffi: cffi.FFI diff --git a/python/pyarrow/compute.pyi b/python/pyarrow/compute.pyi new file mode 100644 index 00000000000..8d8fc35b134 --- /dev/null +++ b/python/pyarrow/compute.pyi @@ -0,0 +1,7779 @@ +# ruff: noqa: I001 +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from collections.abc import Callable + +# Option classes +from pyarrow._compute import ArraySortOptions as ArraySortOptions +from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions +from pyarrow._compute import CastOptions as CastOptions +from pyarrow._compute import CountOptions as CountOptions +from pyarrow._compute import CumulativeOptions as CumulativeOptions +from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions +from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions +from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions +from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions + +# Expressions +from pyarrow._compute import Expression as Expression +from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions +from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions +from pyarrow._compute import FilterOptions as FilterOptions +from pyarrow._compute import Function as Function +from pyarrow._compute import FunctionOptions as FunctionOptions +from pyarrow._compute import FunctionRegistry as FunctionRegistry +from pyarrow._compute import HashAggregateFunction as HashAggregateFunction +from pyarrow._compute import HashAggregateKernel as HashAggregateKernel +from pyarrow._compute import IndexOptions as IndexOptions +from pyarrow._compute import JoinOptions as JoinOptions +from pyarrow._compute import Kernel as Kernel +from pyarrow._compute import ListFlattenOptions as ListFlattenOptions +from pyarrow._compute import ListSliceOptions as ListSliceOptions +from pyarrow._compute import MakeStructOptions as MakeStructOptions +from pyarrow._compute import MapLookupOptions as MapLookupOptions +from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions +from pyarrow._compute import ModeOptions as ModeOptions +from pyarrow._compute import NullOptions as NullOptions +from pyarrow._compute import PadOptions as PadOptions +from 
pyarrow._compute import PairwiseOptions as PairwiseOptions +from pyarrow._compute import PartitionNthOptions as PartitionNthOptions +from pyarrow._compute import PivotWiderOptions as PivotWiderOptions +from pyarrow._compute import QuantileOptions as QuantileOptions +from pyarrow._compute import RandomOptions as RandomOptions +from pyarrow._compute import RankOptions as RankOptions +from pyarrow._compute import RankQuantileOptions as RankQuantileOptions +from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions +from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions +from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions +from pyarrow._compute import RoundOptions as RoundOptions +from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions +from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions +from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions +from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction +from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel +from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions +from pyarrow._compute import ScalarFunction as ScalarFunction +from pyarrow._compute import ScalarKernel as ScalarKernel +from pyarrow._compute import SelectKOptions as SelectKOptions +from pyarrow._compute import SetLookupOptions as SetLookupOptions +from pyarrow._compute import SkewOptions as SkewOptions +from pyarrow._compute import SliceOptions as SliceOptions +from pyarrow._compute import SortOptions as SortOptions +from pyarrow._compute import SplitOptions as SplitOptions +from pyarrow._compute import SplitPatternOptions as SplitPatternOptions +from pyarrow._compute import StrftimeOptions as StrftimeOptions +from pyarrow._compute import StrptimeOptions as StrptimeOptions +from pyarrow._compute import StructFieldOptions as StructFieldOptions +from pyarrow._compute import TakeOptions as TakeOptions +from pyarrow._compute import TDigestOptions as TDigestOptions +from pyarrow._compute import TrimOptions as TrimOptions +from pyarrow._compute import UdfContext as UdfContext +from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import VarianceOptions as VarianceOptions +from pyarrow._compute import VectorFunction as VectorFunction +from pyarrow._compute import VectorKernel as VectorKernel +from pyarrow._compute import WeekOptions as WeekOptions +from pyarrow._compute import WinsorizeOptions as WinsorizeOptions + +# Functions +from pyarrow._compute import call_function as call_function + +# Udf +from pyarrow._compute import call_tabular_function as call_tabular_function +from pyarrow._compute import function_registry as function_registry +from pyarrow._compute import get_function as get_function +from pyarrow._compute import list_functions as list_functions +from pyarrow._compute import register_aggregate_function as register_aggregate_function +from pyarrow._compute import register_scalar_function as register_scalar_function +from pyarrow._compute import register_tabular_function as register_tabular_function +from pyarrow._compute import register_vector_function as register_vector_function + +from pyarrow._compute import _Order, _Placement +from pyarrow._stubs_typing import ArrayLike, ScalarLike +from . import lib + +_P = ParamSpec("_P") +_R = TypeVar("_R") + +def field(*name_or_index: str | tuple[str, ...] 
| int) -> Expression: + """Reference a column of the dataset. + + Stores only the field's name. Type and other information is known only when + the expression is bound to a dataset having an explicit scheme. + + Nested references are allowed by passing multiple names or a tuple of + names. For example ``('foo', 'bar')`` references the field named "bar" + inside the field named "foo". + + Parameters + ---------- + *name_or_index : string, multiple strings, tuple or int + The name or index of the (possibly nested) field the expression + references to. + + Returns + ------- + field_expr : Expression + Reference to the given field + + Examples + -------- + >>> import pyarrow.compute as pc + >>> pc.field("a") + + >>> pc.field(1) + + >>> pc.field(("a", "b")) + >> pc.field("a", "b") + Expression: + """Expression representing a scalar value. + + Creates an Expression object representing a scalar value that can be used + in compute expressions and predicates. + + Parameters + ---------- + value : bool, int, float or string + Python value of the scalar. This function accepts any value that can be + converted to a ``pyarrow.Scalar`` using ``pa.scalar()``. + + Notes + ----- + This function differs from ``pyarrow.scalar()`` in the following way: + + * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents + a single value in Arrow's memory model. + * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing + a scalar value that can be used in compute expressions, predicates, and + dataset filtering operations. + + Returns + ------- + scalar_expr : Expression + An Expression representing the scalar value + """ + +def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ... + +# ============= compute functions ============= +_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType) +_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True) +_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar) +_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray) +_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray) +ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT] +ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT + +SignedIntegerScalar: TypeAlias = ( + lib.Scalar[lib.Int8Type] + | lib.Scalar[lib.Int16Type] + | lib.Scalar[lib.Int32Type] + | lib.Scalar[lib.Int64Type] +) +UnsignedIntegerScalar: TypeAlias = ( + lib.Scalar[lib.UInt8Type] + | lib.Scalar[lib.UInt16Type] + | lib.Scalar[lib.Uint32Type] + | lib.Scalar[lib.UInt64Type] +) +IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar +FloatScalar: TypeAlias = ( + lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type] +) +DecimalScalar: TypeAlias = ( + lib.Scalar[lib.Decimal32Type] + | lib.Scalar[lib.Decimal64Type] + | lib.Scalar[lib.Decimal128Type] + | lib.Scalar[lib.Decimal256Type] +) +NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar +NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar +BinaryScalar: TypeAlias = ( + lib.Scalar[lib.BinaryType] + | lib.Scalar[lib.LargeBinaryType] + | lib.Scalar[lib.FixedSizeBinaryType] +) +StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType] +StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar +_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any] +_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] 
| lib.LargeListViewScalar[_DataTypeT] +ListScalar: TypeAlias = ( + lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] +) +TemporalScalar: TypeAlias = ( + lib.Date32Scalar + | lib.Date64Scalar + | lib.Time32Scalar[Any] + | lib.Time64Scalar[Any] + | lib.TimestampScalar[Any] + | lib.DurationScalar[Any] + | lib.MonthDayNanoIntervalScalar +) +NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar +NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar + +_NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] +_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) +_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) +NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] +_NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) +NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT] +_NumericOrTemporalArrayT = TypeVar("_NumericOrTemporalArrayT", bound=NumericOrTemporalArray) +BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar] +_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray) +IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar] +_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar) +FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar] +_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray) +_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar) +StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar] +_StringArrayT = TypeVar("_StringArrayT", bound=StringArray) +_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar) +BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar] +_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray) +_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar) +StringOrBinaryArray: TypeAlias = StringArray | BinaryArray +_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray) +_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar) +TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar] +_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray) +_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]] +_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]] +ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]] +# =============================== 1. Aggregation =============================== + +# ========================= 1.1 functions ========================= + +def all( + array: lib.BooleanScalar | BooleanArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: + """ + Test whether all elements in a boolean array evaluate to true. + + Null values are ignored by default. + If the `skip_nulls` option is set to false, then Kleene logic is used. + See "kleene_and" for more details on Kleene logic. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. 
+ min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +any = _clone_signature(all) +""" +Test whether any element in a boolean array evaluates to true. + +Null values are ignored by default. +If the `skip_nulls` option is set to false, then Kleene logic is used. +See "kleene_or" for more details on Kleene logic. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def approximate_median( + array: NumericScalar | NumericArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Approximate median of a numeric array with T-Digest algorithm. + + Nulls and NaNs are ignored. + A null scalar is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of null / non-null values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + options : pyarrow.compute.CountOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count_distinct( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of unique values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. 
+ mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + options : pyarrow.compute.CountOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def first( + array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarT: + """ + Compute the first value in each group. + + Null values are ignored by default. + If skip_nulls = false, then this will return the first and last values + regardless if it is null + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def first_last( + array: lib.Array[Any] | lib.ChunkedArray[Any], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: + """ + Compute the first and last values of an array. + + Null values are ignored by default. + If skip_nulls = false, then this will return the first and last values + regardless if it is null + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def index( + data: lib.Array[Any] | lib.ChunkedArray[Any], + value, + start: int | None = None, + end: int | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Find the index of the first occurrence of a given value. + + Parameters + ---------- + data : Array-like + value : Scalar-like object + The value to search for. + start : int, optional + end : int, optional + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + index : int + the index, or -1 if not found + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"]) + >>> pc.index(arr, "ipsum") + + >>> pc.index(arr, "ipsum", start=2) + + >>> pc.index(arr, "amet") + + """ + +last = _clone_signature(first) +""" +Compute the first and last values of an array. + +Null values are ignored by default. 
+If skip_nulls = false, then this will return the first and last values
+regardless if it is null
+
+Parameters
+----------
+array : Array-like
+    Argument to compute function.
+skip_nulls : bool, default True
+    Whether to skip (ignore) nulls in the input.
+    If False, any null in the input forces the output to null.
+min_count : int, default 1
+    Minimum number of non-null values in the input. If the number
+    of non-null values is below `min_count`, the output is null.
+options : pyarrow.compute.ScalarAggregateOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+max = _clone_signature(first)
+"""
+Compute the minimum or maximum values of a numeric array.
+
+Null values are ignored by default.
+This can be changed through ScalarAggregateOptions.
+
+Parameters
+----------
+array : Array-like
+    Argument to compute function.
+skip_nulls : bool, default True
+    Whether to skip (ignore) nulls in the input.
+    If False, any null in the input forces the output to null.
+min_count : int, default 1
+    Minimum number of non-null values in the input. If the number
+    of non-null values is below `min_count`, the output is null.
+options : pyarrow.compute.ScalarAggregateOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+min = _clone_signature(first)
+"""
+Compute the minimum or maximum values of a numeric array.
+
+Null values are ignored by default.
+This can be changed through ScalarAggregateOptions.
+
+Parameters
+----------
+array : Array-like
+    Argument to compute function.
+skip_nulls : bool, default True
+    Whether to skip (ignore) nulls in the input.
+    If False, any null in the input forces the output to null.
+min_count : int, default 1
+    Minimum number of non-null values in the input. If the number
+    of non-null values is below `min_count`, the output is null.
+options : pyarrow.compute.ScalarAggregateOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+min_max = _clone_signature(first_last)
+"""
+Compute the minimum and maximum values of a numeric array.
+
+Null values are ignored by default.
+This can be changed through ScalarAggregateOptions.
+
+Parameters
+----------
+array : Array-like
+    Argument to compute function.
+skip_nulls : bool, default True
+    Whether to skip (ignore) nulls in the input.
+    If False, any null in the input forces the output to null.
+min_count : int, default 1
+    Minimum number of non-null values in the input. If the number
+    of non-null values is below `min_count`, the output is null.
+options : pyarrow.compute.ScalarAggregateOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+""" + +@overload +def mean( + array: FloatScalar | FloatArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: ... +@overload +def mean( + array: lib.NumericArray[lib.Decimal128Scalar] + | lib.ChunkedArray[lib.Decimal128Scalar] + | lib.Decimal128Scalar, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Decimal128Scalar: ... +@overload +def mean( + array: lib.NumericArray[lib.Decimal256Scalar] + | lib.ChunkedArray[lib.Decimal256Scalar] + | lib.Decimal256Scalar, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Decimal256Scalar: ... +def mean(*args, **kwargs): + """ + Compute the mean of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + The result is a double for integer and floating point arguments, + and a decimal with the same bit-width/precision/scale for decimal arguments. + For integers and floats, NaN is returned if min_count = 0 and + there are no values. For decimals, null is returned instead. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def mode( + array: NumericScalar | NumericArray, + /, + n: int = 1, + *, + skip_nulls: bool = True, + min_count: int = 0, + options: ModeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: + """ + Compute the modal (most common) values of a numeric array. + + Compute the n most common values and their respective occurrence counts. + The output has type `struct`, where T is the + input type. + The results are ordered by descending `count` first, and ascending `mode` + when breaking ties. + Nulls are ignored. If there are no non-null values in the array, + an empty array is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + n : int, default 1 + Number of distinct most-common values to return. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ModeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) + >>> modes = pc.mode(arr, 2) + >>> modes[0] + + >>> modes[1] + + """ + +def product( + array: _ScalarT | lib.NumericArray[_ScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarT: + """ + Compute the product of values in a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def quantile( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + options: QuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Compute an array of quantiles of a numeric array or chunked array. + + By default, 0.5 quantile (median) is returned. + If quantile lies between two data points, an interpolated value is + returned based on selected interpolation method. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.QuantileOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def stddev( + array: NumericScalar | NumericArray, + /, + *, + ddof: float = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the standard deviation of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. 
+ By default (`ddof` = 0), the population standard deviation is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def sum( + array: _NumericScalarT | NumericArray[_NumericScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: + """ + Compute the sum of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def tdigest( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + options: TDigestOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Approximate quantiles of a numeric array with T-Digest algorithm. + + By default, 0.5 quantile (median) is returned. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.TDigestOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ + """ + +def variance( + array: NumericScalar | NumericArray, + /, + *, + ddof: int = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the variance of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. + By default (`ddof` = 0), the population variance is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def top_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the top-k ordered elements from array- or table-like + data. + + This is a specialization for :func:`select_k_unstable`. Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get top indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array + Indices of the top-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.top_k_unstable(arr, k=3) + + [ + 5, + 4, + 2 + ] + """ + +def bottom_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the bottom-k ordered elements from + array- or table-like data. + + This is a specialization for :func:`select_k_unstable`. Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get bottom indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array of indices + Indices of the bottom-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.bottom_k_unstable(arr, k=3) + + [ + 0, + 1, + 2 + ] + """ + +# ========================= 2. 
Element-wise (“scalar”) functions ========================= + +# ========================= 2.1 Arithmetic ========================= +@overload +def abs( + x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT: ... +@overload +def abs( + x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationArrayT: ... +@overload +def abs(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def abs(*args, **kwargs): + """ + Calculate the absolute value of the argument element-wise. + + Results will wrap around on integer overflow. + Use function "abs_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +abs_checked = _clone_signature(abs) +""" +Calculate the absolute value of the argument element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "abs". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def add( + x: _NumericOrTemporalScalarT, + y: _NumericOrTemporalScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT: ... +@overload +def add( + x: _NumericOrTemporalArrayT, + y: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: Expression, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def add( + x: NumericOrTemporalScalar, + y: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: _NumericOrTemporalArrayT, + y: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def add( + x: NumericOrTemporalScalar, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def add( + x: Expression, y: NumericOrTemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def add(*args, **kwargs): + """ + Add the arguments element-wise. + + Results will wrap around on integer overflow. + Use function "add_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +add_checked = _clone_signature(add) +""" +Add the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "add". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
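+
+Examples
+--------
+A minimal illustrative example (not part of the upstream docstring); int8
+values are chosen to force overflow, where "add" wraps around while
+"add_checked" raises an error:
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.add(pa.scalar(120, pa.int8()), pa.scalar(10, pa.int8())).as_py()
+-126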
+ +""" + +@overload +def divide( + dividend: _NumericOrTemporalScalarT, + divisor: _NumericOrTemporalScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT: ... +@overload +def divide( + dividend: _NumericOrTemporalArrayT, + divisor: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: Expression, + divisor: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def divide( + dividend: NumericOrTemporalScalar, + divisor: _NumericOrTemporalArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: _NumericOrTemporalArrayT, + divisor: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def divide( + dividend: NumericOrTemporalScalar, + divisor: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def divide( + dividend: Expression, + divisor: NumericOrTemporalScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def divide(*args, **kwargs): + """ + Divide the arguments element-wise. + + Integer division by zero returns an error. However, integer overflow + wraps around, and floating-point division by zero returns an infinite. + Use function "divide_checked" if you want to get an error + in all the aforementioned cases. + + Parameters + ---------- + dividend : Array-like or scalar-like + Argument to compute function. + divisor : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +divide_checked = _clone_signature(divide) +""" +Divide the arguments element-wise. + +An error is returned when trying to divide by zero, or when +integer overflow is encountered. + +Parameters +---------- +dividend : Array-like or scalar-like + Argument to compute function. +divisor : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def exp( + exponent: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatArrayT: ... +@overload +def exp( + exponent: ArrayOrChunkedArray[NonFloatNumericScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: ... +@overload +def exp( + exponent: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatScalarT: ... +@overload +def exp( + exponent: NonFloatNumericScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.DoubleScalar: ... +@overload +def exp(exponent: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def exp(*args, **kwargs): + """ + Compute Euler's number raised to the power of specified exponent, element-wise. + + If exponent is null the result will be null. + + Parameters + ---------- + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +multiply = _clone_signature(add) +""" +Multiply the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "multiply_checked" if you want overflow +to return an error. 
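+
+For example (illustrative, not part of the upstream docstring; ``pa``/``pc``
+are the usual ``pyarrow`` and ``pyarrow.compute`` imports)::
+
+    pc.multiply(pa.array([2, 3, 4]), 10)   # -> [20, 30, 40]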
+ +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +multiply_checked = _clone_signature(add) +""" +Multiply the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "multiply". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def negate( + x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT: ... +@overload +def negate( + x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationArrayT: ... +@overload +def negate(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def negate(*args, **kwargs): + """ + Negate the argument element-wise. + + Results will wrap around on integer overflow. + Use function "negate_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +negate_checked = _clone_signature(negate) +""" +Negate the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "negate". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def power( + base: _NumericScalarT, + exponent: _NumericScalarT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def power( + base: _NumericArrayT, + exponent: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: Expression, + exponent: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def power( + base: _NumericArrayT, + exponent: NumericScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: NumericScalar, + exponent: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def power( + base: NumericScalar, + exponent: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def power( + base: Expression, + exponent: NumericScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def power(*args, **kwargs): + """ + Raise arguments to power element-wise. + + Integer to negative integer power returns an error. However, integer overflow + wraps around. If either base or exponent is null the result will be null. + + Parameters + ---------- + base : Array-like or scalar-like + Argument to compute function. + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
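+
+    Examples
+    --------
+    A minimal illustrative example (not part of the upstream docstring):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> pc.power(pa.array([2, 3, 4]), 2).to_pylist()
+    [4, 9, 16]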
+ """ + +power_checked = _clone_signature(power) +""" +Raise arguments to power element-wise. + +An error is returned when integer to negative integer power is encountered, +or integer overflow is encountered. + +Parameters +---------- +base : Array-like or scalar-like + Argument to compute function. +exponent : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def sign( + x: NumericOrDurationArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> ( + lib.NumericArray[lib.Int8Scalar] + | lib.NumericArray[lib.FloatScalar] + | lib.NumericArray[lib.DoubleScalar] +): ... +@overload +def sign( + x: NumericOrDurationScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar: ... +@overload +def sign(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def sign(*args, **kwargs): + """ + Get the signedness of the arguments element-wise. + + Output is any of (-1,1) for nonzero inputs and 0 for zero input. + NaN values return NaN. Integral values return signedness as Int8 and + floating-point values return it with the same type as the input values. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +@overload +def sqrt(x: NumericArray, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray: ... +@overload +def sqrt(x: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatScalar: ... +@overload +def sqrt(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def sqrt(*args, **kwargs): + """ + Takes the square root of arguments element-wise. + + A negative argument returns a NaN. For a variant that returns an + error, use function "sqrt_checked". + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +sqrt_checked = _clone_signature(sqrt) +""" +Takes the square root of arguments element-wise. + +A negative argument returns an error. For a variant that returns a +NaN, use function "sqrt". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +subtract = _clone_signature(add) +""" +Subtract the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "subtract_checked" if you want overflow +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +subtract_checked = _clone_signature(add) +""" +Subtract the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "subtract". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.1 Bit-wise functions ========================= +@overload +def bit_wise_and( + x: _NumericScalarT, y: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT: ... +@overload +def bit_wise_and( + x: _NumericArrayT, + y: _NumericArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: NumericScalar, y: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: _NumericArrayT, y: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_and( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def bit_wise_and( + x: Expression, + y: NumericScalar | ArrayOrChunkedArray[NumericScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def bit_wise_and( + x: NumericScalar | ArrayOrChunkedArray[NumericScalar], + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def bit_wise_and(*args, **kwargs): + """ + Bit-wise AND the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def bit_wise_not( + x: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT: ... +@overload +def bit_wise_not( + x: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericArrayT: ... +@overload +def bit_wise_not(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def bit_wise_not(*args, **kwargs): + """ + Bit-wise negate the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +bit_wise_or = _clone_signature(bit_wise_and) +""" +Bit-wise OR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +bit_wise_xor = _clone_signature(bit_wise_and) +""" +Bit-wise XOR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +`x` is returned if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. 
+Use function "shift_left_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left_checked = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_left" for a variant that doesn't fail for an invalid shift amount. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +`x` is returned if `y` (the amount to shift by) is: (1) negative or +(2) greater than or equal to the precision of `x`. +Use function "shift_right_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right_checked = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_right" for a variant that doesn't fail for an invalid shift amount + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.2 Rounding functions ========================= +@overload +def ceil(x: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT: ... +@overload +def ceil(x: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatArrayT: ... +@overload +def ceil(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def ceil(*args, **kwargs): + """ + Round up to the nearest integer. + + Compute the smallest integer value not less in magnitude than `x`. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor = _clone_signature(ceil) +""" +Round down to the nearest integer. + +Compute the largest integer value not greater in magnitude than `x`. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
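+
+Examples
+--------
+A minimal illustrative example (not part of the upstream docstring):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.floor(pa.array([1.7, -1.7])).to_pylist()
+[1.0, -2.0]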
+""" + +@overload +def round( + x: _NumericScalarT, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round( + x: _NumericArrayT, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def round( + x: Expression, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round(*args, **kwargs): + """ + Round to a given precision. + + Options are used to control the number of digits and rounding mode. + Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def round_to_multiple( + x: _NumericScalarT, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round_to_multiple( + x: _NumericArrayT, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def round_to_multiple( + x: Expression, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round_to_multiple(*args, **kwargs): + """ + Round to a given multiple. + + Options are used to control the rounding multiple and rounding mode. 
+ Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundToMultipleOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def round_binary( + x: _NumericScalarT, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: ... +@overload +def round_binary( + x: _NumericScalarT, + s: Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[_NumericScalarT]: ... +@overload +def round_binary( + x: _NumericArrayT, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def round_binary( + x: Expression, + s: Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def round_binary(*args, **kwargs): + """ + Round to the given precision. + + Options are used to control the rounding mode. + Default behavior is to use the half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + s : Array-like or scalar-like + Argument to compute function. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundBinaryOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +trunc = _clone_signature(ceil) +""" +Compute the integral part. + +Compute the nearest integer not greater in magnitude than `x`. 
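+
+For example (illustrative, not part of the upstream docstring; ``pa``/``pc``
+are the usual ``pyarrow`` and ``pyarrow.compute`` imports)::
+
+    pc.trunc(pa.array([1.7, -1.7]))   # -> [1.0, -1.0]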
+ +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.3 Logarithmic functions ========================= +@overload +def ln( + x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... +@overload +def ln( + x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def ln(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def ln(*args, **kwargs): + """ + Compute natural logarithm. + + Non-positive values return -inf or NaN. Null values return null. + Use function "ln_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ln_checked = _clone_signature(ln) +""" +Compute natural logarithm. + +Non-positive values raise an error. Null values return null. +Use function "ln" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10 = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log10_checked" if you want non-positive values +to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10_checked = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log10" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p = _clone_signature(ln) +""" +Compute natural log of (1+x). + +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p_checked" if you want invalid values to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p_checked = _clone_signature(ln) +""" +Compute natural log of (1+x). + +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p" if you want invalid values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2 = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log2_checked" if you want non-positive values +to raise an error. 
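+
+For example (illustrative, not part of the upstream docstring; ``pa``/``pc``
+are the usual ``pyarrow`` and ``pyarrow.compute`` imports)::
+
+    pc.log2(pa.array([1.0, 8.0]))   # -> [0.0, 3.0]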
+ +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2_checked = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log2" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def logb( + x: FloatScalar, b: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... +@overload +def logb( + x: FloatArray, b: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: FloatScalar, + b: FloatArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: FloatArray, + b: FloatScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def logb( + x: Expression | Any, b: Expression | Any, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression | Any: ... +def logb(*args, **kwargs): + """ + Compute base `b` logarithm. + + Values <= 0 return -inf or NaN. Null values return null. + Use function "logb_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + b : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +logb_checked = _clone_signature(logb) +""" +Compute base `b` logarithm. + +Values <= 0 return -inf or NaN. Null values return null. +Use function "logb" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +b : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.4 Trigonometric functions ========================= +acos = _clone_signature(ln) +""" +Compute the inverse cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "acos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +acos_checked = _clone_signature(ln) +""" +Compute the inverse cosine. + +Invalid input values raise an error; +to return NaN instead, see "acos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin = _clone_signature(ln) +""" +Compute the inverse sine. + +NaN is returned for invalid input values; +to raise an error instead, see "asin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin_checked = _clone_signature(ln) +""" +Compute the inverse sine. + +Invalid input values raise an error; +to return NaN instead, see "asin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +atan = _clone_signature(ln) +""" +Compute the inverse tangent of x. + +The return value is in the range [-pi/2, pi/2]; +for a full return range [-pi, pi], see "atan2". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos = _clone_signature(ln) +""" +Compute the cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "cos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos_checked = _clone_signature(ln) +""" +Compute the cosine. + +Infinite values raise an error; +to return NaN instead, see "cos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin = _clone_signature(ln) +""" +Compute the sine. + +NaN is returned for invalid input values; +to raise an error instead, see "sin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin_checked = _clone_signature(ln) +""" +Compute the sine. + +Invalid input values raise an error; +to return NaN instead, see "sin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan = _clone_signature(ln) +""" +Compute the tangent. + +NaN is returned for invalid input values; +to raise an error instead, see "tan_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan_checked = _clone_signature(ln) +""" +Compute the tangent. + +Infinite values raise an error; +to return NaN instead, see "tan". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def atan2( + y: FloatScalar, x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar: ... +@overload +def atan2( + y: FloatArray, x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def atan2( + y: FloatArray, + x: FloatScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... 
+@overload +def atan2( + y: FloatScalar, + x: FloatArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... +@overload +def atan2( + y: Expression, x: Any, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def atan2( + y: Any, x: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def atan2(*args, **kwargs): + """ + Compute the inverse tangent of y/x. + + The return value is in the range [-pi, pi]. + + Parameters + ---------- + y : Array-like or scalar-like + Argument to compute function. + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.5 Comparisons functions ========================= +@overload +def equal( + x: lib.Scalar, y: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def equal( + x: lib.Scalar, + y: lib.Array | lib.ChunkedArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: lib.Array | lib.ChunkedArray, + y: lib.Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: lib.Array | lib.ChunkedArray, + y: lib.Array | lib.ChunkedArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def equal( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def equal( + x: lib.Scalar, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def equal( + x: Expression, + y: lib.Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def equal(*args, **kwargs): + """ + Compare values for equality (x == y). + + A null on either side emits a null comparison result. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +greater = _clone_signature(equal) +""" +Compare values for ordered inequality (x > y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +greater_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x >= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less = _clone_signature(equal) +""" +Compare values for ordered inequality (x < y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x <= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +not_equal = _clone_signature(equal) +""" +Compare values for inequality (x != y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def max_element_wise( + *args: ScalarOrArray[_Scalar_CoT], + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _Scalar_CoT: ... +@overload +def max_element_wise( + *args: Expression, + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def max_element_wise(*args, **kwargs): + """ + Find the element-wise maximum value. + + Nulls are ignored (by default) or propagated. + NaN is preferred over null, but not over any valid value. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +min_element_wise = _clone_signature(max_element_wise) +""" +Find the element-wise minimum value. + +Nulls are ignored (by default) or propagated. +NaN is preferred over null, but not over any valid value. + +Parameters +---------- +*args : Array-like or scalar-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.6 Logical functions ========================= +@overload +def and_( + x: lib.BooleanScalar, y: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def and_( + x: BooleanArray, + y: BooleanArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def and_( + x: Expression, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: lib.BooleanScalar, + y: BooleanArray, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def and_( + x: BooleanArray, + y: lib.BooleanScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... 
+@overload +def and_( + x: lib.BooleanScalar, + y: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: Expression, + y: lib.BooleanScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def and_( + x: ScalarOrArray[lib.BooleanScalar], + y: ScalarOrArray[lib.BooleanScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ScalarOrArray[lib.BooleanScalar]: ... +def and_(*args, **kwargs): + """ + Logical 'and' boolean values. + + When a null is encountered in either input, a null is output. + For a different null behavior, see function "and_kleene". + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +and_kleene = _clone_signature(and_) +""" +Logical 'and' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and null = null +- null and true = null +- false and null = false +- null and false = false +- null and null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and' false is always false. +For a different null behavior, see function "and". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not = _clone_signature(and_) +""" +Logical 'and not' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "and_not_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not_kleene = _clone_signature(and_) +""" +Logical 'and not' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and not null = null +- null and not false = null +- false and not null = false +- null and not true = false +- null and not null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and not' true is always false, as is false +'and not' an unknown value. +For a different null behavior, see function "and_not". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_ = _clone_signature(and_) +""" +Logical 'or' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "or_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_kleene = _clone_signature(and_) +""" +Logical 'or' boolean values (Kleene logic). 
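+
+For example (illustrative, not part of the upstream docstring; the null
+handling follows the rules listed below)::
+
+    pc.or_kleene(pa.array([False, None]), pa.array([None, True])).to_pylist()
+    # -> [None, True]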
+ +This function behaves as follows with nulls: + +- true or null = true +- null or true = true +- false or null = null +- null or false = null +- null or null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'or' true is always true. +For a different null behavior, see function "or". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +xor = _clone_signature(and_) +""" +Logical 'xor' boolean values. + +When a null is encountered in either input, a null is output. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def invert( + x: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def invert( + x: _BooleanArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _BooleanArrayT: ... +@overload +def invert( + x: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def invert(*args, **kwargs): + """ + Invert boolean values. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.10 String predicates ========================= +@overload +def ascii_is_alnum( + strings: StringScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def ascii_is_alnum( + strings: StringArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def ascii_is_alnum( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def ascii_is_alnum(*args, **kwargs): + """ + Classify strings as ASCII alphanumeric. + + For each string in `strings`, emit true iff the string is non-empty + and consists only of alphanumeric ASCII characters. Null strings emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
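+
+Examples
+--------
+A minimal illustrative example (not part of the upstream docstring):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.ascii_is_decimal(pa.array(["123", "1a", ""])).to_pylist()
+[True, False, False]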
+""" +ascii_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alnum = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphanumeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphanumeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_digit = _clone_signature(ascii_is_alnum) +""" +Classify strings as digits. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of Unicode digits. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_numeric = _clone_signature(ascii_is_alnum) +""" +Classify strings as numeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of numeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +string_is_ascii = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII. 
+ +For each string in `strings`, emit true iff the string consists only +of ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.11 String transforms ========================= +@overload +def ascii_capitalize( + strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT: ... +@overload +def ascii_capitalize( + strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringArrayT: ... +@overload +def ascii_capitalize( + strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def ascii_capitalize(*args, **kwargs): + """ + Capitalize the first character of ASCII input. + + For each string in `strings`, return a capitalized version. + + This function assumes the input is fully ASCII. If it may contain + non-ASCII characters, use "utf8_capitalize" instead. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_lower = _clone_signature(ascii_capitalize) +""" +Transform ASCII input to lowercase. + +For each string in `strings`, return a lowercase version. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_lower" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_reverse = _clone_signature(ascii_capitalize) +""" +Reverse ASCII input. + +For each ASCII string in `strings`, return a reversed version. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_reverse" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_swapcase = _clone_signature(ascii_capitalize) +""" +Transform ASCII input by inverting casing. + +For each string in `strings`, return a string with opposite casing. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_swapcase" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_title = _clone_signature(ascii_capitalize) +""" +Titlecase each word of ASCII input. + +For each string in `strings`, return a titlecased version. +Each word in the output will start with an uppercase character and its +remaining characters will be lowercase. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_title" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_upper = _clone_signature(ascii_capitalize) +""" +Transform ASCII input to uppercase. + +For each string in `strings`, return an uppercase version. 
+
+This function assumes the input is fully ASCII. If it may contain
+non-ASCII characters, use "utf8_upper" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+@overload
+def binary_length(
+    strings: lib.BinaryScalar | lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int32Scalar: ...
+@overload
+def binary_length(
+    strings: lib.LargeBinaryScalar | lib.LargeStringScalar,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+@overload
+def binary_length(
+    strings: lib.BinaryArray
+    | lib.StringArray
+    | lib.ChunkedArray[lib.BinaryScalar]
+    | lib.ChunkedArray[lib.StringScalar],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Array: ...
+@overload
+def binary_length(
+    strings: lib.LargeBinaryArray
+    | lib.LargeStringArray
+    | lib.ChunkedArray[lib.LargeBinaryScalar]
+    | lib.ChunkedArray[lib.LargeStringScalar],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Array: ...
+@overload
+def binary_length(
+    strings: Expression,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def binary_length(*args, **kwargs):
+    """
+    Compute string lengths.
+
+    For each string in `strings`, emit its length of bytes.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def binary_repeat(
+    strings: _StringOrBinaryScalarT,
+    num_repeats: int,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT: ...
+@overload
+def binary_repeat(
+    strings: _StringOrBinaryScalarT,
+    num_repeats: list[int] | list[int | None],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Array[_StringOrBinaryScalarT]: ...
+@overload
+def binary_repeat(
+    strings: _StringOrBinaryArrayT,
+    num_repeats: int | list[int] | list[int | None],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryArrayT: ...
+@overload
+def binary_repeat(
+    strings: Expression,
+    num_repeats: int | list[int] | list[int | None],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def binary_repeat(*args, **kwargs):
+    """
+    Repeat a binary string.
+
+    For each binary string in `strings`, return a replicated version.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    num_repeats : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def binary_replace_slice(
+    strings: _StringOrBinaryScalarT,
+    /,
+    start: int,
+    stop: int,
+    replacement: str | bytes,
+    *,
+    options: ReplaceSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT: ...
+@overload
+def binary_replace_slice(
+    strings: _StringOrBinaryArrayT,
+    /,
+    start: int,
+    stop: int,
+    replacement: str | bytes,
+    *,
+    options: ReplaceSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryArrayT: ...
+@overload
+def binary_replace_slice(
+    strings: Expression,
+    /,
+    start: int,
+    stop: int,
+    replacement: str | bytes,
+    *,
+    options: ReplaceSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def binary_replace_slice(*args, **kwargs):
+    """
+    Replace a slice of a binary string.
+
+    For each string in `strings`, replace a slice of the string defined by `start`
+    and `stop` indices with the given `replacement`. `start` is inclusive
+    and `stop` is exclusive, and both are measured in bytes.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    start : int
+        Index to start slicing at (inclusive).
+    stop : int
+        Index to stop slicing at (exclusive).
+    replacement : str
+        What to replace the slice with.
+    options : pyarrow.compute.ReplaceSliceOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def binary_reverse(
+    strings: _BinaryScalarT, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _BinaryScalarT: ...
+@overload
+def binary_reverse(
+    strings: _BinaryArrayT, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _BinaryArrayT: ...
+@overload
+def binary_reverse(
+    strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+def binary_reverse(*args, **kwargs):
+    """
+    Reverse binary input.
+
+    For each binary string in `strings`, return a reversed version.
+
+    This function reverses the binary data at a byte-level.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def replace_substring(
+    strings: _StringScalarT,
+    /,
+    pattern: str | bytes,
+    replacement: str | bytes,
+    *,
+    max_replacements: int | None = None,
+    options: ReplaceSubstringOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT: ...
+@overload
+def replace_substring(
+    strings: _StringArrayT,
+    /,
+    pattern: str | bytes,
+    replacement: str | bytes,
+    *,
+    max_replacements: int | None = None,
+    options: ReplaceSubstringOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringArrayT: ...
+@overload
+def replace_substring(
+    strings: Expression,
+    /,
+    pattern: str | bytes,
+    replacement: str | bytes,
+    *,
+    max_replacements: int | None = None,
+    options: ReplaceSubstringOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def replace_substring(*args, **kwargs):
+    """
+    Replace matching non-overlapping substrings with replacement.
+
+    For each string in `strings`, replace non-overlapping substrings that match
+    the given literal `pattern` with the given `replacement`.
+    If `max_replacements` is given and not equal to -1, it limits the
+    maximum amount of replacements per input, counted from the left.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    pattern : str
+        Substring pattern to look for inside input values.
+    replacement : str
+        What to replace the pattern with.
+    max_replacements : int or None, default None
+        The maximum number of strings to replace in each
+        input value (unlimited if None).
+    options : pyarrow.compute.ReplaceSubstringOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+replace_substring_regex = _clone_signature(replace_substring)
+"""
+Replace matching non-overlapping substrings with replacement.
+
+For each string in `strings`, replace non-overlapping substrings that match
+the given regular expression `pattern` with the given `replacement`.
+If `max_replacements` is given and not equal to -1, it limits the
+maximum amount of replacements per input, counted from the left.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+pattern : str
+    Substring pattern to look for inside input values.
+replacement : str
+    What to replace the pattern with.
+max_replacements : int or None, default None
+    The maximum number of strings to replace in each
+    input value (unlimited if None).
+options : pyarrow.compute.ReplaceSubstringOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+@overload
+def utf8_capitalize(
+    strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _StringScalarT: ...
+@overload
+def utf8_capitalize(
+    strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _StringArrayT: ...
+@overload
+def utf8_capitalize(
+    strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+def utf8_capitalize(*args, **kwargs):
+    """
+    Capitalize the first character of input.
+
+    For each string in `strings`, return a capitalized version,
+    with the first character uppercased and the others lowercased.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def utf8_length(
+    strings: lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int32Scalar: ...
+@overload
+def utf8_length(
+    strings: lib.LargeStringScalar,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar: ...
+@overload
+def utf8_length(
+    strings: lib.StringArray | lib.ChunkedArray[lib.StringScalar],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int32Array: ...
+@overload
+def utf8_length(
+    strings: lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar],
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Array: ...
+@overload
+def utf8_length(
+    strings: Expression,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def utf8_length(*args, **kwargs):
+    """
+    Compute UTF8 string lengths.
+
+    For each string in `strings`, emit its length in UTF8 characters.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+utf8_lower = _clone_signature(utf8_capitalize)
+"""
+Transform input to lowercase.
+
+For each string in `strings`, return a lowercase version.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
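+
+Examples
+--------
+A minimal usage sketch (illustrative only); unlike ``ascii_lower`` this also
+lowercases non-ASCII characters:
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.utf8_lower(pa.array(["PyArrow", "ÉTÉ", None])).to_pylist()
+['pyarrow', 'été', None]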
+""" + +@overload +def utf8_replace_slice( + strings: _StringScalarT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def utf8_replace_slice( + strings: _StringArrayT, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def utf8_replace_slice( + strings: Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_replace_slice(*args, **kwargs): + """ + Replace a slice of a string. + + For each string in `strings`, replace a slice of the string defined by `start` + and `stop` indices with the given `replacement`. `start` is inclusive + and `stop` is exclusive, and both are measured in UTF8 characters. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + options : pyarrow.compute.ReplaceSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +utf8_reverse = _clone_signature(utf8_capitalize) +""" +Reverse input. + +For each string in `strings`, return a reversed version. + +This function operates on Unicode codepoints, not grapheme +clusters. Hence, it will not correctly reverse grapheme clusters +composed of multiple codepoints. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_swapcase = _clone_signature(utf8_capitalize) +""" +Transform input lowercase characters to uppercase and uppercase characters to lowercase. + +For each string in `strings`, return an opposite case version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_title = _clone_signature(utf8_capitalize) +""" +Titlecase each word of input. + +For each string in `strings`, return a titlecased version. +Each word in the output will start with an uppercase character and its +remaining characters will be lowercase. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_upper = _clone_signature(utf8_capitalize) +""" +Transform input to uppercase. + +For each string in `strings`, return an uppercase version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+# ========================= 2.12 String padding =========================
+@overload
+def ascii_center(
+    strings: _StringScalarT,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT: ...
+@overload
+def ascii_center(
+    strings: _StringArrayT,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _StringArrayT: ...
+@overload
+def ascii_center(
+    strings: Expression,
+    /,
+    width: int,
+    padding: str = " ",
+    lean_left_on_odd_padding: bool = True,
+    *,
+    options: PadOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def ascii_center(*args, **kwargs):
+    """
+    Center strings by padding with a given character.
+
+    For each string in `strings`, emit a centered string by padding both sides
+    with the given ASCII character.
+    Null values emit null.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    width : int
+        Desired string length.
+    padding : str, default " "
+        What to pad the string with. Should be one byte or codepoint.
+    lean_left_on_odd_padding : bool, default True
+        What to do if there is an odd number of padding characters (in case
+        of centered padding). Defaults to aligning on the left (i.e. adding
+        the extra padding character on the right).
+    options : pyarrow.compute.PadOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+ascii_lpad = _clone_signature(ascii_center)
+"""
+Right-align strings by padding with a given character.
+
+For each string in `strings`, emit a right-aligned string by prepending
+the given ASCII character.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+width : int
+    Desired string length.
+padding : str, default " "
+    What to pad the string with. Should be one byte or codepoint.
+lean_left_on_odd_padding : bool, default True
+    What to do if there is an odd number of padding characters (in case
+    of centered padding). Defaults to aligning on the left (i.e. adding
+    the extra padding character on the right).
+options : pyarrow.compute.PadOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+ascii_rpad = _clone_signature(ascii_center)
+"""
+Left-align strings by padding with a given character.
+
+For each string in `strings`, emit a left-aligned string by appending
+the given ASCII character.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+width : int
+    Desired string length.
+padding : str, default " "
+    What to pad the string with. Should be one byte or codepoint.
+lean_left_on_odd_padding : bool, default True
+    What to do if there is an odd number of padding characters (in case
+    of centered padding). Defaults to aligning on the left (i.e. adding
+    the extra padding character on the right).
+options : pyarrow.compute.PadOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_center = _clone_signature(ascii_center) +""" +Center strings by padding with a given character. + +For each string in `strings`, emit a centered string by padding both sides +with the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_lpad = _clone_signature(ascii_center) +""" +Right-align strings by padding with a given character. + +For each string in `strings`, emit a right-aligned string by prepending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rpad = _clone_signature(ascii_center) +""" +Left-align strings by padding with a given character. + +For each string in `strings`, emit a left-aligned string by appending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.13 String trimming ========================= +@overload +def ascii_ltrim( + strings: _StringScalarT, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def ascii_ltrim( + strings: _StringArrayT, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def ascii_ltrim( + strings: Expression, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ascii_ltrim(*args, **kwargs): + """ + Trim leading characters. 
+ + For each string in `strings`, remove any leading characters + from the `characters` option (as given in TrimOptions). + Null values emit null. + Both the `strings` and the `characters` are interpreted as + ASCII; to trim non-ASCII characters, use `utf8_ltrim`. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + characters : str + Individual characters to be trimmed from the string. + options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. +Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_rtrim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. + +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. +Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_trim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim = _clone_signature(ascii_ltrim) +""" +Trim leading characters. + +For each string in `strings`, remove any leading characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. + +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. 
+ +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def ascii_ltrim_whitespace( + strings: _StringScalarT, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def ascii_ltrim_whitespace( + strings: _StringArrayT, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def ascii_ltrim_whitespace( + strings: Expression, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ascii_ltrim_whitespace(*args, **kwargs): + """ + Trim leading ASCII whitespace characters. + + For each string in `strings`, emit a string with leading ASCII whitespace + characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode + whitespace characters. Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with trailing ASCII whitespace +characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading and trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with leading and trailing ASCII +whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading whitespace characters. + +For each string in `strings`, emit a string with leading whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing whitespace characters. + +For each string in `strings`, emit a string with trailing whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
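+
+Examples
+--------
+A minimal usage sketch (illustrative only); trailing Unicode whitespace is
+removed while leading whitespace is kept:
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.utf8_rtrim_whitespace(pa.array(["hi  ", " x ", None])).to_pylist()
+['hi', ' x', None]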
+"""
+utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace)
+"""
+Trim leading and trailing whitespace characters.
+
+For each string in `strings`, emit a string with leading and trailing
+whitespace characters removed, where whitespace characters are defined
+by the Unicode standard. Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+# ========================= 2.14 String splitting =========================
+@overload
+def ascii_split_whitespace(
+    strings: _StringScalarT,
+    /,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[_StringScalarT]: ...
+@overload
+def ascii_split_whitespace(
+    strings: lib.Array[lib.Scalar[_DataTypeT]],
+    /,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ...
+@overload
+def ascii_split_whitespace(
+    strings: Expression,
+    /,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def ascii_split_whitespace(*args, **kwargs):
+    """
+    Split string according to any ASCII whitespace.
+
+    Split each string according to any non-zero length sequence of ASCII
+    whitespace characters. The output for each string input is a list
+    of strings.
+
+    The maximum number of splits and direction of splitting
+    (forward, reverse) can optionally be defined in SplitOptions.
+
+    Parameters
+    ----------
+    strings : Array-like or scalar-like
+        Argument to compute function.
+    max_splits : int or None, default None
+        Maximum number of splits for each input value (unlimited if None).
+    reverse : bool, default False
+        Whether to start splitting from the end of each input value.
+        This only has an effect if `max_splits` is not None.
+    options : pyarrow.compute.SplitOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def split_pattern(
+    strings: _StringOrBinaryScalarT,
+    /,
+    pattern: str,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitPatternOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[_StringOrBinaryScalarT]: ...
+@overload
+def split_pattern(
+    strings: lib.Array[lib.Scalar[_DataTypeT]],
+    /,
+    pattern: str,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitPatternOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ...
+@overload
+def split_pattern(
+    strings: Expression,
+    /,
+    pattern: str,
+    *,
+    max_splits: int | None = None,
+    reverse: bool = False,
+    options: SplitPatternOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+def split_pattern(*args, **kwargs):
+    """
+    Split string according to separator.
+
+    Split each string according to the exact `pattern` defined in
+    SplitPatternOptions. The output for each string input is a list
+    of strings.
+
+    The maximum number of splits and direction of splitting
+    (forward, reverse) can optionally be defined in SplitPatternOptions.
+ + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +split_pattern_regex = _clone_signature(split_pattern) +""" +Split string according to regex pattern. + +Split each string according to the regex `pattern` defined in +SplitPatternOptions. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitPatternOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + String pattern to split on. +max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. +options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_split_whitespace = _clone_signature(ascii_split_whitespace) +""" +Split string according to any Unicode whitespace. + +Split each string according any non-zero length sequence of Unicode +whitespace characters. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. +options : pyarrow.compute.SplitOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.15 String component extraction ========================= +@overload +def extract_regex( + strings: StringOrBinaryScalar, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: ... +@overload +def extract_regex( + strings: StringOrBinaryArray, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: ... +@overload +def extract_regex( + strings: Expression, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def extract_regex(*args, **kwargs): + """ + Extract substrings captured by a regex pattern. + + For each string in `strings`, match the regular expression and, if + successful, emit a struct with field names and values coming from the + regular expression's named capture groups. 
If the input is null or the + regular expression fails matching, a null output value is emitted. + + Regular expression matching is done using the Google RE2 library. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Regular expression with named capture fields. + options : pyarrow.compute.ExtractRegexOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.16 String join ========================= +def binary_join( + strings, separator, /, *, memory_pool: lib.MemoryPool | None = None +) -> StringScalar | StringArray: + """ + Join a list of strings together with a separator. + + Concatenate the strings in `list`. The `separator` is inserted + between each given string. + Any null input and any null `list` element emits a null output. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + separator : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def binary_join_element_wise( + *strings: _StringOrBinaryScalarT, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT: ... +@overload +def binary_join_element_wise( + *strings: _StringOrBinaryArrayT, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryArrayT: ... +@overload +def binary_join_element_wise( + *strings: Expression, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_join_element_wise(*args, **kwargs): + """ + Join string arguments together, with the last argument as separator. + + Concatenate the `strings` except for the last one. The last argument + in `strings` is inserted between each given string. + Any null separator element emits a null output. Null elements either + emit a null (the default), are skipped, or replaced with a given string. + + Parameters + ---------- + *strings : Array-like or scalar-like + Argument to compute function. + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + options : pyarrow.compute.JoinOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.17 String Slicing ========================= +@overload +def binary_slice( + strings: _BinaryScalarT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryScalarT: ... 
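+# A hedged usage sketch for binary_slice (illustrative only; not part of the
+# typed stub surface). Slicing is measured in bytes, so text with multi-byte
+# UTF-8 characters should go through utf8_slice_codeunits instead:
+#
+#   >>> import pyarrow as pa
+#   >>> import pyarrow.compute as pc
+#   >>> pc.binary_slice(pa.array([b"abcdef"]), start=1, stop=4).to_pylist()
+#   [b'bcd']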
+@overload +def binary_slice( + strings: _BinaryArrayT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryArrayT: ... +@overload +def binary_slice( + strings: Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def binary_slice(*args, **kwargs): + """ + Slice binary string. + + For each binary string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + bytes. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def utf8_slice_codeunits( + strings: _StringScalarT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def utf8_slice_codeunits( + strings: _StringArrayT, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def utf8_slice_codeunits( + strings: Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_slice_codeunits(*args, **kwargs): + """ + Slice string. + + For each string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + UTF8 codeunits. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.18 Containment tests ========================= +@overload +def count_substring( + strings: lib.StringScalar | lib.BinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar: ... 
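+# A hedged usage sketch for count_substring (illustrative only): occurrences
+# of the literal pattern are counted per element, and nulls propagate.
+#
+#   >>> import pyarrow as pa
+#   >>> import pyarrow.compute as pc
+#   >>> pc.count_substring(pa.array(["banana", "pan", None]), "an").to_pylist()
+#   [2, 1, None]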
+@overload +def count_substring( + strings: lib.LargeStringScalar | lib.LargeBinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def count_substring( + strings: lib.StringArray + | lib.BinaryArray + | lib.ChunkedArray[lib.StringScalar] + | lib.ChunkedArray[lib.BinaryScalar], + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def count_substring( + strings: lib.LargeStringArray + | lib.LargeBinaryArray + | lib.ChunkedArray[lib.LargeStringScalar] + | lib.ChunkedArray[lib.LargeBinaryScalar], + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def count_substring( + strings: Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def count_substring(*args, **kwargs): + """ + Count occurrences of substring. + + For each string in `strings`, emit the number of occurrences of the given + literal pattern. + Null inputs emit null. The pattern must be given in MatchSubstringOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +count_substring_regex = _clone_signature(count_substring) +""" +Count occurrences of substring. + +For each string in `strings`, emit the number of occurrences of the given +regular expression pattern. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def ends_with( + strings: StringScalar | BinaryScalar, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def ends_with( + strings: StringArray | BinaryArray, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def ends_with( + strings: Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ends_with(*args, **kwargs): + """ + Check if strings end with a literal pattern. + + For each string in `strings`, emit true iff it ends with a given pattern. 
+ The pattern must be given in MatchSubstringOptions. + If ignore_case is set, only simple case folding is performed. + + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +find_substring = _clone_signature(count_substring) +""" +Find first occurrence of substring. + +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +find_substring_regex = _clone_signature(count_substring) +""" +Find location of first match of regex pattern. + +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def index_in( + values: lib.Scalar, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar: ... +@overload +def index_in( + values: lib.Array | lib.ChunkedArray, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def index_in( + values: Expression, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def index_in(*args, **kwargs): + """ + Return index of each element in a set of values. + + For each element in `values`, return its index in a given set of + values, or null if it is not found there. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. 
+ If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def is_in( + values: lib.Scalar, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def is_in( + values: lib.Array | lib.ChunkedArray, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_in( + values: Expression, + /, + value_set: lib.Array | lib.ChunkedArray, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_in(*args, **kwargs): + """ + Find each element in a set of values. + + For each element in `values`, return true if it is found in a given + set of values, false otherwise. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +match_like = _clone_signature(ends_with) +""" +Match strings against SQL-style LIKE pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. '%' will match any number of characters, '_' will +match exactly one character, and any other character matches itself. +To match a literal '%', '_', or '\', precede the character with a backslash. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring = _clone_signature(ends_with) +""" +Match strings against literal pattern. + +For each string in `strings`, emit true iff it contains a given pattern. +Null inputs emit null. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring_regex = _clone_signature(ends_with) +""" +Match strings against regex pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +starts_with = _clone_signature(ends_with) +""" +Check if strings start with a literal pattern. + +For each string in `strings`, emit true iff it starts with a given pattern. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.19 Categorizations ========================= +@overload +def is_finite( + values: NumericScalar | lib.NullScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_finite( + values: NumericArray | lib.NullArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def is_finite( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def is_finite(*args, **kwargs): + """ + Return true if value is finite. + + For each input value, emit true iff the value is finite + (i.e. neither NaN, inf, nor -inf). + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +is_inf = _clone_signature(is_finite) +""" +Return true if infinity. + +For each input value, emit true iff the value is infinite (inf or -inf). + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +is_nan = _clone_signature(is_finite) +""" +Return true if NaN. + +For each input value, emit true iff the value is NaN. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def is_null( + values: lib.Scalar, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... 
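+# A hedged usage sketch for is_null (illustrative only): NaN is not treated as
+# null by default, but nan_is_null=True counts it as null as well.
+#
+#   >>> import pyarrow as pa
+#   >>> import pyarrow.compute as pc
+#   >>> arr = pa.array([1.0, None, float("nan")])
+#   >>> pc.is_null(arr).to_pylist()
+#   [False, True, False]
+#   >>> pc.is_null(arr, nan_is_null=True).to_pylist()
+#   [False, True, True]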
+@overload +def is_null( + values: lib.Array | lib.ChunkedArray, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_null( + values: Expression, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_null(*args, **kwargs): + """ + Return true if null (and optionally NaN). + + For each input value, emit true iff the value is null. + True may also be emitted for NaN values by setting the `nan_is_null` flag. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + options : pyarrow.compute.NullOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def is_valid( + values: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_valid( + values: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanArray: ... +@overload +def is_valid( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def is_valid(*args, **kwargs): + """ + Return true if non-null. + + For each input value, emit true iff the value is valid (i.e. non-null). + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +true_unless_null = _clone_signature(is_valid) +""" +Return true if non-null, else return null. + +For each input value, emit true iff the value +is valid (non-null), otherwise emit null. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.20 Selecting / multiplexing ========================= +def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): + """ + Choose values based on multiple conditions. + + `cond` must be a struct of Boolean values. `cases` can be a mix + of scalar and array arguments (of any type, but all must be the + same type or castable to a common type), with either exactly one + datum per child of `cond`, or one more `cases` than children of + `cond` (in which case we have an "else" value). + + Each row of the output will be the corresponding value of the + first datum in `cases` for which the corresponding child of `cond` + is true, or otherwise the "else" value (if given), or null. + + Essentially, this implements a switch-case or if-else, if-else... statement. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + *cases : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): + """ + Choose values from several arrays. + + For each row, the value of the first argument is used as a 0-based index + into the list of `values` arrays (i.e. index 0 selects the first of the + `values` arrays). 
The output value is the corresponding value of the + selected argument. + + If an index is null, the output will be null. + + Parameters + ---------- + indices : Array-like or scalar-like + Argument to compute function. + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def coalesce( + *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT: + """ + Select the first non-null value. + + Each row of the output will be the value from the first corresponding input + for which the value is not null. If all inputs are null in a row, the output + will be null. + + Parameters + ---------- + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +fill_null = coalesce +"""Replace each null element in values with a corresponding +element from fill_value. + +If fill_value is scalar-like, then every null element in values +will be replaced with fill_value. If fill_value is array-like, +then the i-th element in values will be replaced with the i-th +element in fill_value. + +The fill_value's type must be the same as that of values, or it +must be able to be implicitly casted to the array's type. + +This is an alias for :func:`coalesce`. + +Parameters +---------- +values : Array, ChunkedArray, or Scalar-like object + Each null element is replaced with the corresponding value + from fill_value. +fill_value : Array, ChunkedArray, or Scalar-like object + If not same type as values, will attempt to cast. + +Returns +------- +result : depends on inputs + Values with all null elements replaced + +Examples +-------- +>>> import pyarrow as pa +>>> arr = pa.array([1, 2, None, 3], type=pa.int8()) +>>> fill_value = pa.scalar(5, type=pa.int8()) +>>> arr.fill_null(fill_value) + +[ + 1, + 2, + 5, + 3 +] +>>> arr = pa.array([1, 2, None, 4, None]) +>>> arr.fill_null(pa.array([10, 20, 30, 40, 50])) + +[ + 1, + 2, + 30, + 4, + 50 +] +""" + +def if_else( + cond: ArrayLike | ScalarLike, + left: ArrayLike | ScalarLike, + right: ArrayLike | ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: + """ + Choose values based on a condition. + + `cond` must be a Boolean scalar/ array. + `left` or `right` must be of the same type scalar/ array. + `null` values in `cond` will be promoted to the output. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + left : Array-like or scalar-like + Argument to compute function. + right : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.21 Structural transforms ========================= + +@overload +def list_value_length( + lists: _ListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array: ... +@overload +def list_value_length( + lists: _LargeListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def list_value_length( + lists: ListArray[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array | lib.Int64Array: ... 
+@overload +def list_value_length( + lists: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def list_value_length(*args, **kwargs): + """ + Compute list lengths. + + `lists` must have a list-like type. + For each non-null value in `lists`, its length is emitted. + Null values emit a null in the output. + + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def make_struct( + *args: lib.Scalar, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: ... +@overload +def make_struct( + *args: lib.Array | lib.ChunkedArray, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: ... +@overload +def make_struct( + *args: Expression, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def make_struct(*args, **kwargs): + """ + Wrap Arrays into a StructArray. + + Names of the StructArray's fields are + specified through MakeStructOptions. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + options : pyarrow.compute.MakeStructOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.22 Conversions ========================= +@overload +def ceil_temporal( + timestamps: _TemporalScalarT, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalScalarT: ... +@overload +def ceil_temporal( + timestamps: _TemporalArrayT, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalArrayT: ... 
+@overload +def ceil_temporal( + timestamps: Expression, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def ceil_temporal(*args, **kwargs): + """ + Round temporal values up to nearest multiple of specified time unit. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. + options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values down to nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". 
+week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +round_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values to the nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". +week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. 
Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def cast( + arr: lib.Scalar, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Scalar[_DataTypeT]: ... +@overload +def cast( + arr: lib.Array, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array[lib.Scalar[_DataTypeT]]: ... +@overload +def cast( + arr: lib.ChunkedArray, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... +def cast(*args, **kwargs): + """ + Cast array values to another data type. Can also be invoked as an array + instance method. + + Parameters + ---------- + arr : Array-like + target_type : DataType or str + Type to cast to + safe : bool, default True + Check for overflows or other unsafe conversions + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + + Examples + -------- + >>> from datetime import datetime + >>> import pyarrow as pa + >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) + >>> arr.type + TimestampType(timestamp[us]) + + You can use ``pyarrow.DataType`` objects to specify the target type: + + >>> cast(arr, pa.timestamp("ms")) + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + + >>> cast(arr, pa.timestamp("ms")).type + TimestampType(timestamp[ms]) + + Alternatively, it is also supported to use the string aliases for these + types: + + >>> arr.cast("timestamp[ms]") + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + >>> arr.cast("timestamp[ms]").type + TimestampType(timestamp[ms]) + + Returns + ------- + casted : Array + The cast result as a new Array + """ + +@overload +def strftime( + timestamps: TemporalScalar, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringScalar: ... +@overload +def strftime( + timestamps: TemporalArray, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringArray: ... +@overload +def strftime( + timestamps: Expression, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def strftime(*args, **kwargs): + """ + Format temporal values according to a format string. + + For each input value, emit a formatted string. + The time format string and locale can be set using StrftimeOptions. 
+ The output precision of the "%S" (seconds) format code depends on + the input time precision: it is an integer for timestamps with + second precision, a real number with the required number of fractional + digits for higher precisions. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database, or if the specified locale + does not exist on this system. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + options : pyarrow.compute.StrftimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def strptime( + strings: StringScalar, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar: ... +@overload +def strptime( + strings: StringArray, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... +@overload +def strptime( + strings: Expression, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def strptime(*args, **kwargs): + """ + Parse timestamps. + + For each string in `strings`, parse it as a timestamp. + The timestamp unit and the expected string pattern must be given + in StrptimeOptions. Null inputs emit null. If a non-null string + fails parsing, an error is returned by default. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + options : pyarrow.compute.StrptimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.23 Temporal component extraction ========================= +@overload +def day( + values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar: ... +@overload +def day( + values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Array: ... +@overload +def day(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def day(*args, **kwargs): + """ + Extract day number. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. 
+ memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def day_of_week( + values: TemporalScalar, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def day_of_week( + values: TemporalArray, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def day_of_week( + values: Expression, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def day_of_week(*args, **kwargs): + """ + Extract day of the week number. + + By default, the week starts on Monday represented by 0 and ends on Sunday + represented by 6. + `DayOfWeekOptions.week_start` can be used to set another starting day using + the ISO numbering convention (1=start week on Monday, 7=start week on Sunday). + Day numbers can start at 0 or 1 based on `DayOfWeekOptions.count_from_zero`. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +day_of_year = _clone_signature(day) +""" +Extract day of year number. + +January 1st maps to day number 1, February 1st to 32, etc. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def hour( + values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def hour( + values: lib.TimestampArray[Any] + | lib.Time32Array[Any] + | lib.Time64Array[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] + | lib.ChunkedArray[lib.Time32Scalar[Any]] + | lib.ChunkedArray[lib.Time64Scalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def hour( + values: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def hour(*args, **kwargs): + """ + Extract hour value. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
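+
+    Examples
+    --------
+    A minimal illustrative doctest (example values are arbitrary):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> from datetime import datetime
+    >>> arr = pa.array([datetime(2023, 3, 1, 14, 30), None])
+    >>> pc.hour(arr).to_pylist()
+    [14, None]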
+ """ + +@overload +def is_dst( + values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar: ... +@overload +def is_dst( + values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_dst(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def is_dst(*args, **kwargs): + """ + Extracts if currently observing daylight savings. + + IsDaylightSavings returns true if a timestamp has a daylight saving + offset in the given timezone. + Null values emit null. + An error is returned if the values do not have a defined timezone. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def iso_week( + values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar: ... +@overload +def iso_week( + values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def iso_week( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def iso_week(*args, **kwargs): + """ + Extract ISO week of year number. + + First ISO week has the majority (4 or more) of its days in January. + ISO week starts on Monday. The week number starts with 1 and can run + up to 53. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +iso_year = _clone_signature(iso_week) +""" +Extract ISO year number. + +First week of an ISO year has the majority (4 or more) of its days in January. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def is_leap_year( + values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: ... +@overload +def is_leap_year( + values: lib.TimestampArray + | lib.Date32Array + | lib.Date64Array + | lib.ChunkedArray[lib.TimestampScalar] + | lib.ChunkedArray[lib.Date32Scalar] + | lib.ChunkedArray[lib.Date64Scalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanArray: ... +@overload +def is_leap_year( + values: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def is_leap_year(*args, **kwargs): + """ + Extract if year is a leap year. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
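+
+    Examples
+    --------
+    A small usage sketch with date32 inputs (example values are arbitrary):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> from datetime import date
+    >>> arr = pa.array([date(2020, 7, 1), date(2023, 7, 1)])
+    >>> pc.is_leap_year(arr).to_pylist()
+    [True, False]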
+ """ + +microsecond = _clone_signature(iso_week) +""" +Extract microsecond values. + +Microsecond returns number of microseconds since the last full millisecond. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +millisecond = _clone_signature(iso_week) +""" +Extract millisecond values. + +Millisecond returns number of milliseconds since the last full second. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +minute = _clone_signature(iso_week) +""" +Extract minute values. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +month = _clone_signature(day_of_week) +""" +Extract month number. + +Month is encoded as January=1, December=12. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +nanosecond = _clone_signature(hour) +""" +Extract nanosecond values. + +Nanosecond returns number of nanoseconds since the last full microsecond. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +quarter = _clone_signature(day_of_week) +""" +Extract quarter of year number. + +First quarter maps to 1 and forth quarter maps to 4. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +second = _clone_signature(hour) +""" +Extract second values. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +subsecond = _clone_signature(hour) +""" +Extract subsecond values. + +Subsecond returns the fraction of a second since the last full second. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. 
+ +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +us_week = _clone_signature(iso_week) +""" +Extract US week of year number. + +First US week has the majority (4 or more) of its days in January. +US week starts on Monday. The week number starts with 1 and can run +up to 53. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +us_year = _clone_signature(iso_week) +""" +Extract US epidemiological year number. + +First week of US epidemiological year has the majority (4 or more) of +it's days in January. Last week of US epidemiological year has the +year's last Wednesday in it. US epidemiological week starts on Sunday. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +year = _clone_signature(iso_week) +""" +Extract year number. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +@overload +def week( + values: lib.TimestampScalar, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: ... +@overload +def week( + values: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Array: ... +@overload +def week( + values: Expression, + /, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + options: WeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def week(*args, **kwargs): + """ + Extract week of year number. + + First week has the majority (4 or more) of its days in January. + Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using + DayOfWeekOptions.count_from_zero. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). 
+    first_week_is_fully_in_year : bool, default False
+        If True, week number 0 is fully in January.
+        If False, a week that begins on December 29, 30 or 31 is considered
+        to be week number 0 of the following year.
+    options : pyarrow.compute.WeekOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def year_month_day(
+    values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.StructScalar: ...
+@overload
+def year_month_day(
+    values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.StructArray: ...
+@overload
+def year_month_day(
+    values: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+def year_month_day(*args, **kwargs):
+    """
+    Extract (year, month, day) struct.
+
+    Null values emit null.
+    An error is returned if the values have a defined timezone but it
+    cannot be found in the timezone database.
+
+    Parameters
+    ----------
+    values : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 2.24 Temporal difference =========================
+def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None):
+    """
+    Compute the number of days and milliseconds between two timestamps.
+
+    Returns the number of days and milliseconds from `start` to `end`.
+    That is, first the difference in days is computed as if both
+    timestamps were truncated to the day, then the difference between the
+    times of the two timestamps is computed as if both times were truncated
+    to the millisecond.
+    Null values return null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def days_between(
+    start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Scalar | lib.Int64Array:
+    """
+    Compute the number of days between two timestamps.
+
+    Returns the number of day boundaries crossed from `start` to `end`.
+    That is, the difference is calculated as if the timestamps were
+    truncated to the day.
+    Null values emit null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+hours_between = _clone_signature(days_between)
+"""
+Compute the number of hours between two timestamps.
+
+Returns the number of hour boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the hour.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+microseconds_between = _clone_signature(days_between)
+"""
+Compute the number of microseconds between two timestamps.
+
+Returns the number of microsecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the microsecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+milliseconds_between = _clone_signature(days_between)
+"""
+Compute the number of millisecond boundaries between two timestamps.
+
+Returns the number of millisecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the millisecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+minutes_between = _clone_signature(days_between)
+"""
+Compute the number of minute boundaries between two timestamps.
+
+Returns the number of minute boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the minute.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+def month_day_nano_interval_between(
+    start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray:
+    """
+    Compute the number of months, days and nanoseconds between two timestamps.
+
+    Returns the number of months, days, and nanoseconds from `start` to `end`.
+    That is, first the difference in months is computed as if both timestamps
+    were truncated to the months, then the difference between the days
+    is computed, and finally the difference between the times of the two
+    timestamps is computed as if both times were truncated to the nanosecond.
+    Null values return null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None):
+    """
+    Compute the number of months between two timestamps.
+
+    Returns the number of month boundaries crossed from `start` to `end`.
+    That is, the difference is calculated as if the timestamps were
+    truncated to the month.
+    Null values emit null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+ """ + +nanoseconds_between = _clone_signature(days_between) +""" +Compute the number of nanoseconds between two timestamps. + +Returns the number of nanosecond boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the nanosecond. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +quarters_between = _clone_signature(days_between) +""" +Compute the number of quarters between two timestamps. + +Returns the number of quarter start boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the quarter. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +seconds_between = _clone_signature(days_between) +""" +Compute the number of seconds between two timestamps. + +Returns the number of second boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the second. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def weeks_between( + start, + end, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array: + """ + Compute the number of weeks between two timestamps. + + Returns the number of week boundaries crossed from `start` to `end`. + That is, the difference is calculated as if the timestamps were + truncated to the week. + Null values emit null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +years_between = _clone_signature(days_between) +""" +Compute the number of years between two timestamps. + +Returns the number of year boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the year. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
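+
+Examples
+--------
+A minimal illustrative doctest (example values are arbitrary):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> from datetime import datetime
+>>> start = pa.scalar(datetime(2020, 6, 1))
+>>> end = pa.scalar(datetime(2023, 1, 1))
+>>> pc.years_between(start, end).as_py()
+3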
+""" + +# ========================= 2.25 Timezone handling ========================= +@overload +def assume_timezone( + timestamps: lib.TimestampScalar, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar: ... +@overload +def assume_timezone( + timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... +@overload +def assume_timezone( + timestamps: Expression, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def assume_timezone(*args, **kwargs): + """ + Convert naive timestamp to timezone-aware timestamp. + + Input timestamps are assumed to be relative to the timezone given in the + `timezone` option. They are converted to UTC-relative timestamps and + the output type has its timezone set to the value of the `timezone` + option. Null values emit null. + This function is meant to be used when an external system produces + "timezone-naive" timestamps which need to be converted to + "timezone-aware" timestamps. An error is returned if the timestamps + already have a defined timezone. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + timezone : str + Timezone to assume for the input. + ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + options : pyarrow.compute.AssumeTimezoneOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def local_timestamp( + timestamps: lib.TimestampScalar, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.TimestampScalar: ... +@overload +def local_timestamp( + timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampArray: ... +@overload +def local_timestamp( + timestamps: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def local_timestamp(*args, **kwargs): + """ + Convert timestamp to a timezone-naive local time timestamp. + + LocalTimestamp converts timezone-aware timestamp to local timestamp + of the given timestamp's timezone and removes timezone metadata. + Alternative name for this timestamp is also wall clock time. + If input is in UTC or without timezone, then unchanged input values + without timezone metadata are returned. + Null values emit null. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
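+
+    Examples
+    --------
+    A small usage sketch, assuming a fixed-offset timezone and an integer
+    epoch value as input (example values are arbitrary):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> ts = pa.array([0], type=pa.timestamp("s", tz="+02:00"))
+    >>> pc.local_timestamp(ts).to_pylist()
+    [datetime.datetime(1970, 1, 1, 2, 0)]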
+ """ + +# ========================= 2.26 Random number generation ========================= +def random( + n: int, + *, + initializer: Literal["system"] | int = "system", + options: RandomOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Generate numbers in the range [0, 1). + + Generated values are uniformly-distributed, double-precision + in range [0, 1). Algorithm and seed can be changed via RandomOptions. + + Parameters + ---------- + n : int + Number of values to generate, must be greater than or equal to 0 + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + options : pyarrow.compute.RandomOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3. Array-wise (“vector”) functions ========================= + +# ========================= 3.1 Cumulative Functions ========================= +@overload +def cumulative_sum( + values: _NumericArrayT, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT: ... +@overload +def cumulative_sum( + values: Expression, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def cumulative_sum(*args, **kwargs): + """ + Compute the cumulative sum over a numeric input. + + `values` must be numeric. Return an array/chunked array which is the + cumulative sum computed over `values`. Results will wrap around on + integer overflow. Use function "cumulative_sum_checked" if you want + overflow to return an error. The default start is 0. + + Parameters + ---------- + values : Array-like + Argument to compute function. + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +cumulative_sum_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative sum over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative sum computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_sum". The default start is 0. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
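+
+Examples
+--------
+A minimal illustrative doctest (example values are arbitrary):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.cumulative_sum_checked(pa.array([1, 2, 3, 4])).to_pylist()
+[1, 3, 6, 10]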
+""" +cumulative_prod = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. Results will wrap around on +integer overflow. Use function "cumulative_prod_checked" if you want +overflow to return an error. The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_prod_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_prod". The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_max = _clone_signature(cumulative_sum) +""" +Compute the cumulative max over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative max computed over `values`. The default start is the minimum +value of input type (so that any other value will replace the +start as the new maximum). + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_min = _clone_signature(cumulative_sum) +""" +Compute the cumulative min over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative min computed over `values`. The default start is the maximum +value of input type (so that any other value will replace the +start as the new minimum). + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. 
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+cumulative_mean = _clone_signature(cumulative_sum)
+"""
+Compute the cumulative mean over a numeric input.
+
+`values` must be numeric. Return an array/chunked array which is the
+cumulative mean computed over `values`.
+
+Parameters
+----------
+values : Array-like
+    Argument to compute function.
+start : Scalar, default None
+    Starting value for the cumulative operation. If none is given,
+    a default value depending on the operation and input type is used.
+skip_nulls : bool, default False
+    When false, the first encountered null is propagated.
+options : pyarrow.compute.CumulativeOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+# ========================= 3.2 Associative transforms =========================
+
+@overload
+def dictionary_encode(
+    array: _ScalarOrArrayT,
+    /,
+    null_encoding: Literal["mask", "encode"] = "mask",
+    *,
+    options=None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarOrArrayT: ...
+@overload
+def dictionary_encode(
+    array: Expression,
+    /,
+    null_encoding: Literal["mask", "encode"] = "mask",
+    *,
+    options=None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def unique(array: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ...
+@overload
+def unique(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ...
+@overload
+def value_counts(
+    array: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.StructArray: ...
+@overload
+def value_counts(
+    array: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+
+# ========================= 3.3 Selections =========================
+@overload
+def array_filter(
+    array: _ArrayT,
+    selection_filter: list[bool] | list[bool | None] | BooleanArray,
+    /,
+    null_selection_behavior: Literal["drop", "emit_null"] = "drop",
+    *,
+    options: FilterOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ArrayT: ...
+@overload
+def array_filter(
+    array: Expression,
+    selection_filter: list[bool] | list[bool | None] | BooleanArray,
+    /,
+    null_selection_behavior: Literal["drop", "emit_null"] = "drop",
+    *,
+    options: FilterOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def array_take(
+    array: _ArrayT,
+    indices: list[int]
+    | list[int | None]
+    | lib.Int16Array
+    | lib.Int32Array
+    | lib.Int64Array
+    | lib.ChunkedArray[lib.Int16Scalar]
+    | lib.ChunkedArray[lib.Int32Scalar]
+    | lib.ChunkedArray[lib.Int64Scalar],
+    /,
+    *,
+    boundscheck: bool = True,
+    options: TakeOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ArrayT: ...
+@overload
+def array_take(
+    array: Expression,
+    indices: list[int]
+    | list[int | None]
+    | lib.Int16Array
+    | lib.Int32Array
+    | lib.Int64Array
+    | lib.ChunkedArray[lib.Int16Scalar]
+    | lib.ChunkedArray[lib.Int32Scalar]
+    | lib.ChunkedArray[lib.Int64Scalar],
+    /,
+    *,
+    boundscheck: bool = True,
+    options: TakeOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
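A minimal usage sketch (not part of the patch) for the vector kernels typed above, assuming the behavior described in their docstrings; the array contents and printed results are only illustrative.

```python
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, 2, 2, None, 5])

# Cumulative sum: with skip_nulls=False (the default) the first null
# is propagated to all following positions.
pc.cumulative_sum(arr)                # [1, 3, 5, null, null]

# Associative transforms.
pc.unique(arr)                        # [1, 2, null, 5]
pc.value_counts(arr)                  # StructArray of {values, counts}
pc.dictionary_encode(arr)             # DictionaryArray with integer indices

# Selections: boolean-mask filtering and integer-index take.
mask = pa.array([True, False, True, None, True])
pc.array_filter(arr, mask)            # [1, 2, 5] ("drop" null behavior)
pc.array_take(arr, pa.array([0, 3]))  # [1, null]
```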
+@overload +def drop_null(input: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... +@overload +def drop_null( + input: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... + +filter = array_filter +take = array_take +""" +Select values (or records) from array- or table-like data given integer +selection indices. + +The result will be of the same type(s) as the input, with elements taken +from the input array (or record batch / table fields) at the given +indices. If an index is null then the corresponding value in the output +will be null. + +Parameters +---------- +data : Array, ChunkedArray, RecordBatch, or Table +indices : Array, ChunkedArray + Must be of integer type +boundscheck : boolean, default True + Whether to boundscheck the indices. If False and there is an out of + bounds index, will likely cause the process to crash. +memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + +Returns +------- +result : depends on inputs + Selected values for the given indices + +Examples +-------- +>>> import pyarrow as pa +>>> arr = pa.array(["a", "b", "c", None, "e", "f"]) +>>> indices = pa.array([0, None, 4, 3]) +>>> arr.take(indices) + +[ + "a", + null, + "e", + null +] +""" + +# ========================= 3.4 Containment tests ========================= +@overload +def indices_nonzero( + values: lib.BooleanArray + | lib.NullArray + | NumericArray + | lib.Decimal128Array + | lib.Decimal256Array, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def indices_nonzero( + values: Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def indices_nonzero(*args, **kwargs): + """ + Return the indices of the values in the array that are non-zero. + + For each input value, check if it's zero, false or null. Emit the index + of the value in the array if it's none of the those. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3.5 Sorts and partitions ========================= +@overload +def array_sort_indices( + array: lib.Array | lib.ChunkedArray, + /, + order: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: ArraySortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def array_sort_indices( + array: Expression, + /, + order: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: ArraySortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def array_sort_indices(*args, **kwargs): + """ + Return the indices that would sort an array. + + This function computes an array of indices that define a stable sort + of the input array. By default, Null values are considered greater + than any other value and are therefore sorted at the end of the array. + For floating-point types, NaNs are considered greater than any + other non-null value, but smaller than null values. + + The handling of nulls and NaNs can be changed in ArraySortOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". 
+ null_placement : str, default "at_end" + Where nulls in the input should be sorted. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.ArraySortOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def partition_nth_indices( + array: lib.Array | lib.ChunkedArray, + /, + pivot: int, + *, + null_placement: _Placement = "at_end", + options: PartitionNthOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def partition_nth_indices( + array: Expression, + /, + pivot: int, + *, + null_placement: _Placement = "at_end", + options: PartitionNthOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def partition_nth_indices(*args, **kwargs): + """ + Return the indices that would partition an array around a pivot. + + This functions computes an array of indices that define a non-stable + partial sort of the input array. + + The output is such that the `N`'th index points to the `N`'th element + of the input in sorted order, and all indices before the `N`'th point + to elements in the input less or equal to elements at or after the `N`'th. + + By default, null values are considered greater than any other value + and are therefore partitioned towards the end of the array. + For floating-point types, NaNs are considered greater than any + other non-null value, but smaller than null values. + + The pivot index `N` must be given in PartitionNthOptions. + The handling of nulls and NaNs can also be changed in PartitionNthOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.PartitionNthOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def rank( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + options: RankOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: + """ + Compute ordinal ranks of an array (1-based). + + This function computes a rank of the input array. + By default, null values are considered greater than any other value and + are therefore sorted at the end of the input. For floating-point types, + NaNs are considered greater than any other non-null value, but smaller + than null values. The default tiebreaker is to assign ranks in order of + when ties appear in the input. + + The handling of nulls, NaNs and tiebreakers can be changed in RankOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. 
+ null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + tiebreaker : str, default "first" + Configure how ties between equal values are handled. + Accepted values are: + + - "min": Ties get the smallest possible rank in sorted order. + - "max": Ties get the largest possible rank in sorted order. + - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + options : pyarrow.compute.RankOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def select_k_unstable( + input: lib.Array | lib.ChunkedArray, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def select_k_unstable( + input: Expression, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def select_k_unstable(*args, **kwargs): + """ + Select the indices of the first `k` ordered elements from the input. + + This function selects an array of indices of the first `k` ordered elements + from the `input` array, record batch or table specified in the column keys + (`options.sort_keys`). Output is not guaranteed to be stable. + Null values are considered greater than any other value and are + therefore ordered at the end. For floating-point types, NaNs are considered + greater than any other non-null value, but smaller than null values. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + options : pyarrow.compute.SelectKOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def sort_indices( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def sort_indices( + input: Expression, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def sort_indices(*args, **kwargs): + """ + Return the indices that would sort an array, record batch or table. + + This function computes an array of indices that define a stable sort + of the input array, record batch or table. By default, null values are + considered greater than any other value and are therefore sorted at the + end of the input. 
For floating-point types, NaNs are considered greater
+    than any other non-null value, but smaller than null values.
+
+    The handling of nulls and NaNs can be changed in SortOptions.
+
+    Parameters
+    ----------
+    input : Array-like or scalar-like
+        Argument to compute function.
+    sort_keys : sequence of (name, order) tuples
+        Names of field/column keys to sort the input on,
+        along with the order each field/column is sorted in.
+        Accepted values for `order` are "ascending", "descending".
+        The field name can be a string column name or expression.
+    null_placement : str, default "at_end"
+        Where nulls in input should be sorted, only applying to
+        columns/fields mentioned in `sort_keys`.
+        Accepted values are "at_start", "at_end".
+    options : pyarrow.compute.SortOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 3.6 Structural transforms =========================
+@overload
+def list_element(
+    lists: Expression, index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+@overload
+def list_element(
+    lists: lib.Array[ListScalar[_DataTypeT]],
+    index: ScalarLike,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Array[lib.Scalar[_DataTypeT]]: ...
+@overload
+def list_element(
+    lists: lib.ChunkedArray[ListScalar[_DataTypeT]],
+    index: ScalarLike,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ...
+@overload
+def list_element(
+    lists: ListScalar[_DataTypeT],
+    index: ScalarLike,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _DataTypeT: ...
+def list_element(*args, **kwargs):
+    """
+    Compute elements of nested list values using an index.
+
+    `lists` must have a list-like type.
+    For each value in each list of `lists`, the element at `index`
+    is emitted. Null values emit a null in the output.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    index : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+@overload
+def list_flatten(
+    lists: Expression,
+    /,
+    recursive: bool = False,
+    *,
+    options: ListFlattenOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def list_flatten(
+    lists: ArrayOrChunkedArray[ListScalar[Any]],
+    /,
+    recursive: bool = False,
+    *,
+    options: ListFlattenOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any]: ...
+def list_flatten(*args, **kwargs):
+    """
+    Flatten list values.
+
+    `lists` must have a list-like type (lists, list-views, and
+    fixed-size lists).
+    Return an array with the top list level flattened unless
+    `recursive` is set to true in ListFlattenOptions. When that
+    is the case, flattening happens recursively until a non-list
+    array is formed.
+
+    Null list values do not emit anything to the output.
+
+    Parameters
+    ----------
+    lists : Array-like
+        Argument to compute function.
+    recursive : bool, default False
+        When True, the list array is flattened recursively until an array
+        of non-list values is formed.
+    options : pyarrow.compute.ListFlattenOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+ """ + +@overload +def list_parent_indices( + lists: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +@overload +def list_parent_indices( + lists: ArrayOrChunkedArray[Any], /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Array: ... +def list_parent_indices(*args, **kwargs): + """ + Compute parent indices of nested list values. + + `lists` must have a list-like or list-view type. + For each value in each list of `lists`, the top-level list index + is emitted. + + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +@overload +def list_slice( + lists: Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + *, + options: ListSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def list_slice( + lists: ArrayOrChunkedArray[Any], + /, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + *, + options: ListSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[Any]: ... +def list_slice(*args, **kwargs): + """ + Compute slice of list-like array. + + `lists` must have a list-like type. + For each list element, compute a slice, returning a new list array. + A variable or fixed size list array is returned, depending on options. + + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing inner list elements (inclusive). + stop : Optional[int], default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. (NotImplemented) + step : int, default 1 + Slice step. + return_fixed_size_list : Optional[bool], default None + Whether to return a FixedSizeListArray. If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + options : pyarrow.compute.ListSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def map_lookup( + container, + /, + query_key, + occurrence: str, + *, + options: MapLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Find the items corresponding to a given key in a Map. + + For a given query key (passed via MapLookupOptions), extract + either the FIRST, LAST or ALL items from a Map that have + matching keys. + + Parameters + ---------- + container : Array-like or scalar-like + Argument to compute function. + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + options : pyarrow.compute.MapLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def struct_field( + values, + /, + indices, + *, + options: StructFieldOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Extract children of a struct or union by index. 
+ + Given a list of indices (passed via StructFieldOptions), extract + the child array or scalar with the given child index, recursively. + + For union inputs, nulls are emitted for union values that reference + a different child than specified. Also, the indices are always + in physical order, not logical type codes - for example, the first + child is always index 0. + + An empty list of indices returns the argument unchanged. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + options : pyarrow.compute.StructFieldOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values backward to fill null slots. + + Given an array, propagate next valid observation backward to previous valid + or nothing if all next values are null. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values forward to fill null slots. + + Given an array, propagate last valid observation forward to next valid + or nothing if all previous values are null. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def replace_with_mask( + values, + mask: list[bool] | list[bool | None] | BooleanArray, + replacements, + /, + *, + memory_pool: lib.MemoryPool | None = None, +): + """ + Replace items selected with a mask. + + Given an array and a boolean mask (either scalar or of equal length), + along with replacement values (either scalar or array), + each element of the array for which the corresponding mask element is + true will be replaced by the next value from the replacements, + or with null if the mask is null. + Hence, for replacement arrays, len(replacements) == sum(mask == true). + + Parameters + ---------- + values : Array-like + Argument to compute function. + mask : Array-like + Argument to compute function. + replacements : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3.7 Pairwise functions ========================= +@overload +def pairwise_diff( + input: _NumericOrTemporalArrayT, + /, + period: int = 1, + *, + options: PairwiseOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalArrayT: ... +@overload +def pairwise_diff( + input: Expression, + /, + period: int = 1, + *, + options: PairwiseOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def pairwise_diff(*args, **kwargs): + """ + Compute first order difference of an array. 
+ + Computes the first order difference of an array, It internally calls + the scalar function "subtract" to compute + differences, so its + behavior and supported types are the same as + "subtract". The period can be specified in :struct:`PairwiseOptions`. + + Results will wrap around on integer overflow. Use function + "pairwise_diff_checked" if you want overflow to return an error. + + Parameters + ---------- + input : Array-like + Argument to compute function. + period : int, default 1 + Period for applying the period function. + options : pyarrow.compute.PairwiseOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +pairwise_diff_checked = _clone_signature(pairwise_diff) +""" +Compute first order difference of an array. + +Computes the first order difference of an array, It internally calls +the scalar function "subtract_checked" (or the checked variant) to compute +differences, so its behavior and supported types are the same as +"subtract_checked". The period can be specified in :struct:`PairwiseOptions`. + +This function returns an error on overflow. For a variant that doesn't +fail on overflow, use function "pairwise_diff". + +Parameters +---------- +input : Array-like + Argument to compute function. +period : int, default 1 + Period for applying the period function. +options : pyarrow.compute.PairwiseOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" diff --git a/python/pyarrow/csv.pyi b/python/pyarrow/csv.pyi new file mode 100644 index 00000000000..510229d7e72 --- /dev/null +++ b/python/pyarrow/csv.pyi @@ -0,0 +1,27 @@ +from pyarrow._csv import ( + ISO8601, + ConvertOptions, + CSVStreamingReader, + CSVWriter, + InvalidRow, + ParseOptions, + ReadOptions, + WriteOptions, + open_csv, + read_csv, + write_csv, +) + +__all__ = [ + "ISO8601", + "ConvertOptions", + "CSVStreamingReader", + "CSVWriter", + "InvalidRow", + "ParseOptions", + "ReadOptions", + "WriteOptions", + "open_csv", + "read_csv", + "write_csv", +] diff --git a/python/pyarrow/cuda.pyi b/python/pyarrow/cuda.pyi new file mode 100644 index 00000000000..e11baf7d4e7 --- /dev/null +++ b/python/pyarrow/cuda.pyi @@ -0,0 +1,25 @@ +from pyarrow._cuda import ( + BufferReader, + BufferWriter, + Context, + CudaBuffer, + HostBuffer, + IpcMemHandle, + new_host_buffer, + read_message, + read_record_batch, + serialize_record_batch, +) + +__all__ = [ + "BufferReader", + "BufferWriter", + "Context", + "CudaBuffer", + "HostBuffer", + "IpcMemHandle", + "new_host_buffer", + "read_message", + "read_record_batch", + "serialize_record_batch", +] diff --git a/python/pyarrow/dataset.pyi b/python/pyarrow/dataset.pyi new file mode 100644 index 00000000000..98f1a38aa85 --- /dev/null +++ b/python/pyarrow/dataset.pyi @@ -0,0 +1,229 @@ +from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload + +from _typeshed import StrPath +from pyarrow._dataset import ( + CsvFileFormat, + CsvFragmentScanOptions, + Dataset, + DatasetFactory, + DirectoryPartitioning, + FeatherFileFormat, + FileFormat, + FileFragment, + FilenamePartitioning, + FileSystemDataset, + FileSystemDatasetFactory, + FileSystemFactoryOptions, + FileWriteOptions, + Fragment, + FragmentScanOptions, + HivePartitioning, + InMemoryDataset, + IpcFileFormat, + IpcFileWriteOptions, + JsonFileFormat, + JsonFragmentScanOptions, + Partitioning, 
+ PartitioningFactory, + Scanner, + TaggedRecordBatch, + UnionDataset, + UnionDatasetFactory, + WrittenFile, + get_partition_keys, +) +from pyarrow._dataset_orc import OrcFileFormat +from pyarrow._dataset_parquet import ( + ParquetDatasetFactory, + ParquetFactoryOptions, + ParquetFileFormat, + ParquetFileFragment, + ParquetFileWriteOptions, + ParquetFragmentScanOptions, + ParquetReadOptions, + RowGroupInfo, +) +from pyarrow._dataset_parquet_encryption import ( + ParquetDecryptionConfig, + ParquetEncryptionConfig, +) +from pyarrow.compute import Expression, field, scalar +from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table + +from ._fs import SupportedFileSystem + +_orc_available: bool +_parquet_available: bool + +__all__ = [ + "CsvFileFormat", + "CsvFragmentScanOptions", + "Dataset", + "DatasetFactory", + "DirectoryPartitioning", + "FeatherFileFormat", + "FileFormat", + "FileFragment", + "FilenamePartitioning", + "FileSystemDataset", + "FileSystemDatasetFactory", + "FileSystemFactoryOptions", + "FileWriteOptions", + "Fragment", + "FragmentScanOptions", + "HivePartitioning", + "InMemoryDataset", + "IpcFileFormat", + "IpcFileWriteOptions", + "JsonFileFormat", + "JsonFragmentScanOptions", + "Partitioning", + "PartitioningFactory", + "Scanner", + "TaggedRecordBatch", + "UnionDataset", + "UnionDatasetFactory", + "WrittenFile", + "get_partition_keys", + # Orc + "OrcFileFormat", + # Parquet + "ParquetDatasetFactory", + "ParquetFactoryOptions", + "ParquetFileFormat", + "ParquetFileFragment", + "ParquetFileWriteOptions", + "ParquetFragmentScanOptions", + "ParquetReadOptions", + "RowGroupInfo", + # Parquet Encryption + "ParquetDecryptionConfig", + "ParquetEncryptionConfig", + # Compute + "Expression", + "field", + "scalar", + # Dataset + "partitioning", + "parquet_dataset", + "write_dataset", +] + +_DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] + +@overload +def partitioning( + schema: Schema, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + field_names: list[str], + *, + flavor: Literal["filename"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + *, + flavor: Literal["hive"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +def parquet_dataset( + metadata_path: StrPath, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + format: ParquetFileFormat | None = None, + partitioning: Partitioning | PartitioningFactory | None = None, + partition_base_dir: str | None = None, +) -> FileSystemDataset: ... 
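A hedged sketch (not part of the diff) tying the `partitioning()` overloads above to the `dataset()` / `write_dataset()` entry points stubbed just below; the `events` path and column names are invented for illustration.

```python
import pyarrow as pa
import pyarrow.dataset as ds

table = pa.table({"year": pa.array([2023, 2023, 2024], pa.int32()),
                  "value": [1.0, 2.0, 3.0]})

# A concrete HivePartitioning carries an explicit schema ...
hive = ds.partitioning(pa.schema([("year", pa.int32())]), flavor="hive")
ds.write_dataset(table, "events", format="parquet", partitioning=hive)

# ... while the factory form (PartitioningFactory) infers the partition
# schema from the directory names when reading back.
dataset = ds.dataset("events", format="parquet",
                     partitioning=ds.partitioning(flavor="hive"))
print(dataset.to_table().num_rows)  # 3
```

Passing `partitioning=["year"], partitioning_flavor="hive"` to `write_dataset()` is the shorthand form for the same layout.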
+@overload +def dataset( + source: StrPath | Sequence[StrPath], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> FileSystemDataset: ... +@overload +def dataset( + source: list[Dataset], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> UnionDataset: ... +@overload +def dataset( + source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... +@overload +def dataset( + source: RecordBatch | Table, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... +def write_dataset( + data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], + base_dir: StrPath, + *, + basename_template: str | None = None, + format: FileFormat | _DatasetFormat | None = None, + partitioning: Partitioning | list[str] | None = None, + partitioning_flavor: str | None = None, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + file_options: FileWriteOptions | None = None, + use_threads: bool = True, + max_partitions: int = 1024, + max_open_files: int = 1024, + max_rows_per_file: int = 0, + min_rows_per_group: int = 0, + max_rows_per_group: int = 1024 * 1024, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", + create_dir: bool = True, +): ... diff --git a/python/pyarrow/feather.pyi b/python/pyarrow/feather.pyi new file mode 100644 index 00000000000..9451ee15763 --- /dev/null +++ b/python/pyarrow/feather.pyi @@ -0,0 +1,50 @@ +from typing import IO, Literal + +import pandas as pd + +from _typeshed import StrPath +from pyarrow._feather import FeatherError +from pyarrow.lib import Table + +__all__ = [ + "FeatherError", + "FeatherDataset", + "check_chunked_overflow", + "write_feather", + "read_feather", + "read_table", +] + +class FeatherDataset: + path_or_paths: str | list[str] + validate_schema: bool + + def __init__(self, path_or_paths: str | list[str], validate_schema: bool = True) -> None: ... + def read_table(self, columns: list[str] | None = None) -> Table: ... + def validate_schemas(self, piece, table: Table) -> None: ... + def read_pandas( + self, columns: list[str] | None = None, use_threads: bool = True + ) -> pd.DataFrame: ... 
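Not part of the patch: a short sketch of the legacy `FeatherDataset` reader stubbed above, which concatenates several Feather files sharing one schema. `write_feather` (typed further below in this file) is used only to produce the inputs; the file names are arbitrary.

```python
import pyarrow as pa
from pyarrow import feather

feather.write_feather(pa.table({"x": [1, 2], "y": ["a", "b"]}), "part-0.feather")
feather.write_feather(pa.table({"x": [3, 4], "y": ["c", "d"]}), "part-1.feather")

dataset = feather.FeatherDataset(["part-0.feather", "part-1.feather"])
table = dataset.read_table(columns=["x"])  # schemas validated, then concatenated
print(table.num_rows)                      # 4
```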
+ +def check_chunked_overflow(name: str, col) -> None: ... +def write_feather( + df: pd.DataFrame | Table, + dest: StrPath | IO, + compression: Literal["zstd", "lz4", "uncompressed"] | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: Literal[1, 2] = 2, +) -> None: ... +def read_feather( + source: StrPath | IO, + columns: list[str] | None = None, + use_threads: bool = True, + memory_map: bool = False, + **kwargs, +) -> pd.DataFrame: ... +def read_table( + source: StrPath | IO, + columns: list[str] | None = None, + memory_map: bool = False, + use_threads: bool = True, +) -> Table: ... diff --git a/python/pyarrow/flight.pyi b/python/pyarrow/flight.pyi new file mode 100644 index 00000000000..9b806ccf305 --- /dev/null +++ b/python/pyarrow/flight.pyi @@ -0,0 +1,95 @@ +from pyarrow._flight import ( + Action, + ActionType, + BasicAuth, + CallInfo, + CertKeyPair, + ClientAuthHandler, + ClientMiddleware, + ClientMiddlewareFactory, + DescriptorType, + FlightCallOptions, + FlightCancelledError, + FlightClient, + FlightDataStream, + FlightDescriptor, + FlightEndpoint, + FlightError, + FlightInfo, + FlightInternalError, + FlightMetadataReader, + FlightMetadataWriter, + FlightMethod, + FlightServerBase, + FlightServerError, + FlightStreamChunk, + FlightStreamReader, + FlightStreamWriter, + FlightTimedOutError, + FlightUnauthenticatedError, + FlightUnauthorizedError, + FlightUnavailableError, + FlightWriteSizeExceededError, + GeneratorStream, + Location, + MetadataRecordBatchReader, + MetadataRecordBatchWriter, + RecordBatchStream, + Result, + SchemaResult, + ServerAuthHandler, + ServerCallContext, + ServerMiddleware, + ServerMiddlewareFactory, + Ticket, + TracingServerMiddlewareFactory, + connect, +) + +__all__ = [ + "Action", + "ActionType", + "BasicAuth", + "CallInfo", + "CertKeyPair", + "ClientAuthHandler", + "ClientMiddleware", + "ClientMiddlewareFactory", + "DescriptorType", + "FlightCallOptions", + "FlightCancelledError", + "FlightClient", + "FlightDataStream", + "FlightDescriptor", + "FlightEndpoint", + "FlightError", + "FlightInfo", + "FlightInternalError", + "FlightMetadataReader", + "FlightMetadataWriter", + "FlightMethod", + "FlightServerBase", + "FlightServerError", + "FlightStreamChunk", + "FlightStreamReader", + "FlightStreamWriter", + "FlightTimedOutError", + "FlightUnauthenticatedError", + "FlightUnauthorizedError", + "FlightUnavailableError", + "FlightWriteSizeExceededError", + "GeneratorStream", + "Location", + "MetadataRecordBatchReader", + "MetadataRecordBatchWriter", + "RecordBatchStream", + "Result", + "SchemaResult", + "ServerAuthHandler", + "ServerCallContext", + "ServerMiddleware", + "ServerMiddlewareFactory", + "Ticket", + "TracingServerMiddlewareFactory", + "connect", +] diff --git a/python/pyarrow/fs.pyi b/python/pyarrow/fs.pyi new file mode 100644 index 00000000000..6bf75616c13 --- /dev/null +++ b/python/pyarrow/fs.pyi @@ -0,0 +1,77 @@ +from pyarrow._fs import ( # noqa + FileSelector, + FileType, + FileInfo, + FileSystem, + LocalFileSystem, + SubTreeFileSystem, + _MockFileSystem, + FileSystemHandler, + PyFileSystem, + SupportedFileSystem, +) +from pyarrow._azurefs import AzureFileSystem +from pyarrow._hdfs import HadoopFileSystem +from pyarrow._gcsfs import GcsFileSystem +from pyarrow._s3fs import ( # noqa + AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, + S3FileSystem, + S3LogLevel, + S3RetryStrategy, + ensure_s3_initialized, + finalize_s3, + ensure_s3_finalized, + initialize_s3, + resolve_s3_region, +) + +FileStats = 
FileInfo + +def copy_files( + source: str, + destination: str, + source_filesystem: SupportedFileSystem | None = None, + destination_filesystem: SupportedFileSystem | None = None, + *, + chunk_size: int = 1024 * 1024, + use_threads: bool = True, +) -> None: ... + +class FSSpecHandler(FileSystemHandler): # type: ignore[misc] + fs: SupportedFileSystem + def __init__(self, fs: SupportedFileSystem) -> None: ... + +__all__ = [ + # _fs + "FileSelector", + "FileType", + "FileInfo", + "FileSystem", + "LocalFileSystem", + "SubTreeFileSystem", + "_MockFileSystem", + "FileSystemHandler", + "PyFileSystem", + # _azurefs + "AzureFileSystem", + # _hdfs + "HadoopFileSystem", + # _gcsfs + "GcsFileSystem", + # _s3fs + "AwsDefaultS3RetryStrategy", + "AwsStandardS3RetryStrategy", + "S3FileSystem", + "S3LogLevel", + "S3RetryStrategy", + "ensure_s3_initialized", + "finalize_s3", + "ensure_s3_finalized", + "initialize_s3", + "resolve_s3_region", + # fs + "FileStats", + "copy_files", + "FSSpecHandler", +] diff --git a/python/pyarrow/gandiva.pyi b/python/pyarrow/gandiva.pyi new file mode 100644 index 00000000000..a344f885b29 --- /dev/null +++ b/python/pyarrow/gandiva.pyi @@ -0,0 +1,65 @@ +from typing import Iterable, Literal + +from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable + +class Node(_Weakrefable): + def return_type(self) -> DataType: ... + +class Expression(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class Condition(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class SelectionVector(_Weakrefable): + def to_array(self) -> Array: ... + +class Projector(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, selection: SelectionVector | None = None + ) -> list[Array]: ... + +class Filter(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32" + ) -> SelectionVector: ... + +class TreeExprBuilder(_Weakrefable): + def make_literal(self, value: float | str | bytes | bool, dtype: DataType) -> Node: ... + def make_expression(self, root_node: Node, return_field: Field) -> Expression: ... + def make_function(self, name: str, children: list[Node], return_type: DataType) -> Node: ... + def make_field(self, field: Field) -> Node: ... + def make_if( + self, condition: Node, this_node: Node, else_node: Node, return_type: DataType + ) -> Node: ... + def make_and(self, children: list[Node]) -> Node: ... + def make_or(self, children: list[Node]) -> Node: ... + def make_in_expression(self, node: Node, values: Iterable, dtype: DataType) -> Node: ... + def make_condition(self, condition: Node) -> Condition: ... + +class Configuration(_Weakrefable): + def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ... + +def make_projector( + schema: Schema, + children: list[Expression], + pool: MemoryPool, + selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE", + configuration: Configuration | None = None, +) -> Projector: ... +def make_filter( + schema: Schema, condition: Condition, configuration: Configuration | None = None +) -> Filter: ... + +class FunctionSignature(_Weakrefable): + def return_type(self) -> DataType: ... + def param_types(self) -> list[DataType]: ... + def name(self) -> str: ... + +def get_registered_function_signatures() -> list[FunctionSignature]: ... 
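An illustrative sketch (not part of the patch) of the Gandiva expression-builder API stubbed above. It assumes a pyarrow build with the optional Gandiva component enabled, so treat it as a sketch rather than something every installation can run.

```python
import pyarrow as pa
import pyarrow.gandiva as gandiva

schema = pa.schema([("a", pa.int64()), ("b", pa.int64())])
builder = gandiva.TreeExprBuilder()

# Build the expression a + b and project it into a new field "a_plus_b".
add_node = builder.make_function(
    "add",
    [builder.make_field(schema.field("a")), builder.make_field(schema.field("b"))],
    pa.int64(),
)
expr = builder.make_expression(add_node, pa.field("a_plus_b", pa.int64()))
projector = gandiva.make_projector(schema, [expr], pa.default_memory_pool())

batch = pa.record_batch([pa.array([1, 2]), pa.array([10, 20])], schema=schema)
result, = projector.evaluate(batch)  # one output Array per expression
print(result)                        # [11, 22]
```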
diff --git a/python/pyarrow/interchange/__init__.pyi b/python/pyarrow/interchange/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pyarrow/interchange/buffer.pyi b/python/pyarrow/interchange/buffer.pyi new file mode 100644 index 00000000000..46673961a75 --- /dev/null +++ b/python/pyarrow/interchange/buffer.pyi @@ -0,0 +1,58 @@ +import enum + +from pyarrow.lib import Buffer + +class DlpackDeviceType(enum.IntEnum): + """Integer enum for device type codes matching DLPack.""" + + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + +class _PyArrowBuffer: + """ + Data in the buffer is guaranteed to be contiguous in memory. + + Note that there is no dtype attribute present, a buffer can be thought of + as simply a block of memory. However, if the column that the buffer is + attached to has a dtype that's supported by DLPack and ``__dlpack__`` is + implemented, then that dtype information will be contained in the return + value from ``__dlpack__``. + + This distinction is useful to support both data exchange via DLPack on a + buffer and (b) dtypes like variable-length strings which do not have a + fixed number of bytes per element. + """ + def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... + @property + def bufsize(self) -> int: + """ + Buffer size in bytes. + """ + @property + def ptr(self) -> int: + """ + Pointer to start of the buffer as an integer. + """ + def __dlpack__(self): + """ + Produce DLPack capsule (see array API standard). + + Raises: + - TypeError : if the buffer contains unsupported dtypes. + - NotImplementedError : if DLPack support is not implemented + + Useful to have to connect to array libraries. Support optional because + it's not completely trivial to implement for a Python-only library. + """ + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: + """ + Device type and device ID for where the data in the buffer resides. + Uses device type codes matching DLPack. + Note: must be implemented even if ``__dlpack__`` is not. + """ diff --git a/python/pyarrow/interchange/column.pyi b/python/pyarrow/interchange/column.pyi new file mode 100644 index 00000000000..e6662867b6b --- /dev/null +++ b/python/pyarrow/interchange/column.pyi @@ -0,0 +1,252 @@ +import enum + +from typing import Any, Iterable, TypeAlias, TypedDict + +from pyarrow.lib import Array, ChunkedArray + +from .buffer import _PyArrowBuffer + +class DtypeKind(enum.IntEnum): + """ + Integer enum for data types. + + Attributes + ---------- + INT : int + Matches to signed integer data type. + UINT : int + Matches to unsigned integer data type. + FLOAT : int + Matches to floating point data type. + BOOL : int + Matches to boolean data type. + STRING : int + Matches to string data type (UTF-8 encoded). + DATETIME : int + Matches to datetime data type. + CATEGORICAL : int + Matches to categorical data type. + """ + + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + +Dtype: TypeAlias = tuple[DtypeKind, int, str, str] + +class ColumnNullType(enum.IntEnum): + """ + Integer enum for null type representation. + + Attributes + ---------- + NON_NULLABLE : int + Non-nullable column. + USE_NAN : int + Use explicit float NaN value. + USE_SENTINEL : int + Sentinel value besides NaN. + USE_BITMASK : int + The bit is set/unset representing a null on a certain position. + USE_BYTEMASK : int + The byte is set/unset representing a null on a certain position. 
+ """ + + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + +class ColumnBuffers(TypedDict): + data: tuple[_PyArrowBuffer, Dtype] + validity: tuple[_PyArrowBuffer, Dtype] | None + offsets: tuple[_PyArrowBuffer, Dtype] | None + +class CategoricalDescription(TypedDict): + is_ordered: bool + is_dictionary: bool + categories: _PyArrowColumn | None + +class Endianness(enum.Enum): + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + +class NoBufferPresent(Exception): + """Exception to signal that there is no requested buffer.""" + +class _PyArrowColumn: + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + + TBD: Arrow has a separate "null" dtype, and has no separate mask concept. + Instead, it seems to use "children" for both columns with a bit mask, + and for nested dtypes. Unclear whether this is elegant or confusing. + This design requires checking the null representation explicitly. + + The Arrow design requires checking: + 1. the ARROW_FLAG_NULLABLE (for sentinel values) + 2. if a column has two children, combined with one of those children + having a null dtype. + + Making the mask concept explicit seems useful. One null dtype would + not be enough to cover both bit and byte masks, so that would mean + even more checking if we did it the Arrow way. + + TBD: there's also the "chunk" concept here, which is implicit in Arrow as + multiple buffers per array (= column here). Semantically it may make + sense to have both: chunks were meant for example for lazy evaluation + of data which doesn't fit in memory, while multiple buffers per column + could also come from doing a selection operation on a single + contiguous buffer. + + Given these concepts, one would expect chunks to be all of the same + size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), + while multiple buffers could have data-dependent lengths. Not an issue + in pandas if one column is backed by a single NumPy array, but in + Arrow it seems possible. + Are multiple chunks *and* multiple buffers per column necessary for + the purposes of this interchange protocol, or must producers either + reuse the chunk concept for this or copy the data? + + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... + def size(self) -> int: + """ + Size of the column, in elements. + + Corresponds to DataFrame.num_rows() if column is a single chunk; + equal to size of this current chunk otherwise. + + Is a method rather than a property because it may cause a (potentially + expensive) computation for some dataframe implementations. + """ + @property + def offset(self) -> int: + """ + Offset of first element. + + May be > 0 if using chunks; for example for a column with N chunks of + equal size M (only the last chunk may be shorter), + ``offset = n * M``, ``n = 0 .. N-1``. + """ + @property + def dtype(self) -> tuple[DtypeKind, int, str, str]: + """ + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. 
+ + Bit-width : the number of bits as an integer + Format string : data type description format string in Apache Arrow C + Data Interface format. + Endianness : current only native endianness (``=``) is supported + + Notes: + - Kind specifiers are aligned with DLPack where possible (hence the + jump to 20, leave enough room for future extension) + - Masks must be specified as boolean with either bit width 1 (for + bit masks) or 8 (for byte masks). + - Dtype width in bits was preferred over bytes + - Endianness isn't too useful, but included now in case in the + future we need to support non-native endianness + - Went with Apache Arrow format strings over NumPy format strings + because they're more complete from a dataframe perspective + - Format strings are mostly useful for datetime specification, and + for categoricals. + - For categoricals, the format string describes the type of the + categorical in the data buffer. In case of a separate encoding of + the categorical (e.g. an integer to string mapping), this can + be derived from ``self.describe_categorical``. + - Data types not included: complex, Arrow-style null, binary, + decimal, and nested (list, struct, map, union) dtypes. + """ + @property + def describe_categorical(self) -> CategoricalDescription: + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding categorical + values. + + Raises TypeError if the dtype is not categorical + + Returns the dictionary with description on how to interpret the + data buffer: + - "is_ordered" : bool, whether the ordering of dictionary indices + is semantically meaningful. + - "is_dictionary" : bool, whether a mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of + indices to category values (e.g. an array of + cat1, cat2, ...). None if not a dictionary-style + categorical. + + TBD: are there any other in-memory representations that are needed? + """ + @property + def describe_null(self) -> tuple[ColumnNullType, Any]: + """ + Return the missing value (or "null") representation the column dtype + uses, as a tuple ``(kind, value)``. + + Value : if kind is "sentinel value", the actual value. If kind is a bit + mask or a byte mask, the value (0 or 1) indicating a missing value. + None otherwise. + """ + @property + def null_count(self) -> int: + """ + Number of null elements, if known. + + Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the chunks. + + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. + + The returned dictionary has the following contents: + + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. 
None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ diff --git a/python/pyarrow/interchange/dataframe.pyi b/python/pyarrow/interchange/dataframe.pyi new file mode 100644 index 00000000000..526a58926a9 --- /dev/null +++ b/python/pyarrow/interchange/dataframe.pyi @@ -0,0 +1,102 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Any, Iterable, Sequence + +from pyarrow.interchange.column import _PyArrowColumn +from pyarrow.lib import RecordBatch, Table + +class _PyArrowDataFrame: + """ + A data frame class, with only the methods required by the interchange + protocol defined. + + A "data frame" represents an ordered collection of named columns. + A column's "name" must be a unique string. + Columns may be accessed by name or by position. + + This could be a public data frame class, or an object with the methods and + attributes defined on this DataFrame class could be returned from the + ``__dataframe__`` method of a public data frame class in a library adhering + to the dataframe interchange protocol specification. + """ + + def __init__( + self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: ... + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Construct a new exchange object, potentially changing the parameters. + ``nan_as_null`` is a keyword intended for the consumer to tell the + producer to overwrite null values in the data with ``NaN``. + It is intended for cases where the consumer does not support the bit + mask or byte mask that is the producer's native representation. + ``allow_copy`` is a keyword that defines whether or not the library is + allowed to make a copy of the data. For example, copying data would be + necessary if a library supports strided buffers, given that this + protocol specifies contiguous buffers. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything, they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. For avoiding collisions with other + entries, please add name the keys with the name of the library + followed by a period and the desired name, e.g, ``pandas.indexcol``. + """ + def num_columns(self) -> int: + """ + Return the number of columns in the DataFrame. + """ + def num_rows(self) -> int: + """ + Return the number of rows in the DataFrame, if available. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the DataFrame consists of. + """ + def column_names(self) -> Iterable[str]: + """ + Return an iterator yielding the column names. + """ + def get_column(self, i: int) -> _PyArrowColumn: + """ + Return the column at the indicated position. + """ + def get_column_by_name(self, name: str) -> _PyArrowColumn: + """ + Return the column whose name is the indicated name. 
+ """ + def get_columns(self) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the columns. + """ + def select_columns(self, indices: Sequence[int]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by index. + """ + def select_columns_by_name(self, names: Sequence[str]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by name. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: + """ + Return an iterator yielding the chunks. + + By default (None), yields the chunks that the data is stored as by the + producer. If given, ``n_chunks`` must be a multiple of + ``self.num_chunks()``, meaning the producer must subdivide each chunk + before yielding it. + + Note that the producer must ensure that all columns are chunked the + same way. + """ diff --git a/python/pyarrow/interchange/from_dataframe.pyi b/python/pyarrow/interchange/from_dataframe.pyi new file mode 100644 index 00000000000..b04b6268975 --- /dev/null +++ b/python/pyarrow/interchange/from_dataframe.pyi @@ -0,0 +1,244 @@ +from typing import Any, Protocol, TypeAlias + +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table + +from .column import ( + ColumnBuffers, + ColumnNullType, + Dtype, + DtypeKind, +) + +class DataFrameObject(Protocol): + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> Any: ... + +ColumnObject: TypeAlias = Any + +def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: + """ + Build a ``pa.Table`` from any DataFrame supporting the interchange protocol. + + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Table + + Examples + -------- + >>> import pyarrow + >>> from pyarrow.interchange import from_dataframe + + Convert a pandas dataframe to a pyarrow table: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_attendees": [100, 10, 1], + ... "country": ["Italy", "Spain", "Slovenia"], + ... } + ... ) + >>> df + n_attendees country + 0 100 Italy + 1 10 Spain + 2 1 Slovenia + >>> from_dataframe(df) + pyarrow.Table + n_attendees: int64 + country: large_string + ---- + n_attendees: [[100,10,1]] + country: [["Italy","Spain","Slovenia"]] + """ + +def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: + """ + Convert interchange protocol chunk to ``pa.RecordBatch``. + + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.RecordBatch + """ + +def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding one of the primitive dtypes to a PyArrow array. + A primitive type is one of: int, uint, float, bool (1 bit). + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). 
+ + Returns + ------- + pa.Array + """ + +def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding boolean dtype to a PyArrow array. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + """ + +def categorical_column_to_dictionary( + col: ColumnObject, allow_copy: bool = True +) -> DictionaryArray: + """ + Convert a column holding categorical data to a pa.DictionaryArray. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.DictionaryArray + """ + +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: + """Parse datetime `format_str` to interpret the `data`.""" + +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: + """Map column date type to pyarrow date type.""" + +def buffers_to_array( + buffers: ColumnBuffers, + data_type: tuple[DtypeKind, int, str, str], + length: int, + describe_null: ColumnNullType, + offset: int = 0, + allow_copy: bool = True, +) -> Array: + """ + Build a PyArrow array from the passed buffer. + + Parameters + ---------- + buffer : ColumnBuffers + Dictionary containing tuples of underlying buffers and + their associated dtype. + data_type : Tuple[DtypeKind, int, str, str], + Dtype description of the column as a tuple ``(kind, bit-width, format string, + endianness)``. + length : int + The number of values in the array. + describe_null: ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + + Notes + ----- + The returned array doesn't own the memory. The caller of this function + is responsible for keeping the memory owner object alive as long as + the returned PyArrow array is being used. + """ + +def validity_buffer_from_mask( + validity_buff: Buffer, + validity_dtype: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from the passed mask buffer. + + Parameters + ---------- + validity_buff : BufferObject + Tuple of underlying validity buffer and associated dtype. + validity_dtype : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ + +def validity_buffer_nan_sentinel( + data_pa_buffer: Buffer, + data_type: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from NaN or sentinel values. 
+ + Parameters + ---------- + data_pa_buffer : pa.Buffer + PyArrow buffer for the column data. + data_type : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ diff --git a/python/pyarrow/ipc.pyi b/python/pyarrow/ipc.pyi new file mode 100644 index 00000000000..c7f2af004d4 --- /dev/null +++ b/python/pyarrow/ipc.pyi @@ -0,0 +1,123 @@ +from io import IOBase + +import pandas as pd +import pyarrow.lib as lib + +from pyarrow.lib import ( + IpcReadOptions, + IpcWriteOptions, + Message, + MessageReader, + MetadataVersion, + ReadStats, + RecordBatchReader, + WriteStats, + _ReadPandasMixin, + get_record_batch_size, + get_tensor_size, + read_message, + read_record_batch, + read_schema, + read_tensor, + write_tensor, +) + +class RecordBatchStreamReader(lib._RecordBatchStreamReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +class RecordBatchFileReader(lib._RecordBatchFileReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchFileWriter(lib._RecordBatchFileWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +def new_stream( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchStreamWriter: ... +def open_stream( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchStreamReader: ... +def new_file( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchFileWriter: ... +def open_file( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchFileReader: ... +def serialize_pandas( + df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None +) -> lib.Buffer: ... +def deserialize_pandas(buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... 
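The stream and file reader/writer classes above mirror pyarrow's public IPC API. As a minimal round-trip sketch (the in-memory ``BufferOutputStream`` sink and the column name ``x`` are illustrative choices, not part of this patch):

.. code-block:: python

    import pyarrow as pa
    import pyarrow.ipc as ipc

    # Write one record batch to an in-memory IPC stream.
    batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
    sink = pa.BufferOutputStream()
    with ipc.new_stream(sink, batch.schema) as writer:
        writer.write_batch(batch)

    # Read it back; open_stream accepts a Buffer, NativeFile or file-like object.
    with ipc.open_stream(sink.getvalue()) as reader:
        table = reader.read_all()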
+ +__all__ = [ + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "MetadataVersion", + "ReadStats", + "RecordBatchReader", + "WriteStats", + "_ReadPandasMixin", + "get_record_batch_size", + "get_tensor_size", + "read_message", + "read_record_batch", + "read_schema", + "read_tensor", + "write_tensor", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "new_stream", + "open_stream", + "new_file", + "open_file", + "serialize_pandas", + "deserialize_pandas", +] diff --git a/python/pyarrow/json.pyi b/python/pyarrow/json.pyi new file mode 100644 index 00000000000..db1d35e0b8b --- /dev/null +++ b/python/pyarrow/json.pyi @@ -0,0 +1,3 @@ +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json + +__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow/lib.pyi b/python/pyarrow/lib.pyi new file mode 100644 index 00000000000..1698b55520b --- /dev/null +++ b/python/pyarrow/lib.pyi @@ -0,0 +1,106 @@ +# ruff: noqa: F403 +from typing import NamedTuple + +from .__lib_pxi.array import * +from .__lib_pxi.benchmark import * +from .__lib_pxi.builder import * +from .__lib_pxi.compat import * +from .__lib_pxi.config import * +from .__lib_pxi.device import * +from .__lib_pxi.error import * +from .__lib_pxi.io import * +from .__lib_pxi.ipc import * +from .__lib_pxi.memory import * +from .__lib_pxi.pandas_shim import * +from .__lib_pxi.scalar import * +from .__lib_pxi.table import * +from .__lib_pxi.tensor import * +from .__lib_pxi.types import * + +class MonthDayNano(NamedTuple): + months: int + days: int + nanoseconds: int + +def cpu_count() -> int: + """ + Return the number of threads to use in parallel operations. + + The number of threads is determined at startup by inspecting the + ``OMP_NUM_THREADS`` and ``OMP_THREAD_LIMIT`` environment variables. + If neither is present, it will default to the number of hardware threads + on the system. It can be modified at runtime by calling + :func:`set_cpu_count()`. + + See Also + -------- + set_cpu_count : Modify the size of this pool. + io_thread_count : The analogous function for the I/O thread pool. + """ + +def set_cpu_count(count: int) -> None: + """ + Set the number of threads to use in parallel operations. + + Parameters + ---------- + count : int + The number of concurrent threads that should be used. + + See Also + -------- + cpu_count : Get the size of this pool. + set_io_thread_count : The analogous function for the I/O thread pool. + """ + +def is_threading_enabled() -> bool: + """ + Returns True if threading is enabled in libarrow. + + If it isn't enabled, then python shouldn't create any + threads either, because we're probably on a system where + threading doesn't work (e.g. Emscripten).
+ """ + +Type_NA: int +Type_BOOL: int +Type_UINT8: int +Type_INT8: int +Type_UINT16: int +Type_INT16: int +Type_UINT32: int +Type_INT32: int +Type_UINT64: int +Type_INT64: int +Type_HALF_FLOAT: int +Type_FLOAT: int +Type_DOUBLE: int +Type_DECIMAL128: int +Type_DECIMAL256: int +Type_DATE32: int +Type_DATE64: int +Type_TIMESTAMP: int +Type_TIME32: int +Type_TIME64: int +Type_DURATION: int +Type_INTERVAL_MONTH_DAY_NANO: int +Type_BINARY: int +Type_STRING: int +Type_LARGE_BINARY: int +Type_LARGE_STRING: int +Type_FIXED_SIZE_BINARY: int +Type_BINARY_VIEW: int +Type_STRING_VIEW: int +Type_LIST: int +Type_LARGE_LIST: int +Type_LIST_VIEW: int +Type_LARGE_LIST_VIEW: int +Type_MAP: int +Type_FIXED_SIZE_LIST: int +Type_STRUCT: int +Type_SPARSE_UNION: int +Type_DENSE_UNION: int +Type_DICTIONARY: int +Type_RUN_END_ENCODED: int +UnionMode_SPARSE: int +UnionMode_DENSE: int diff --git a/python/pyarrow/orc.pyi b/python/pyarrow/orc.pyi new file mode 100644 index 00000000000..2eba8d40a11 --- /dev/null +++ b/python/pyarrow/orc.pyi @@ -0,0 +1,279 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Literal + +from _typeshed import StrPath + +from . import _orc +from ._fs import SupportedFileSystem +from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table + +class ORCFile: + """ + Reader interface for a single ORC file + + Parameters + ---------- + source : str or pyarrow.NativeFile + Readable source. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader. + """ + + reader: _orc.ORCReader + def __init__(self, source: StrPath | NativeFile | IO) -> None: ... + @property + def metadata(self) -> KeyValueMetadata: + """The file metadata, as an arrow KeyValueMetadata""" + @property + def schema(self) -> Schema: + """The file schema, as an arrow schema""" + @property + def nrows(self) -> int: + """The number of rows in the file""" + @property + def nstripes(self) -> int: + """The number of stripes in the file""" + @property + def file_version(self) -> str: + """Format version of the ORC file, must be 0.11 or 0.12""" + @property + def software_version(self) -> str: + """Software instance and version that wrote this file""" + @property + def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: + """Compression codec of the file""" + @property + def compression_size(self) -> int: + """Number of bytes to buffer for the compression codec in the file""" + @property + def writer(self) -> str: + """Name of the writer that wrote this file. 
+ If the writer is unknown then its Writer ID + (a number) is returned""" + @property + def writer_version(self) -> str: + """Version of the writer""" + @property + def row_index_stride(self) -> int: + """Number of rows per an entry in the row index or 0 + if there is no row index""" + @property + def nstripe_statistics(self) -> int: + """Number of stripe statistics""" + @property + def content_length(self) -> int: + """Length of the data stripes in the file in bytes""" + @property + def stripe_statistics_length(self) -> int: + """The number of compressed bytes in the file stripe statistics""" + @property + def file_footer_length(self) -> int: + """The number of compressed bytes in the file footer""" + @property + def file_postscript_length(self) -> int: + """The number of bytes in the file postscript""" + @property + def file_length(self) -> int: + """The number of bytes in the file""" + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: + """Read a single stripe from the file. + + Parameters + ---------- + n : int + The stripe index + columns : list + If not None, only these columns will be read from the stripe. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e' + + Returns + ------- + pyarrow.RecordBatch + Content of the stripe as a RecordBatch. + """ + def read(self, columns: list[str] | None = None) -> Table: + """Read the whole file. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. Output always follows the + ordering of the file and not the `columns` list. + + Returns + ------- + pyarrow.Table + Content of the file as a Table. + """ + +class ORCWriter: + """ + Writer interface for a single ORC file + + Parameters + ---------- + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. + Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. 
+ """ + + writer: _orc.ORCWriter + is_open: bool + def __init__( + self, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + def write(self, table: Table) -> None: + """ + Write the table into an ORC file. The schema of the table must + be equal to the schema used when opening the ORC file. + + Parameters + ---------- + table : pyarrow.Table + The table to be written into the ORC file + """ + def close(self) -> None: + """ + Close the ORC file + """ + +def read_table( + source: StrPath | NativeFile | IO, + columns: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Table: + """ + Read a Table from an ORC file. + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name. For file-like objects, + only read a single file. Use pyarrow.BufferReader to read a file + contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. Output always follows the ordering of the file and + not the `columns` list. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + """ + +def write_table( + table: Table, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, +) -> None: + """ + Write a table into an ORC file. + + Parameters + ---------- + table : pyarrow.lib.Table + The table to be written into the ORC file + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. 
+ Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. + """ diff --git a/python/pyarrow/pandas_compat.pyi b/python/pyarrow/pandas_compat.pyi new file mode 100644 index 00000000000..efbd05ac2fe --- /dev/null +++ b/python/pyarrow/pandas_compat.pyi @@ -0,0 +1,54 @@ +from typing import Any, TypedDict, TypeVar + +import numpy as np +import pandas as pd + +from pandas import DatetimeTZDtype + +from .lib import Array, DataType, Schema, Table + +_T = TypeVar("_T") + +def get_logical_type_map() -> dict[int, str]: ... +def get_logical_type(arrow_type: DataType) -> str: ... +def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... +def get_logical_type_from_numpy(pandas_collection) -> str: ... +def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... + +class _ColumnMetadata(TypedDict): + name: str + field_name: str + pandas_type: int + numpy_type: str + metadata: dict | None + +def get_column_metadata( + column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str +) -> _ColumnMetadata: ... +def construct_metadata( + columns_to_convert: list[pd.Series], + df: pd.DataFrame, + column_names: list[str], + index_levels: list[pd.Index], + index_descriptors: list[dict], + preserve_index: bool, + types: list[DataType], + column_field_names: list[str] = ..., +) -> dict[bytes, bytes]: ... +def dataframe_to_types( + df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None +) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... +def dataframe_to_arrays( + df: pd.DataFrame, + schema: Schema, + preserve_index: bool | None, + nthreads: int = 1, + columns: list[str] | None = None, + safe: bool = True, +) -> tuple[Array, Schema, int]: ... +def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... +def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... +def table_to_dataframe( + options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None +) -> pd.DataFrame: ... +def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ... 
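A minimal write/read cycle against the ``pyarrow.orc`` interface stubbed above (a sketch only; the file name ``example.orc`` and the sample column are illustrative, not part of this patch):

.. code-block:: python

    import pyarrow as pa
    from pyarrow import orc

    table = pa.table({"n_legs": [2, 4, 100]})

    # write_table accepts the writer options documented above, e.g. compression="ZSTD".
    orc.write_table(table, "example.orc")

    # Whole-file read, optionally restricted to a subset of columns.
    roundtripped = orc.read_table("example.orc", columns=["n_legs"])

    # Stripe-level access goes through ORCFile; read_stripe returns a RecordBatch.
    f = orc.ORCFile("example.orc")
    first_stripe = f.read_stripe(0)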
diff --git a/python/pyarrow/parquet/__init__.pyi b/python/pyarrow/parquet/__init__.pyi new file mode 100644 index 00000000000..4ef88705809 --- /dev/null +++ b/python/pyarrow/parquet/__init__.pyi @@ -0,0 +1 @@ +from .core import * # noqa diff --git a/python/pyarrow/parquet/core.pyi b/python/pyarrow/parquet/core.pyi new file mode 100644 index 00000000000..56b2c8447d9 --- /dev/null +++ b/python/pyarrow/parquet/core.pyi @@ -0,0 +1,2061 @@ +import sys + +from pathlib import Path + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Callable, Iterator, Literal, Sequence + +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow import _parquet +from pyarrow._compute import Expression +from pyarrow._fs import FileSystem, SupportedFileSystem +from pyarrow._parquet import ( + ColumnChunkMetaData, + ColumnSchema, + FileDecryptionProperties, + FileEncryptionProperties, + FileMetaData, + ParquetLogicalType, + ParquetReader, + ParquetSchema, + RowGroupMetaData, + SortingColumn, + Statistics, +) +from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow.dataset import ParquetFileFragment, Partitioning +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from typing_extensions import deprecated + +__all__ = ( + "ColumnChunkMetaData", + "ColumnSchema", + "FileDecryptionProperties", + "FileEncryptionProperties", + "FileMetaData", + "ParquetDataset", + "ParquetFile", + "ParquetLogicalType", + "ParquetReader", + "ParquetSchema", + "ParquetWriter", + "RowGroupMetaData", + "SortingColumn", + "Statistics", + "read_metadata", + "read_pandas", + "read_schema", + "read_table", + "write_metadata", + "write_table", + "write_to_dataset", + "_filters_to_expression", + "filters_to_expression", +) + +def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: + """ + Check if filters are well-formed and convert to an ``Expression``. + + Parameters + ---------- + filters : List[Tuple] or List[List[Tuple]] + + Notes + ----- + See internal ``pyarrow._DNF_filter_doc`` attribute for more details. + + Examples + -------- + + >>> filters_to_expression([("foo", "==", "bar")]) + + + Returns + ------- + pyarrow.compute.Expression + An Expression representing the filters + """ + +@deprecated("use filters_to_expression") +def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + +_Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] + +class ParquetFile: + """ + Reader interface for a single Parquet file. + + Parameters + ---------- + source : str, pathlib.Path, pyarrow.NativeFile, or file-like object + Readable source. For passing bytes or buffer-like file containing a + Parquet file, use pyarrow.BufferReader. + metadata : FileMetaData, default None + Use existing metadata object, rather than reading from file. + common_metadata : FileMetaData, default None + Will be used in reads for pandas schema metadata if not found in the + main file's metadata, no other uses at the moment. + read_dictionary : list + List of column names to read directly as DictionaryArray. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. 
Otherwise IO calls are unbuffered. + pre_buffer : bool, default False + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties, default None + File decryption properties for Parquet Modular Encryption. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Examples + -------- + + Generate an example PyArrow Table and write it to Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Create a ``ParquetFile`` object from the Parquet file: + + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the data: + + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + + Create a ParquetFile object with "animal" column as DictionaryArray: + + >>> parquet_file = pq.ParquetFile("example.parquet", read_dictionary=["animal"]) + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [ -- dictionary: + ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: + [0,1,2,3,4,5]] + """ + + reader: ParquetReader + common_metadata: FileMetaData + + def __init__( + self, + source: str | Path | NativeFile | IO, + *, + metadata: FileMetaData | None = None, + common_metadata: FileMetaData | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + filesystem: SupportedFileSystem | None = None, + page_checksum_verification: bool = False, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + @property + def metadata(self) -> FileMetaData: + """ + Return the Parquet metadata. 
+ """ + @property + def schema(self) -> ParquetSchema: + """ + Return the Parquet schema, unconverted to Arrow types + """ + @property + def schema_arrow(self) -> Schema: + """ + Return the inferred Arrow schema, converted from the whole Parquet + file's schema + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the Arrow schema: + + >>> parquet_file.schema_arrow + n_legs: int64 + animal: string + """ + @property + def num_row_groups(self) -> int: + """ + Return the number of row groups of the Parquet file. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.num_row_groups + 1 + """ + def close(self, force: bool = False) -> None: ... + @property + def closed(self) -> bool: ... + def read_row_group( + self, + i: int, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a single row group from a Parquet file. + + Parameters + ---------- + i : int + Index of the individual row group that we want to read. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row group as a table (of columns) + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_group(0) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def read_row_groups( + self, + row_groups: list, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a multiple row groups from a Parquet file. + + Parameters + ---------- + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row groups as a table (of columns). 
+ + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_groups([0, 0]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] + """ + def iter_batches( + self, + batch_size: int = 65536, + row_groups: list | None = None, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Iterator[RecordBatch]: + """ + Read streaming batches from a Parquet file. + + Parameters + ---------- + batch_size : int, default 64K + Maximum number of records to yield per batch. Batches may be + smaller if there aren't enough rows in the file. + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : boolean, default True + Perform multi-threaded column reads. + use_pandas_metadata : boolean, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Yields + ------ + pyarrow.RecordBatch + Contents of each batch as a record batch + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + >>> for i in parquet_file.iter_batches(): + ... print("RecordBatch") + ... print(i.to_pandas()) + RecordBatch + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + def read( + self, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a Table from Parquet format. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read a Table: + + >>> parquet_file.read(columns=["animal"]) + pyarrow.Table + animal: string + ---- + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: + """ + Read contents of file for the given columns and batch size. + + Notes + ----- + This function's primary purpose is benchmarking. + The scan is executed on a single thread. + + Parameters + ---------- + columns : list of integers, default None + Select columns to read, if None scan all columns. + batch_size : int, default 64K + Number of rows to read at a time internally. + + Returns + ------- + num_rows : int + Number of rows in file + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.scan_contents() + 6 + """ + +class ParquetWriter: + """ + Class for incrementally building a Parquet file for Arrow tables. + + Parameters + ---------- + where : path or file-like object + schema : pyarrow.Schema + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). + use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This take priority over the coerce_timestamps option. + coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. For ``version='1.0'`` and ``version='2.4'``, + nanoseconds are cast to microseconds ('us'), while for + ``version='2.6'`` (the default), they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. 
+ If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} + allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. + data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. + flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. + use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_stream are + enabled, then dictionary is preferred. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. + column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Can only be used when ``use_dictionary`` is set to False, and + cannot be used in combination with ``use_byte_stream_split``. + Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', + 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files `_. + data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. + use_compliant_nested_type : bool, default True + Whether to write compliant Parquet nested type (lists) as defined + `here `_, defaults to ``True``. 
+ For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + group (LIST) { + repeated group list { + element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + group (LIST) { + repeated group list { + item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read size by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. + store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. 
+ - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + writer_engine_version : unused + **options : dict + If options contains a key `metadata_collector` then the + corresponding value is assumed to be a list (or any object with + `.append` method) that will be filled with the file metadata instance + of the written file. + + Examples + -------- + Generate an example PyArrow Table and RecordBatch: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.record_batch( + ... [ + ... [2, 2, 4, 4, 5, 100], + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... ], + ... names=["n_legs", "animal"], + ... ) + + create a ParquetWriter object: + + >>> import pyarrow.parquet as pq + >>> writer = pq.ParquetWriter("example.parquet", table.schema) + + and write the Table into the Parquet file: + + >>> writer.write_table(table) + >>> writer.close() + + >>> pq.read_table("example.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + create a ParquetWriter object for the RecordBatch: + + >>> writer2 = pq.ParquetWriter("example2.parquet", batch.schema) + + and write the RecordBatch into the Parquet file: + + >>> writer2.write_batch(batch) + >>> writer2.close() + + >>> pq.read_table("example2.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + + flavor: str + schema_changed: bool + schema: ParquetSchema + where: str | Path | IO + file_handler: NativeFile | None + writer: _parquet.ParquetWriter + is_open: bool + + def __init__( + self, + where: str | Path | IO | NativeFile, + schema: Schema, + filesystem: SupportedFileSystem | None = None, + flavor: str | None = None, + version: Literal["1.0", "2.4", "2.6"] = ..., + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool | list = False, + column_encoding: str | dict | None = None, + writer_engine_version=None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **options, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> Literal[False]: ... + def write( + self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None + ) -> None: + """ + Write RecordBatch or Table to the Parquet file. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the input + table or batch length and 1024 * 1024. + """ + def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: + """ + Write RecordBatch to the Parquet file. 
+ + Parameters + ---------- + batch : RecordBatch + row_group_size : int, default None + Maximum number of rows in written row group. If None, the + row group size will be the minimum of the RecordBatch + size and 1024 * 1024. If set larger than 64Mi then 64Mi + will be used instead. + """ + def write_table(self, table: Table, row_group_size: int | None = None) -> None: + """ + Write Table to the Parquet file. + + Parameters + ---------- + table : Table + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the Table size + and 1024 * 1024. If set larger than 64Mi then 64Mi will + be used instead. + + """ + def close(self) -> None: + """ + Close the connection to the Parquet file. + """ + def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: + """ + Add key-value metadata to the file. + This will overwrite any existing metadata with the same key. + + Parameters + ---------- + key_value_metadata : dict + Keys and values must be string-like / coercible to bytes. + """ + +class ParquetDataset: + """ + Encapsulates details of reading a complete Parquet dataset possibly + consisting of multiple files and partitions in subdirectories. + + Parameters + ---------- + path_or_paths : str or List[str] + A directory name, single file name, or list of file names. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : pyarrow.parquet.Schema + Optionally provide the Schema for the Dataset, in which case it will + not be inferred from the source. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') = 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. 
code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. Set to False if you want to prioritize minimal memory usage + over maximum speed. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular resolution + (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 + timestamps will be inferred as timestamps in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file. + + Examples + -------- + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2", partition_cols=["year"]) + + create a ParquetDataset object from the dataset source: + + >>> dataset = pq.ParquetDataset("dataset_v2/") + + and read the data: + + >>> dataset.read().to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + create a ParquetDataset object with filter: + + >>> dataset = pq.ParquetDataset("dataset_v2/", filters=[("n_legs", "=", 4)]) + >>> dataset.read().to_pandas() + n_legs animal year + 0 4 Dog 2021 + 1 4 Horse 2022 + """ + def __init__( + self, + path_or_paths: SingleOrList[str] + | SingleOrList[Path] + | SingleOrList[NativeFile] + | SingleOrList[IO], + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + *, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + def equals(self, other: ParquetDataset) -> bool: ... + @property + def schema(self) -> Schema: + """ + Schema of the Dataset. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_schema", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_schema/") + + Read the schema: + + >>> dataset.schema + n_legs: int64 + animal: string + year: dictionary + """ + def read( + self, + columns: list[str] | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read (multiple) Parquet files as a single pyarrow.Table. + + Parameters + ---------- + columns : List[str] + Names of columns to read from the dataset. The partition fields + are not automatically included. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_read", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_read/") + + Read the dataset: + + >>> dataset.read(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[5],[2],[4,100],[2,4]] + """ + def read_pandas(self, **kwargs) -> Table: + """ + Read dataset including pandas metadata, if any. 
Other arguments passed
+        through to :func:`read`, see docstring for further details.
+
+        Parameters
+        ----------
+        **kwargs : optional
+            Additional options for :func:`read`
+
+        Examples
+        --------
+        Generate an example parquet file:
+
+        >>> import pyarrow as pa
+        >>> import pandas as pd
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "year": [2020, 2022, 2021, 2022, 2019, 2021],
+        ...         "n_legs": [2, 2, 4, 4, 5, 100],
+        ...         "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
+        ...     }
+        ... )
+        >>> table = pa.Table.from_pandas(df)
+        >>> import pyarrow.parquet as pq
+        >>> pq.write_table(table, "table_V2.parquet")
+        >>> dataset = pq.ParquetDataset("table_V2.parquet")
+
+        Read the dataset with pandas metadata:
+
+        >>> dataset.read_pandas(columns=["n_legs"])
+        pyarrow.Table
+        n_legs: int64
+        ----
+        n_legs: [[2,2,4,4,5,100]]
+
+        >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata
+        {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...}
+        """
+    @property
+    def fragments(self) -> list[ParquetFileFragment]:
+        """
+        A list of the Dataset source fragments or pieces with absolute
+        file paths.
+
+        Examples
+        --------
+        Generate an example dataset:
+
+        >>> import pyarrow as pa
+        >>> table = pa.table(
+        ...     {
+        ...         "year": [2020, 2022, 2021, 2022, 2019, 2021],
+        ...         "n_legs": [2, 2, 4, 4, 5, 100],
+        ...         "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
+        ...     }
+        ... )
+        >>> import pyarrow.parquet as pq
+        >>> pq.write_to_dataset(table, root_path="dataset_v2_fragments", partition_cols=["year"])
+        >>> dataset = pq.ParquetDataset("dataset_v2_fragments/")
+
+        List the fragments:
+
+        >>> dataset.fragments
+        [<pyarrow.dataset.ParquetFileFragment path=dataset_v2_fragments/...
+        """
+    @property
+    def files(self) -> list[str]:
+        """
+        A list of absolute Parquet file paths in the Dataset source.
+
+        Examples
+        --------
+        Generate an example dataset:
+
+        >>> import pyarrow as pa
+        >>> table = pa.table(
+        ...     {
+        ...         "year": [2020, 2022, 2021, 2022, 2019, 2021],
+        ...         "n_legs": [2, 2, 4, 4, 5, 100],
+        ...         "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
+        ...     }
+        ... )
+        >>> import pyarrow.parquet as pq
+        >>> pq.write_to_dataset(table, root_path="dataset_v2_files", partition_cols=["year"])
+        >>> dataset = pq.ParquetDataset("dataset_v2_files/")
+
+        List the files:
+
+        >>> dataset.files
+        ['dataset_v2_files/year=2019/...-0.parquet', ...
+        """
+    @property
+    def filesystem(self) -> FileSystem:
+        """
+        The filesystem type of the Dataset source.
+        """
+    @property
+    def partitioning(self) -> Partitioning:
+        """
+        The partitioning of the Dataset source, if discovered.
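+
+        For example (a brief sketch, not a verified doctest): for the
+        hive-partitioned ``dataset_v2`` dataset created in the class-level
+        example above, the discovered partitioning exposes the inferred
+        partition schema:
+
+        .. code-block:: python
+
+            import pyarrow.parquet as pq
+
+            dataset = pq.ParquetDataset("dataset_v2/")
+            dataset.partitioning.schema  # e.g. "year" inferred as int32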
+ """ + +def read_table( + source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], + *, + columns: list | None = None, + use_threads: bool = True, + schema: Schema | None = None, + use_pandas_metadata: bool = False, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + filesystem: SupportedFileSystem | None = None, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, +) -> Table: + """ + Read a Table from Parquet format + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. 
+        Within-file level filtering and different partitioning schemes are supported.
+
+        Predicates are expressed using an ``Expression`` or using
+        the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``.
+        DNF allows arbitrary boolean logical combinations of single column predicates.
+        The innermost tuples each describe a single column predicate. The list of inner
+        predicates is interpreted as a conjunction (AND), forming a more selective,
+        multiple-column predicate. Finally, the outermost list combines these filters
+        as a disjunction (OR).
+
+        Predicates may also be passed as List[Tuple]. This form is interpreted
+        as a single conjunction. To express OR in predicates, one must
+        use the (preferred) List[List[Tuple]] notation.
+
+        Each tuple has format: (``key``, ``op``, ``value``) and compares the
+        ``key`` with the ``value``.
+        The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``,
+        ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the
+        ``value`` must be a collection such as a ``list``, a ``set`` or a
+        ``tuple``.
+
+        Examples:
+
+        Using the ``Expression`` API:
+
+        .. code-block:: python
+
+            import pyarrow.compute as pc
+            pc.field('x') == 0
+            pc.field('y').isin(['a', 'b', 'c'])
+            ~pc.field('y').isin({'a', 'b'})
+
+        Using the DNF format:
+
+        .. code-block:: python
+
+            ("x", "=", 0)
+            ("y", "in", ["a", "b", "c"])
+            ("z", "not in", {"a", "b"})
+
+
+    ignore_prefixes : list, optional
+        Files matching any of these prefixes will be ignored by the
+        discovery process.
+        This is matched to the basename of a path.
+        By default this is ['.', '_'].
+        Note that discovery happens only if a directory is passed as source.
+    pre_buffer : bool, default True
+        Coalesce and issue file reads in parallel to improve performance on
+        high-latency filesystems (e.g. S3). If True, Arrow will use a
+        background I/O thread pool. If using a filesystem layer that itself
+        performs readahead (e.g. fsspec's S3FS), disable readahead for best
+        results.
+    coerce_int96_timestamp_unit : str, default None
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be inferred as timestamps
+        in nanoseconds.
+    decryption_properties : FileDecryptionProperties or None
+        File-level decryption properties.
+        The decryption properties can be created using
+        ``CryptoFactory.file_decryption_properties()``.
+    thrift_string_size_limit : int, default None
+        If not None, override the maximum total string size allocated
+        when decoding Thrift structures. The default limit should be
+        sufficient for most Parquet files.
+    thrift_container_size_limit : int, default None
+        If not None, override the maximum total size of containers allocated
+        when decoding Thrift structures. The default limit should be
+        sufficient for most Parquet files.
+    page_checksum_verification : bool, default False
+        If True, verify the checksum for each page read from the file.
+
+    Returns
+    -------
+    pyarrow.Table
+        Content of the file as a table (of columns)
+
+
+    Examples
+    --------
+
+    Generate an example PyArrow Table and write it to a partitioned dataset:
+
+    >>> import pyarrow as pa
+    >>> table = pa.table(
+    ...     {
+    ...         "year": [2020, 2022, 2021, 2022, 2019, 2021],
+    ...         "n_legs": [2, 2, 4, 4, 5, 100],
+    ...         "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
+    ...     }
+    ...
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_name_2", partition_cols=["year"]) + + Read the data: + + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + + Read only a subset of columns: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] + + Read a subset of columns and read one column as DictionaryArray: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"], read_dictionary=["animal"]) + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [ -- dictionary: + ["Brittle stars"] -- indices: + [0], -- dictionary: + ["Flamingo"] -- indices: + [0], -- dictionary: + ["Dog","Centipede"] -- indices: + [0,1], -- dictionary: + ["Parrot","Horse"] -- indices: + [0,1]] + + Read the table with filter: + + >>> pq.read_table( + ... "dataset_name_2", columns=["n_legs", "animal"], filters=[("n_legs", "<", 4)] + ... ).to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + + Read data from a single Parquet file: + + >>> pq.write_table(table, "example.parquet") + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + """ + +def read_pandas( + source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs +) -> Table: + """ + + Read a Table from Parquet format, also reading DataFrame + index values if known in the file metadata + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. 
The default of "hive"
+        assumes directory names with key=value pairs like "/year=2009/month=11".
+        In addition, a scheme like "/2009/11" is also supported, in which case
+        you need to specify the field names or a full schema. See the
+        ``pyarrow.dataset.partitioning()`` function for more details.
+    **kwargs
+        additional options for :func:`read_table`
+    filesystem : FileSystem, default None
+        If nothing passed, will be inferred based on path.
+        Path will try to be found in the local on-disk filesystem otherwise
+        it will be parsed as an URI to determine the filesystem.
+    filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None
+        Rows which do not match the filter predicate will be removed from scanned
+        data. Partition keys embedded in a nested directory structure will be
+        exploited to avoid loading files at all if they contain no matching rows.
+        Within-file level filtering and different partitioning schemes are supported.
+
+        Predicates are expressed using an ``Expression`` or using
+        the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``.
+        DNF allows arbitrary boolean logical combinations of single column predicates.
+        The innermost tuples each describe a single column predicate. The list of inner
+        predicates is interpreted as a conjunction (AND), forming a more selective,
+        multiple-column predicate. Finally, the outermost list combines these filters
+        as a disjunction (OR).
+
+        Predicates may also be passed as List[Tuple]. This form is interpreted
+        as a single conjunction. To express OR in predicates, one must
+        use the (preferred) List[List[Tuple]] notation.
+
+        Each tuple has format: (``key``, ``op``, ``value``) and compares the
+        ``key`` with the ``value``.
+        The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``,
+        ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the
+        ``value`` must be a collection such as a ``list``, a ``set`` or a
+        ``tuple``.
+
+        Examples:
+
+        Using the ``Expression`` API:
+
+        .. code-block:: python
+
+            import pyarrow.compute as pc
+            pc.field('x') == 0
+            pc.field('y').isin(['a', 'b', 'c'])
+            ~pc.field('y').isin({'a', 'b'})
+
+        Using the DNF format:
+
+        .. code-block:: python
+
+            ("x", "=", 0)
+            ("y", "in", ["a", "b", "c"])
+            ("z", "not in", {"a", "b"})
+
+
+    ignore_prefixes : list, optional
+        Files matching any of these prefixes will be ignored by the
+        discovery process.
+        This is matched to the basename of a path.
+        By default this is ['.', '_'].
+        Note that discovery happens only if a directory is passed as source.
+    pre_buffer : bool, default True
+        Coalesce and issue file reads in parallel to improve performance on
+        high-latency filesystems (e.g. S3). If True, Arrow will use a
+        background I/O thread pool. If using a filesystem layer that itself
+        performs readahead (e.g. fsspec's S3FS), disable readahead for best
+        results.
+    coerce_int96_timestamp_unit : str, default None
+        Cast timestamps that are stored in INT96 format to a particular
+        resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
+        and therefore INT96 timestamps will be inferred as timestamps
+        in nanoseconds.
+    decryption_properties : FileDecryptionProperties or None
+        File-level decryption properties.
+        The decryption properties can be created using
+        ``CryptoFactory.file_decryption_properties()``.
+    thrift_string_size_limit : int, default None
+        If not None, override the maximum total string size allocated
+        when decoding Thrift structures.
The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Returns + ------- + pyarrow.Table + Content of the file as a Table of Columns, including DataFrame + indexes as columns + """ + +def write_table( + table: Table, + where: str | Path | NativeFile | IO, + row_group_size: int | None = None, + version: Literal["1.0", "2.4", "2.6"] = "2.6", + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + coerce_timestamps: str | None = None, + allow_truncated_timestamps: bool = False, + data_page_size: int | None = None, + flavor: str | None = None, + filesystem: SupportedFileSystem | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool = False, + column_encoding: str | dict | None = None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **kwargs, +) -> None: + """ + + Write a Table to Parquet format. + + Parameters + ---------- + table : pyarrow.Table + where : string or pyarrow.NativeFile + row_group_size : int + Maximum number of rows in each written row group. If None, the + row group size will be the minimum of the Table size and + 1024 * 1024. + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). + use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This take priority over the coerce_timestamps option. 
+    coerce_timestamps : str, default None
+        Cast timestamps to a particular resolution. If omitted, defaults are chosen
+        depending on `version`. For ``version='1.0'`` and ``version='2.4'``,
+        nanoseconds are cast to microseconds ('us'), while for
+        ``version='2.6'`` (the default), they are written natively without loss
+        of resolution. Seconds are always cast to milliseconds ('ms') by default,
+        as Parquet does not have any temporal type with seconds resolution.
+        If the casting results in loss of data, it will raise an exception
+        unless ``allow_truncated_timestamps=True`` is given.
+        Valid values: {None, 'ms', 'us'}
+    allow_truncated_timestamps : bool, default False
+        Allow loss of data when coercing timestamps to a particular
+        resolution. E.g. if microsecond or nanosecond data is lost when coercing to
+        'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True``
+        will NOT result in the truncation exception being ignored unless
+        ``coerce_timestamps`` is not None.
+    data_page_size : int, default None
+        Set a target threshold for the approximate encoded size of data
+        pages within a column chunk (in bytes). If None, use the default data page
+        size of 1MByte.
+    flavor : {'spark'}, default None
+        Sanitize schema or set other compatibility options to work with
+        various target systems.
+    filesystem : FileSystem, default None
+        If nothing passed, will be inferred from `where` if path-like, else
+        `where` is already a file-like object so no filesystem is needed.
+    compression_level : int or dict, default None
+        Specify the compression level for a codec, either on a general basis or
+        per-column. If None is passed, arrow selects the compression level for
+        the compression codec in use. The compression level has a different
+        meaning for each codec, so you have to read the documentation of the
+        codec you are using.
+        An exception is thrown if the compression codec does not allow specifying
+        a compression level.
+    use_byte_stream_split : bool or list, default False
+        Specify if the byte_stream_split encoding should be used in general or
+        only for some columns. If both dictionary and byte_stream_split are
+        enabled, then dictionary is preferred.
+        The byte_stream_split encoding is valid for integer, floating-point
+        and fixed-size binary data types (including decimals); it should be
+        combined with a compression codec so as to achieve size reduction.
+    column_encoding : string or dict, default None
+        Specify the encoding scheme on a per column basis.
+        Can only be used when ``use_dictionary`` is set to False, and
+        cannot be used in combination with ``use_byte_stream_split``.
+        Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT',
+        'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}.
+        Certain encodings are only compatible with certain data types.
+        Please refer to the encodings section of `Reading and writing Parquet
+        files <https://arrow.apache.org/docs/cpp/parquet.html#encodings>`_.
+    data_page_version : {"1.0", "2.0"}, default "1.0"
+        The serialized Parquet data page format version to write, defaults to
+        1.0. This does not impact the file schema logical types and Arrow to
+        Parquet type casting behavior; for that use the "version" option.
+    use_compliant_nested_type : bool, default True
+        Whether to write compliant Parquet nested type (lists) as defined
+        `here <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types>`_, defaults to ``True``.
+ For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + group (LIST) { + repeated group list { + element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + group (LIST) { + repeated group list { + item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read size by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. + store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. 
+ - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + + **kwargs : optional + Additional options for ParquetWriter + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + and write the Table into Parquet file: + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Defining row group size for the Parquet file: + + >>> pq.write_table(table, "example.parquet", row_group_size=3) + + Defining row group compression (default is Snappy): + + >>> pq.write_table(table, "example.parquet", compression="none") + + Defining row group compression and encoding per-column: + + >>> pq.write_table( + ... table, + ... "example.parquet", + ... compression={"n_legs": "snappy", "animal": "gzip"}, + ... use_dictionary=["n_legs", "animal"], + ... ) + + Defining column encoding per-column: + + >>> pq.write_table( + ... table, "example.parquet", column_encoding={"animal": "PLAIN"}, use_dictionary=False + ... ) + """ + +def write_to_dataset( + table: Table, + root_path: str | Path, + partition_cols: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + partitioning: Partitioning | list[str] | None = None, + basename_template: str | None = None, + use_threads: bool | None = None, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] + | None = None, + **kwargs, +) -> None: + """ + Wrapper around dataset.write_dataset for writing a Table to + Parquet format by partitions. + For each combination of partition columns and values, + a subdirectories are created in the following + manner: + + root_dir/ + group1=value1 + group2=value1 + .parquet + group2=value2 + .parquet + group1=valueN + group2=value1 + .parquet + group2=valueN + .parquet + + Parameters + ---------- + table : pyarrow.Table + root_path : str, pathlib.Path + The root directory of the dataset. + partition_cols : list, + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : Schema, optional + This Schema of the dataset. + partitioning : Partitioning or list[str], optional + The partitioning scheme specified with the + ``pyarrow.dataset.partitioning()`` function or a list of field names. + When providing a list of field names, you can use + ``partitioning_flavor`` to drive which partitioning type should be + used. + basename_template : str, optional + A template string used to generate basenames of written data files. + The token '{i}' will be replaced with an automatically incremented + integer. If not specified, it defaults to "guid-{i}.parquet". + use_threads : bool, default True + Write files in parallel. If enabled, then maximum parallelism will be + used determined by the number of available CPU cores. + file_visitor : function + If set, this function will be called with a WrittenFile instance + for each file created during the call. This object will have both + a path attribute and a metadata attribute. 
+ + The path attribute will be a string containing the path to + the created file. + + The metadata attribute will be the parquet metadata of the file. + This metadata will have the file path attribute set and can be used + to build a _metadata file. The metadata attribute will be None if + the format is not parquet. + + Example visitor which simple collects the filenames created:: + + visited_paths = [] + + def file_visitor(written_file): + visited_paths.append(written_file.path) + + existing_data_behavior : 'overwrite_or_ignore' | 'error' | 'delete_matching' + Controls how the dataset will handle data that already exists in + the destination. The default behaviour is 'overwrite_or_ignore'. + + 'overwrite_or_ignore' will ignore any existing data and will + overwrite files with the same name as an output file. Other + existing files will be ignored. This behavior, in combination + with a unique basename_template for each write, will allow for + an append workflow. + + 'error' will raise an error if any data exists in the destination. + + 'delete_matching' is useful when you are writing a partitioned + dataset. The first time each partition directory is encountered + the entire directory will be deleted. This allows you to overwrite + old partitions completely. + **kwargs : dict, + Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` + function for matching kwargs, and remainder to + :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. + See the docstring of :func:`write_table` and + :func:`pyarrow.dataset.write_dataset` for the available options. + Using `metadata_collector` in kwargs allows one to collect the + file metadata instances of dataset pieces. The file paths in the + ColumnChunkMetaData will be set relative to `root_path`. + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + and write it to a partitioned dataset: + + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_name_3", partition_cols=["year"]) + >>> pq.ParquetDataset("dataset_name_3").files + ['dataset_name_3/year=2019/...-0.parquet', ... + + Write a single Parquet file into the root folder: + + >>> pq.write_to_dataset(table, root_path="dataset_name_4") + >>> pq.ParquetDataset("dataset_name_4/").files + ['dataset_name_4/...-0.parquet'] + """ + +def write_metadata( + schema: Schema, + where: str | NativeFile, + metadata_collector: list[FileMetaData] | None = None, + filesystem: SupportedFileSystem | None = None, + **kwargs, +) -> None: + """ + Write metadata-only Parquet file from schema. This can be used with + `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar + files. + + Parameters + ---------- + schema : pyarrow.Schema + where : string or pyarrow.NativeFile + metadata_collector : list + where to collect metadata information. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + **kwargs : dict, + Additional kwargs for ParquetWriter class. See docstring for + `ParquetWriter` for more information. + + Examples + -------- + Generate example data: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... 
"animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Write a dataset and collect metadata information. + + >>> metadata_collector = [] + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, "dataset_metadata", metadata_collector=metadata_collector) + + Write the `_common_metadata` parquet file without row groups statistics. + + >>> pq.write_metadata(table.schema, "dataset_metadata/_common_metadata") + + Write the `_metadata` parquet file with row groups statistics. + + >>> pq.write_metadata( + ... table.schema, "dataset_metadata/_metadata", metadata_collector=metadata_collector + ... ) + """ + +def read_metadata( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> FileMetaData: + """ + Read FileMetaData from footer of a single Parquet file. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + + Returns + ------- + metadata : FileMetaData + The metadata of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_metadata("example.parquet") + + created_by: parquet-cpp-arrow version ... + num_columns: 2 + num_rows: 3 + num_row_groups: 1 + format_version: 2.6 + serialized_size: ... + """ + +def read_schema( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Schema: + """ + Read effective Arrow schema from Parquet file metadata. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. 
+ + Returns + ------- + schema : pyarrow.Schema + The schema of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_schema("example.parquet") + n_legs: int64 + animal: string + """ diff --git a/python/pyarrow/parquet/encryption.pyi b/python/pyarrow/parquet/encryption.pyi new file mode 100644 index 00000000000..5a77dae7ef7 --- /dev/null +++ b/python/pyarrow/parquet/encryption.pyi @@ -0,0 +1,15 @@ +from pyarrow._parquet_encryption import ( + CryptoFactory, + DecryptionConfiguration, + EncryptionConfiguration, + KmsClient, + KmsConnectionConfig, +) + +__all__ = [ + "CryptoFactory", + "DecryptionConfiguration", + "EncryptionConfiguration", + "KmsClient", + "KmsConnectionConfig", +] diff --git a/python/pyarrow/substrait.pyi b/python/pyarrow/substrait.pyi new file mode 100644 index 00000000000..a56a8a5b40f --- /dev/null +++ b/python/pyarrow/substrait.pyi @@ -0,0 +1,21 @@ +from pyarrow._substrait import ( + BoundExpressions, + SubstraitSchema, + deserialize_expressions, + deserialize_schema, + get_supported_functions, + run_query, + serialize_expressions, + serialize_schema, +) + +__all__ = [ + "BoundExpressions", + "get_supported_functions", + "run_query", + "deserialize_expressions", + "serialize_expressions", + "deserialize_schema", + "serialize_schema", + "SubstraitSchema", +] diff --git a/python/pyarrow/types.pyi b/python/pyarrow/types.pyi new file mode 100644 index 00000000000..0cb4f6171d3 --- /dev/null +++ b/python/pyarrow/types.pyi @@ -0,0 +1,194 @@ +import sys + +from typing import Any + +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow.lib import ( + BinaryType, + BinaryViewType, + BoolType, + DataType, + Date32Type, + Date64Type, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + DenseUnionType, + DictionaryType, + DurationType, + FixedSizeBinaryType, + FixedSizeListType, + Float16Type, + Float32Type, + Float64Type, + Int8Type, + Int16Type, + Int32Type, + Int64Type, + LargeBinaryType, + LargeListType, + LargeListViewType, + LargeStringType, + ListType, + ListViewType, + MapType, + MonthDayNanoIntervalType, + NullType, + RunEndEncodedType, + SparseUnionType, + StringType, + StringViewType, + StructType, + Time32Type, + Time64Type, + TimestampType, + UInt8Type, + UInt16Type, + Uint32Type, + UInt64Type, +) + +_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type +_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | Uint32Type | UInt64Type +_Integer: TypeAlias = _SignedInteger | _UnsignedInteger +_Floating: TypeAlias = Float16Type | Float32Type | Float64Type +_Decimal: TypeAlias = ( + Decimal32Type[Any, Any] + | Decimal64Type[Any, Any] + | Decimal128Type[Any, Any] + | Decimal256Type[Any, Any] +) +_Date: TypeAlias = Date32Type | Date64Type +_Time: TypeAlias = Time32Type[Any] | Time64Type[Any] +_Interval: TypeAlias = MonthDayNanoIntervalType +_Temporal: TypeAlias = TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval +_Union: TypeAlias = SparseUnionType | DenseUnionType +_Nested: TypeAlias = ( + ListType[Any] + | FixedSizeListType[Any, Any] + | LargeListType[Any] + | ListViewType[Any] + | LargeListViewType[Any] + | StructType + | MapType[Any, 
Any, Any] + | _Union +) + +def is_null(t: DataType) -> TypeIs[NullType]: ... +def is_boolean(t: DataType) -> TypeIs[BoolType]: ... +def is_integer(t: DataType) -> TypeIs[_Integer]: ... +def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ... +def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ... +def is_int8(t: DataType) -> TypeIs[Int8Type]: ... +def is_int16(t: DataType) -> TypeIs[Int16Type]: ... +def is_int32(t: DataType) -> TypeIs[Int32Type]: ... +def is_int64(t: DataType) -> TypeIs[Int64Type]: ... +def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ... +def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ... +def is_uint32(t: DataType) -> TypeIs[Uint32Type]: ... +def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ... +def is_floating(t: DataType) -> TypeIs[_Floating]: ... +def is_float16(t: DataType) -> TypeIs[Float16Type]: ... +def is_float32(t: DataType) -> TypeIs[Float32Type]: ... +def is_float64(t: DataType) -> TypeIs[Float64Type]: ... +def is_list(t: DataType) -> TypeIs[ListType[Any]]: ... +def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ... +def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ... +def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ... +def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ... +def is_struct(t: DataType) -> TypeIs[StructType]: ... +def is_union(t: DataType) -> TypeIs[_Union]: ... +def is_nested(t: DataType) -> TypeIs[_Nested]: ... +def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ... +def is_temporal(t: DataType) -> TypeIs[_Temporal]: ... +def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ... +def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ... +def is_time(t: DataType) -> TypeIs[_Time]: ... +def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ... +def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ... +def is_binary(t: DataType) -> TypeIs[BinaryType]: ... +def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ... +def is_unicode(t: DataType) -> TypeIs[StringType]: ... +def is_string(t: DataType) -> TypeIs[StringType]: ... +def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ... +def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ... +def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ... +def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ... +def is_string_view(t: DataType) -> TypeIs[StringViewType]: ... +def is_date(t: DataType) -> TypeIs[_Date]: ... +def is_date32(t: DataType) -> TypeIs[Date32Type]: ... +def is_date64(t: DataType) -> TypeIs[Date64Type]: ... +def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ... +def is_decimal(t: DataType) -> TypeIs[_Decimal]: ... +def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ... +def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ... +def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ... +def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ... +def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ... +def is_interval(t: DataType) -> TypeIs[_Interval]: ... +def is_primitive(t: DataType) -> bool: ... 
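+
+# Illustrative usage (a sketch, not part of the stub surface): because the
+# predicates above are annotated with ``TypeIs``, a type checker narrows the
+# ``DataType`` argument after a successful check, e.g.::
+#
+#     import pyarrow as pa
+#     from pyarrow import types
+#
+#     def list_value_type(t: pa.DataType) -> pa.DataType:
+#         if types.is_list(t):
+#             return t.value_type  # ``t`` is narrowed to ``ListType[Any]`` here
+#         return t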
+ +__all__ = [ + "is_binary", + "is_binary_view", + "is_boolean", + "is_date", + "is_date32", + "is_date64", + "is_decimal", + "is_decimal128", + "is_decimal256", + "is_decimal32", + "is_decimal64", + "is_dictionary", + "is_duration", + "is_fixed_size_binary", + "is_fixed_size_list", + "is_float16", + "is_float32", + "is_float64", + "is_floating", + "is_int16", + "is_int32", + "is_int64", + "is_int8", + "is_integer", + "is_interval", + "is_large_binary", + "is_large_list", + "is_large_list_view", + "is_large_string", + "is_large_unicode", + "is_list", + "is_list_view", + "is_map", + "is_nested", + "is_null", + "is_primitive", + "is_run_end_encoded", + "is_signed_integer", + "is_string", + "is_string_view", + "is_struct", + "is_temporal", + "is_time", + "is_time32", + "is_time64", + "is_timestamp", + "is_uint16", + "is_uint32", + "is_uint64", + "is_uint8", + "is_unicode", + "is_union", + "is_unsigned_integer", +] diff --git a/python/pyarrow/util.pyi b/python/pyarrow/util.pyi new file mode 100644 index 00000000000..c2ecf7d6b61 --- /dev/null +++ b/python/pyarrow/util.pyi @@ -0,0 +1,27 @@ +from collections.abc import Callable +from os import PathLike +from typing import Any, Protocol, Sequence, TypeVar + +_F = TypeVar("_F", bound=Callable) +_N = TypeVar("_N") + +class _DocStringComponents(Protocol): + _docstring_components: list[str] + +def doc( + *docstrings: str | _DocStringComponents | Callable | None, **params: Any +) -> Callable[[_F], _F]: ... +def _is_iterable(obj) -> bool: ... +def _is_path_like(path) -> bool: ... +def _stringify_path(path: str | PathLike) -> str: ... +def product(seq: Sequence[_N]) -> _N: ... +def get_contiguous_span( + shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int +) -> tuple[int, int]: ... +def find_free_port() -> int: ... +def guid() -> str: ... +def _download_urllib(url, out_path) -> None: ... +def _download_requests(url, out_path) -> None: ... +def download_tzdata_on_windows() -> None: ... +def _deprecate_api(old_name, new_name, api, next_version, type=...): ... +def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ... 
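+
+# Illustrative usage (a sketch, not part of the stub surface): ``doc`` is typed
+# to return the decorated callable unchanged, so the wrapped signature is
+# preserved for type checkers::
+#
+#     from pyarrow.util import doc
+#
+#     @doc("Add one to *x*.")
+#     def add_one(x: int) -> int:
+#         return x + 1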
From 881f4b9c2c6acaf6d1bd0bf5deb08a7f960b4add Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 19 Aug 2025 13:14:57 +0200 Subject: [PATCH 02/26] Fix test_compute.py --- python/pyarrow/tests/test_compute.py | 323 +++++++++++++++------------ python/pyproject.toml | 13 ++ 2 files changed, 199 insertions(+), 137 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 5441dd493d3..7820111b70f 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -28,31 +28,17 @@ import sys import textwrap -try: - import numpy as np -except ImportError: - np = None - -try: - import pandas as pd -except ImportError: - pd = None import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError - -try: - import pyarrow.substrait as pas -except ImportError: - pas = None +from pyarrow.lib import ArrowNotImplementedError, ArrowTypeError exported_functions = [ - func for (name, func) in sorted(pc.__dict__.items()) + func for (_, func) in sorted(pc.__dict__.items()) if hasattr(func, '__arrow_compute_function__')] exported_option_classes = [ - cls for (name, cls) in sorted(pc.__dict__.items()) + cls for (_, cls) in sorted(pc.__dict__.items()) if (isinstance(cls, type) and cls is not pc.FunctionOptions and issubclass(cls, pc.FunctionOptions))] @@ -217,7 +203,7 @@ def test_option_class_equality(request): and cls != pc.AssumeTimezoneOptions ): try: - options.append(cls()) + options.append(cls()) # type: ignore[reportArgumentType] except TypeError: pytest.fail(f"Options class is not tested: {cls}") @@ -276,6 +262,8 @@ def test_get_function_hash_aggregate(): @pytest.mark.numpy def test_call_function_with_memory_pool(): + import numpy as np + arr = pa.array(["foo", "bar", "baz"]) indices = np.array([2, 2, 1]) result1 = arr.take(indices) @@ -799,11 +787,11 @@ def test_min_max(): # Wrong options type options = pc.TakeOptions() with pytest.raises(TypeError): - s = pc.min_max(data, options=options) + s = pc.min_max(data, options=options) # type: ignore[reportCallIssue] # Missing argument with pytest.raises(TypeError, match="min_max takes 1 positional"): - s = pc.min_max() + s = pc.min_max() # type: ignore[reportCallIssue] def test_any(): @@ -854,11 +842,11 @@ def test_all(): def test_is_valid(): # An example generated function wrapper without options - data = [4, 5, None] + data = pa.array([4, 5, None]) assert pc.is_valid(data).to_pylist() == [True, True, False] with pytest.raises(TypeError): - pc.is_valid(data, options=None) + pc.is_valid(data, options=None) # type: ignore[no-matching-overload] def test_generated_docstrings(): @@ -1037,21 +1025,6 @@ def find_new_unicode_codepoints(): 0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c, 0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8, 0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, } -# utf8proc does not store if a codepoint is numeric -numeric_info_missing = { - 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, - 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, - 0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70, - 0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341, - 0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2, - 0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a, - 0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10, - 0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e, - 0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621, - 0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973, - 0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5, - 0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca, - 0x10fcb, } # 
utf8proc has no no digit/numeric information digit_info_missing = { 0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c, @@ -1070,6 +1043,7 @@ def find_new_unicode_codepoints(): 0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41, 0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63, 0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, } +# utf8proc does not store if a codepoint is numeric numeric_info_missing = { 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, @@ -1104,7 +1078,8 @@ def test_string_py_compat_boolean(function_name, variant): py_name = function_name.replace('_', '') ignore = codepoints_ignore.get(function_name, set()) | \ find_new_unicode_codepoints() - for i in range(128 if ascii else 0x11000): + for i in range(128 if ascii # type: ignore[reportUnnecessaryComparison] + else 0x11000): if i in range(0xD800, 0xE000): continue # bug? pyarrow doesn't allow utf16 surrogates # the issues we know of, we skip @@ -1170,6 +1145,8 @@ def test_utf8_zfill(): @pytest.mark.pandas def test_replace_slice(): + import numpy as np + offsets = range(-3, 4) arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde']) @@ -1246,6 +1223,7 @@ def test_binary_join(): expected = pa.array(['a1b', 'c2d'], type=pa.binary()) ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary())) assert pc.binary_join(ar_list, separator_array).equals(expected) + assert expected.equals(pc.binary_join(ar_list, separator_array)) def test_binary_join_element_wise(): @@ -1309,7 +1287,8 @@ def test_take_indices_types(): for indices_type in ['uint8', 'int8', 'uint16', 'int16', 'uint32', 'int32', 'uint64', 'int64']: - indices = pa.array([0, 4, 2, None], type=indices_type) + indices = pa.array( + [0, 4, 2, None], type=indices_type) # type: ignore[reportArgumentType] result = arr.take(indices) result.validate() expected = pa.array([0, 4, 2, None]) @@ -1318,7 +1297,7 @@ def test_take_indices_types(): for indices_type in [pa.float32(), pa.float64()]: indices = pa.array([0, 4, 2], type=indices_type) with pytest.raises(NotImplementedError): - arr.take(indices) + arr.take(indices) # type: ignore[reportArgumentType] def test_take_on_chunked_array(): @@ -1486,6 +1465,8 @@ def test_filter(ty, values): @pytest.mark.numpy @pytest.mark.parametrize(('ty', 'values'), all_array_types) def test_filter_numpy_array_mask(ty, values): + import numpy as np + arr = pa.array(values, type=ty) # same test as test_filter with different array type mask = np.array([True, False, False, True, None]) @@ -1562,7 +1543,7 @@ def test_filter_errors(): # non-boolean dtype mask = pa.array([0, 1, 0, 1, 0]) with pytest.raises(NotImplementedError): - obj.filter(mask) + obj.filter(mask) # type: ignore[reportArgumentType] # wrong length mask = pa.array([True, False, True]) @@ -1573,7 +1554,7 @@ def test_filter_errors(): scalar = pa.scalar(True) for filt in [batch, table, scalar]: with pytest.raises(TypeError): - table.filter(filt) + table.filter(filt) # type: ignore[reportArgumentType] def test_filter_null_type(): @@ -1592,11 +1573,10 @@ def test_filter_null_type(): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_array(typ): - if typ == "array": - def con(values): + def con(values): + if typ == "array": return pa.array(values) - else: - def con(values): + else: return pa.chunked_array([values]) arr1 = con([1, 2, 3, 4, None]) @@ -1623,11 +1603,10 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_string_scalar(typ): - if typ == "array": - def 
con(values): + def con(values): + if typ == "array": return pa.array(values) - else: - def con(values): + else: return pa.chunked_array([values]) arr = con(['a', 'b', 'c', None]) @@ -1660,11 +1639,10 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_scalar(typ): - if typ == "array": - def con(values): + def con(values): + if typ == "array": return pa.array(values) - else: - def con(values): + else: return pa.chunked_array([values]) arr = con([1, 2, 3, None]) @@ -1757,14 +1735,17 @@ def test_round_to_integer(ty): "half_to_odd": [3, 3, 4, 5, -3, -3, -4, None], } for round_mode, expected in rmode_and_expected.items(): - options = RoundOptions(round_mode=round_mode) - result = round(values, options=options) + options = RoundOptions( # type: ignore[reportPossiblyUnboundVariable] + round_mode=round_mode) # type: ignore[reportArgumentType] + result = round(values, options=options) # type: ignore[reportArgumentType] expected_array = pa.array(expected, type=pa.float64()) assert expected_array.equals(result) @pytest.mark.numpy def test_round(): + import numpy as np + values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None] ndigits_and_expected = { -2: [300, 0, 0, 0, -0, -0, -0, None], @@ -1784,6 +1765,8 @@ def test_round(): @pytest.mark.numpy def test_round_to_multiple(): + import numpy as np + values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None] multiple_and_expected = { 0.05: [320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05, None], @@ -1807,7 +1790,8 @@ def test_round_to_multiple(): for multiple in [object, 99999999999999999999999]: with pytest.raises(TypeError, match="is not a valid multiple type"): - pc.round_to_multiple(values, multiple=multiple) + pc.round_to_multiple( + values, multiple=multiple) # type: ignore[reportArgumentType] def test_round_binary(): @@ -1878,7 +1862,7 @@ def test_fill_null(): fill_value = pa.array([5], type=pa.int8()) with pytest.raises(pa.ArrowInvalid, match="Array arguments must all be the same length"): - arr.fill_null(fill_value) + arr.fill_null(fill_value) # type: ignore[reportArgumentType] arr = pa.array([None, None, None, None], type=pa.null()) fill_value = pa.scalar(None, type=pa.null()) @@ -2075,14 +2059,16 @@ def test_fsl_to_fsl_cast(value_type): # Different sized FSL cast_type = pa.list_(pa.field("element", value_type), 3) err_msg = 'Size of FixedSizeList is not the same.' 
- with pytest.raises(pa.lib.ArrowTypeError, match=err_msg): + with pytest.raises(ArrowTypeError, match=err_msg): fsl.cast(cast_type) DecimalTypeTraits = namedtuple('DecimalTypeTraits', + # type: ignore[reportUntypedNamedTuple] ('name', 'factory', 'max_precision')) FloatToDecimalCase = namedtuple('FloatToDecimalCase', + # type: ignore[reportUntypedNamedTuple] ('precision', 'scale', 'float_val')) decimal_type_traits = [DecimalTypeTraits('decimal32', pa.decimal32, 9), @@ -2095,6 +2081,8 @@ def largest_scaled_float_not_above(val, scale): """ Find the largest float f such as `f * 10**scale <= val` """ + import numpy as np + assert val >= 0 assert scale >= 0 float_val = float(val) / 10**scale @@ -2155,7 +2143,7 @@ def random_float_to_decimal_cast_cases(float_ty, max_precision): r = random.Random(42) for precision in range(1, max_precision, 6): for scale in range(0, precision, 4): - for i in range(20): + for _ in range(20): unscaled = r.randrange(0, 10**precision) float_val = scaled_float(unscaled, scale) assert float_val * 10**scale < 10**precision @@ -2212,6 +2200,8 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): """ Test float-to-decimal conversion against exactly generated values. """ + import numpy as np + r = random.Random(43) np_float_ty = { pa.float32(): np.float32, @@ -2252,10 +2242,13 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): float_exp = (-mantissa_bits + math.floor(math.log2(10**(precision - scale)))) assert float_exp_min <= float_exp <= float_exp_max - for i in range(5): + for _ in range(5): mantissa = r.randrange(0, 2**mantissa_bits) - float_val = np.ldexp(np_float_ty(mantissa), float_exp) - assert isinstance(float_val, np_float_ty) + float_val = np.ldexp( + np_float_ty(mantissa), float_exp + ) + assert isinstance( + float_val, np_float_ty) # type: ignore[reportArgumentType] # Make sure we compute the exact expected value and # round by half-to-even when converting to the expected precision. 
if float_exp >= 0: @@ -2301,6 +2294,8 @@ def test_strptime(): @pytest.mark.pandas @pytest.mark.timezone_data def test_strftime(): + import pandas as pd + times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] @@ -2311,7 +2306,8 @@ def test_strftime(): formats.extend(["%c", "%x", "%X"]) for timezone in timezones: - ts = pd.to_datetime(times).tz_localize(timezone) + ts = pd.to_datetime(times # type: ignore[reportArgumentType] + ).tz_localize(timezone) for unit in ["s", "ms", "us", "ns"]: tsa = pa.array(ts, type=pa.timestamp(unit, timezone)) for fmt in formats: @@ -2358,7 +2354,7 @@ def test_strftime(): # Test timestamps without timezone fmt = "%Y-%m-%dT%H:%M:%S" - ts = pd.to_datetime(times) + ts = pd.to_datetime(times) # type: ignore[reportArgumentType] tsa = pa.array(ts, type=pa.timestamp("s")) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) expected = pa.array(ts.strftime(fmt)).cast(result.type) @@ -2377,6 +2373,7 @@ def test_strftime(): def _check_datetime_components(timestamps, timezone=None): from pyarrow.vendored.version import Version + import pandas as pd ts = pd.to_datetime(timestamps).tz_localize( "UTC").tz_convert(timezone).to_series() @@ -2392,9 +2389,15 @@ def _check_datetime_components(timestamps, timezone=None): if Version(pd.__version__) < Version("1.1.0"): # https://github.com/pandas-dev/pandas/issues/33206 - iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("int64") - iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("int64") - iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("int64") + iso_year = ts.map( + lambda x: x.isocalendar()[0] # type: ignore[reportUnknownLambdaType] + ).astype("int64") + iso_week = ts.map( + lambda x: x.isocalendar()[1] # type: ignore[reportUnknownLambdaType] + ).astype("int64") + iso_day = ts.map( + lambda x: x.isocalendar()[2] # type: ignore[reportUnknownLambdaType] + ).astype("int64") else: # Casting is required because pandas isocalendar returns int32 # while arrow isocalendar returns int64. 
@@ -2444,7 +2447,8 @@ def _check_datetime_components(timestamps, timezone=None): # datetime with utc returns None for dst() is_dst = [False] * len(ts) else: - is_dst = ts.apply(lambda x: x.dst().seconds > 0) + is_dst = ts.apply( + lambda x: x.dst().seconds > 0) # type: ignore[reportUnknownLambdaType] assert pc.is_dst(tsa).equals(pa.array(is_dst)) day_of_week_options = pc.DayOfWeekOptions( @@ -2505,6 +2509,9 @@ def test_iso_calendar_longer_array(unit): @pytest.mark.pandas @pytest.mark.timezone_data def test_assume_timezone(): + import numpy as np + import pandas as pd + ts_type = pa.timestamp("ns") timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789", "2000-02-29T23:23:23.999999999", @@ -2529,9 +2536,9 @@ def test_assume_timezone(): ambiguous_array = pa.array(ambiguous, type=ts_type) nonexistent_array = pa.array(nonexistent, type=ts_type) + ta = pa.array(timestamps, type=ts_type) for timezone in ["UTC", "America/Chicago", "Asia/Kolkata"]: options = pc.AssumeTimezoneOptions(timezone) - ta = pa.array(timestamps, type=ts_type) expected = timestamps.tz_localize(timezone) result = pc.assume_timezone(ta, options=options) assert result.equals(pa.array(expected)) @@ -2540,7 +2547,8 @@ def test_assume_timezone(): ta_zoned = pa.array(timestamps, type=pa.timestamp("ns", timezone)) with pytest.raises(pa.ArrowInvalid, match="already have a timezone:"): - pc.assume_timezone(ta_zoned, options=options) + pc.assume_timezone( + ta_zoned, options=options) # type: ignore[reportArgumentType] invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss") with pytest.raises(ValueError, match="not found in timezone database"): @@ -2583,18 +2591,22 @@ def test_assume_timezone(): f"timezone '{timezone}'"): pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) - expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True]) + expected = ambiguous.tz_localize(timezone, ambiguous=np.array([True, True, True])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) - expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False]) + expected = ambiguous.tz_localize( + timezone, ambiguous=np.array([False, False, False])) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) def _check_temporal_rounding(ts, values, unit): + import numpy as np + import pandas as pd + unit_shorthand = { "nanosecond": "ns", "microsecond": "us", @@ -2638,7 +2650,7 @@ def _check_temporal_rounding(ts, values, unit): value, unit, calendar_based_origin=True) origin = ts.dt.floor(greater_unit[unit]) - if ta.type.tz is None: + if not hasattr(ta.type, "tz"): result = pc.ceil_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.ceil(frequency) + origin np.testing.assert_array_equal(result, expected) @@ -2669,16 +2681,20 @@ def _check_temporal_rounding(ts, values, unit): # to regular ceiled timestamp if it is equal to the original timestamp. # This does not work if timestamp is zoned since our logic will not # account for DST jumps. 
- if ta.type.tz is None: + if not hasattr(ta.type, "tz"): options = pc.RoundTemporalOptions( - value, unit, ceil_is_strictly_greater=True) + value, # type: ignore[reportPossiblyUnboundVariable] + ceil_is_strictly_greater=True, + unit=unit) # type: ignore[reportPossiblyUnboundVariable] result = pc.ceil_temporal(ta, options=options) - expected = ts.dt.ceil(frequency) + expected = ts.dt.ceil(frequency) # type: ignore[reportPossiblyUnboundVariable] expected = np.where( expected == ts, - expected + pd.Timedelta(value, unit_shorthand[unit]), - expected) + expected + pd.Timedelta( + value, # type: ignore[reportPossiblyUnboundVariable] + unit=unit_shorthand[unit]), expected # type: ignore[reportArgumentType] + ) np.testing.assert_array_equal(result, expected) # Check RoundTemporalOptions defaults @@ -2703,8 +2719,10 @@ def _check_temporal_rounding(ts, values, unit): "second", "minute", "hour", "day")) @pytest.mark.pandas def test_round_temporal(unit): + import pandas as pd + values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750) - timestamps = [ + timestamps = pd.Series([ "1923-07-07 08:52:35.203790336", "1931-03-17 10:45:00.641559040", "1932-06-16 01:16:42.911994368", @@ -2717,7 +2735,7 @@ def test_round_temporal(unit): "1982-01-21 18:43:44.517366784", "1992-01-01 00:00:00.100000000", "1999-12-04 05:55:34.794991104", - "2026-10-26 08:39:00.316686848"] + "2026-10-26 08:39:00.316686848"]) ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps]) _check_temporal_rounding(ts, values, unit) @@ -2739,7 +2757,7 @@ def test_count(): with pytest.raises(ValueError, match='"something else" is not a valid count mode'): - pc.count(arr, 'something else') + pc.count(arr, 'something else') # type: ignore[invalid-argument-type] def test_index(): @@ -2789,7 +2807,7 @@ def test_partition_nth(): with pytest.raises( ValueError, match="'partition_nth_indices' cannot be called without options"): - pc.partition_nth_indices(data) + pc.partition_nth_indices(data) # type: ignore[no-matching-overload] def test_partition_nth_null_placement(): @@ -2816,10 +2834,13 @@ def validate_select_k(select_k_indices, arr, order, stable_sort=False): arr = pa.array([1, 2, None, 0]) for k in [0, 2, 4]: - for order in ["descending", "ascending"]: - result = pc.select_k_unstable( - arr, k=k, sort_keys=[("dummy", order)]) - validate_select_k(result, arr, order) + result = pc.select_k_unstable( + arr, k=k, sort_keys=[("dummy", "ascending")]) + validate_select_k(result, arr, "ascending") + + result = pc.select_k_unstable( + arr, k=k, sort_keys=[("dummy", "descending")]) + validate_select_k(result, arr, "descending") result = pc.top_k_unstable(arr, k=k) validate_select_k(result, arr, "descending") @@ -2876,7 +2897,7 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises( ValueError, match="'select_k_unstable' cannot be called without options"): - pc.select_k_unstable(table) + pc.select_k_unstable(table) # type: ignore[no-matching-overload] with pytest.raises(ValueError, match="select_k_unstable requires a nonnegative `k`"): @@ -2885,14 +2906,19 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises(ValueError, match="select_k_unstable requires a " "non-empty `sort_keys`"): - pc.select_k_unstable(table, k=2, sort_keys=[]) + pc.select_k_unstable(table, sort_keys=[], + k=2 # type: ignore[reportPossiblyUnboundVariable] + ) with pytest.raises(ValueError, match="not a valid sort order"): - pc.select_k_unstable(table, k=k, sort_keys=[("a", 
"nonscending")]) + pc.select_k_unstable( + table, k=k, # type: ignore[reportPossiblyUnboundVariable] + sort_keys=[("a", "nonscending")]) # type: ignore[reportArgumentType] with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) + pc.select_k_unstable(table, k=k, # type: ignore[reportPossiblyUnboundVariable] + sort_keys=[("unknown", "ascending")]) def test_array_sort_indices(): @@ -2911,7 +2937,9 @@ def test_array_sort_indices(): assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="not a valid sort order"): - pc.array_sort_indices(arr, order="nonscending") + pc.array_sort_indices(arr, + order="nonscending" # type: ignore[reportArgumentType] + ) def test_sort_indices_array(): @@ -2967,14 +2995,19 @@ def test_sort_indices_table(): assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="Must specify one or more sort keys"): - pc.sort_indices(table) + pc.sort_indices(table) # type: ignore[reportArgumentType] with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.sort_indices(table, sort_keys=[("unknown", "ascending")]) + pc.sort_indices( + table, + sort_keys=[("unknown", "ascending")] # type: ignore[reportArgumentType] + ) with pytest.raises(ValueError, match="not a valid sort order"): - pc.sort_indices(table, sort_keys=[("a", "nonscending")]) + pc.sort_indices( + table, sort_keys=[("a", "nonscending")] # type: ignore[reportArgumentType] + ) def test_is_in(): @@ -3052,9 +3085,9 @@ def test_quantile(): assert result.to_pylist() == [1.25, 1.5, 1.75] with pytest.raises(ValueError, match="Quantile must be between 0 and 1"): - pc.quantile(arr, q=1.1) + pc.quantile(arr, q=1.1) # type: ignore[invalid-argument-type] with pytest.raises(ValueError, match="not a valid quantile interpolation"): - pc.quantile(arr, interpolation='zzz') + pc.quantile(arr, interpolation='zzz') # type: ignore[invalid-argument-type] def test_tdigest(): @@ -3120,6 +3153,8 @@ def test_min_max_element_wise(): @pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_sum(start, skip_nulls): + import numpy as np + # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3168,13 +3203,15 @@ def test_cumulative_sum(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_sum([1, 2, 3], start=strt) + pc.cumulative_sum([1, 2, 3], start=strt) # type: ignore[reportArgumentType] @pytest.mark.numpy @pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_prod(start, skip_nulls): + import numpy as np + # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3223,13 +3260,17 @@ def test_cumulative_prod(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_prod([1, 2, 3], start=strt) + pc.cumulative_prod( + [1, 2, 3], start=strt # type: ignore[reportArgumentType] + ) @pytest.mark.numpy @pytest.mark.parametrize('start', (0.5, 3.5, 6.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_max(start, skip_nulls): + import numpy as np + # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, 
pa.scalar(start_int, type=pa.int8()), @@ -3281,13 +3322,15 @@ def test_cumulative_max(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) + pc.cumulative_max([1, 2, 3], start=strt) # type: ignore[reportArgumentType] @pytest.mark.numpy @pytest.mark.parametrize('start', (0.5, 3.5, 6.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_min(start, skip_nulls): + import numpy as np + # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3335,11 +3378,12 @@ def test_cumulative_min(start, skip_nulls): expected_arrays[i], strt if strt is not None else 1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( + # type: ignore[reportAttributeAccessIssue] zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) + pc.cumulative_max([1, 2, 3], start=strt) # type: ignore[reportArgumentType] def test_make_struct(): @@ -3431,12 +3475,12 @@ def test_list_element(): lists = pa.array([l1, l2], list_type) index = 1 - result = pa.compute.list_element(lists, index) + result = pc.list_element(lists, index) expected = pa.array([None, {'a': 0.52, 'b': 3}], element_type) assert result.equals(expected) index = 4 - result = pa.compute.list_element(lists, index) + result = pc.list_element(lists, index) expected = pa.array([{'a': 5.6, 'b': 6}, {'a': .6, 'b': 8}], element_type) assert result.equals(expected) @@ -3475,7 +3519,7 @@ def test_random(): pa.array([], type=pa.float64()) # System random initialization => outputs all distinct - arrays = [tuple(pc.random(100).to_pylist()) for i in range(10)] + arrays = [tuple(pc.random(100).to_pylist()) for _ in range(10)] assert len(set(arrays)) == len(arrays) arrays = [tuple(pc.random(100, initializer=i % 7).to_pylist()) @@ -3484,15 +3528,14 @@ def test_random(): # Arbitrary hashable objects can be given as initializer initializers = [object(), (4, 5, 6), "foo"] - initializers.extend(os.urandom(10) for i in range(10)) - arrays = [tuple(pc.random(100, initializer=i).to_pylist()) - for i in initializers] + initializers.extend(os.urandom(10) for _ in range(10)) + arrays = [tuple(pc.random(100, initializer=i).to_pylist()) for i in initializers] assert len(set(arrays)) == len(arrays) with pytest.raises(TypeError, match=r"initializer should be 'system', an integer, " r"or a hashable object; got \[\]"): - pc.random(100, initializer=[]) + pc.random(100, initializer=[]) # type: ignore[invalid-argument-type] @pytest.mark.parametrize( @@ -3542,7 +3585,7 @@ def test_rank_options(): match=r'"NonExisting" is not a valid tiebreaker'): pc.RankOptions(sort_keys="descending", null_placement="at_end", - tiebreaker="NonExisting") + tiebreaker="NonExisting") # type: ignore[invalid-argument-type] def test_rank_quantile_options(): @@ -3572,7 +3615,7 @@ def test_rank_quantile_options(): assert result.equals(expected_descending) with pytest.raises(ValueError, match="not a valid sort order"): - pc.rank_quantile(arr, sort_keys="XXX") + pc.rank_quantile(arr, sort_keys="XXX") # type: ignore[reportArgumentType] def test_rank_normal_options(): @@ -3600,6 +3643,8 @@ def test_rank_normal_options(): def create_sample_expressions(): + import numpy as np + # We need a schema for substrait conversion schema = pa.schema([pa.field("i64", pa.int64()), pa.field( "foo", 
pa.struct([pa.field("bar", pa.string())]))]) @@ -3614,7 +3659,7 @@ def create_sample_expressions(): e = pc.scalar(None) f = pc.scalar({'a': 1}) g = pc.scalar(pa.scalar(1)) - h = pc.scalar(np.int64(2)) + h = pc.scalar(np.int64(2)) # type: ignore[reportOptionalMemberAccess] j = pc.scalar(False) k = pc.scalar(0) @@ -3689,20 +3734,22 @@ def test_expression_serialization_arrow(pickle_module): def test_expression_serialization_substrait(): exprs = create_sample_expressions() - schema = exprs["schema"] + schema = pa.schema(exprs["schema"]) # type: ignore[reportAttributeAccessIssue] # Basic literals don't change on binding and so they will round # trip without any change - for expr in exprs["literals"]: - serialized = expr.to_substrait(schema) + for expr in exprs["literals"]: # type: ignore[reportAttributeAccessIssue] + serialized = \ + expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] deserialized = pc.Expression.from_substrait(serialized) - assert expr.equals(deserialized) + assert expr.equals(deserialized) # type: ignore[reportAttributeAccessIssue] # Expressions are bound when they get serialized. Since bound # expressions are not equal to their unbound variants we cannot # compare the round tripped with the original - for expr in exprs["calls"]: - serialized = expr.to_substrait(schema) + for expr in exprs["calls"]: # type: ignore[reportAttributeAccessIssue] + serialized = \ + expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] deserialized = pc.Expression.from_substrait(serialized) # We can't compare the expressions themselves because of the bound # unbound difference. But we can compare the string representation @@ -3712,7 +3759,8 @@ def test_expression_serialization_substrait(): assert deserialized.equals(deserialized_again) for expr, expr_norm in zip(exprs["refs"], exprs["numeric_refs"]): - serialized = expr.to_substrait(schema) + serialized = \ + expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] deserialized = pc.Expression.from_substrait(serialized) assert str(deserialized) == str(expr_norm) serialized_again = deserialized.to_substrait(schema) @@ -3722,15 +3770,16 @@ def test_expression_serialization_substrait(): # For the special cases we get various wrinkles in serialization but we # should always get the same thing from round tripping twice for expr in exprs["special"]: - serialized = expr.to_substrait(schema) + serialized = \ + expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] deserialized = pc.Expression.from_substrait(serialized) serialized_again = deserialized.to_substrait(schema) deserialized_again = pc.Expression.from_substrait(serialized_again) assert deserialized.equals(deserialized_again) # Special case, we lose the field names of struct literals - f = exprs["special"][0] - serialized = f.to_substrait(schema) + f = exprs["special"][0] # type: ignore[reportAttributeAccessIssue] + serialized = f.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] deserialized = pc.Expression.from_substrait(serialized) assert deserialized.equals(pc.scalar({'': 1})) @@ -3758,10 +3807,10 @@ def test_expression_construction(): nested_field = pc.field(("nested", "field")) nested_field2 = pc.field("nested", "field") - zero | one == string - ~true == false + _ = zero | one == string + _ = ~true == false for typ in ("bool", pa.bool_()): - field.cast(typ) == true + _ = field.cast(typ) == true field.isin([1, 2]) nested_mixed_types.isin(["foo", "bar"]) @@ -3769,10 +3818,10 @@ def test_expression_construction(): 
nested_field2.isin(["foo", "bar"]) with pytest.raises(TypeError): - field.isin(1) + field.isin(1) # type: ignore[invalid-argument-type] with pytest.raises(pa.ArrowInvalid): - field != object() + _ = field != object() def test_expression_boolean_operators(): @@ -3781,16 +3830,16 @@ def test_expression_boolean_operators(): false = pc.scalar(False) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - true and false + _ = true and false with pytest.raises(ValueError, match="cannot be evaluated to python True"): - true or false + _ = true or false with pytest.raises(ValueError, match="cannot be evaluated to python True"): bool(true) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - not true + _ = not true def test_expression_call_function(): @@ -3812,14 +3861,14 @@ def test_expression_call_function(): # Invalid pc.scalar input gives original error message msg = "only other expressions allowed as arguments" with pytest.raises(TypeError, match=msg): - pc.add(field, object) + pc.add(field, object) # type: ignore[reportArgumentType] def test_cast_table_raises(): table = pa.table({'a': [1, 2]}) - with pytest.raises(pa.lib.ArrowTypeError): - pc.cast(table, pa.int64()) + with pytest.raises(ArrowTypeError): + pc.cast(table, pa.int64()) # type: ignore[reportArgumentType] @pytest.mark.parametrize("start,stop,expected", ( @@ -3966,31 +4015,31 @@ def test_run_end_encode(value_type, option): def test_pairwise_diff(): arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, 1, 1, None, None, 1]) - result = pa.compute.pairwise_diff(arr, period=1) + result = pc.pairwise_diff(arr, period=1) assert result.equals(expected) arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, None, 2, None, 1, None]) - result = pa.compute.pairwise_diff(arr, period=2) + result = pc.pairwise_diff(arr, period=2) assert result.equals(expected) # negative period arr = pa.array([1, 2, 3, None, 4, 5], type=pa.int8()) expected = pa.array([-1, -1, None, None, -1, None], type=pa.int8()) - result = pa.compute.pairwise_diff(arr, period=-1) + result = pc.pairwise_diff(arr, period=-1) assert result.equals(expected) # wrap around overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) expected = pa.array([255, 255, None, None, 255, None], type=pa.uint8()) - result = pa.compute.pairwise_diff(arr, period=-1) + result = pc.pairwise_diff(arr, period=-1) assert result.equals(expected) # fail on overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) with pytest.raises(pa.ArrowInvalid, match="overflow"): - pa.compute.pairwise_diff_checked(arr, period=-1) + pc.pairwise_diff_checked(arr, period=-1) def test_pivot_wider(): diff --git a/python/pyproject.toml b/python/pyproject.toml index fac3b25c554..598ddf7a75b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -91,3 +91,16 @@ version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' fallback_version = '22.0.0a0' + +[tool.pyright] +typeCheckingMode = "strict" +reportMissingImports = false +reportPrivateUsage = false +reportUnknownParameterType = false +reportMissingTypeArgument = false +reportMissingParameterType = false +reportMissingTypeStubs = false +reportUnknownVariableType = false +reportUnknownArgumentType = false +reportUnknownMemberType = false +include = ["pyarrow/tests/test_compute.py"] From 0fd9ee16d4c8a1ca9b064106e617f6d764c3690e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: 
Thu, 21 Aug 2025 19:39:56 +0200 Subject: [PATCH 03/26] Fix pyarrow-stubs --- python/pyarrow/__lib_pxi/array.pyi | 19 +- python/pyarrow/__lib_pxi/scalar.pyi | 28 +- python/pyarrow/__lib_pxi/table.pyi | 18 +- python/pyarrow/__lib_pxi/types.pyi | 8 +- python/pyarrow/_compute.pyi | 40 +- python/pyarrow/compute.pyi | 807 +++++++++++++++++++++++----- 6 files changed, 758 insertions(+), 162 deletions(-) diff --git a/python/pyarrow/__lib_pxi/array.pyi b/python/pyarrow/__lib_pxi/array.pyi index ec1cda30a88..9283f57b69f 100644 --- a/python/pyarrow/__lib_pxi/array.pyi +++ b/python/pyarrow/__lib_pxi/array.pyi @@ -14,6 +14,7 @@ from typing import ( Iterable, Iterator, Literal, + LiteralString, TypeVar, overload, ) @@ -49,6 +50,7 @@ from .types import ( DataType, Field, MapType, + ListType, _AsPyType, _BasicDataType, _BasicValueT, @@ -1944,7 +1946,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): or if each element should be on its own line. """ format = to_string - def equals(self, other: Self) -> bool: ... + def equals(self, other: Self | Iterable[Any]) -> bool: ... def __len__(self) -> int: ... def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: """ @@ -1972,7 +1974,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): Return BooleanArray indicating the non-null values. """ def fill_null( - self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType + self: Array[Scalar[_BasicDataType[_AsPyType]]] | Array[Scalar[_DataTypeT]], fill_value: Scalar[_DataTypeT] | _AsPyType | str | None ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: """ See :func:`pyarrow.compute.fill_null` for usage. @@ -2078,7 +2080,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): @overload def index( self: Array[Scalar[_BasicDataType[_AsPyType]]], - value: _AsPyType, + value: _AsPyType | None, start: int | None = None, end: int | None = None, *, @@ -2152,7 +2154,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): array : numpy.ndarray """ def to_pylist( - self: Array[Scalar[_BasicDataType[_AsPyType]]], + self: Array[Scalar[_BasicDataType[_AsPyType]]] | Array[Scalar[ListType[Any]]] | StructArray | DictionaryArray[Unknown, Unknown], *, map_as_pydicts: Literal["lossy", "strict"] | None = None, ) -> list[_AsPyType | None]: @@ -3209,7 +3211,7 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S cls, values: Array[Scalar[_DataTypeT]], *, - type: None = None, + type: types.FixedSizeListType[_DataTypeT, Literal[int]] | None = None, mask: Mask | None = None, ) -> FixedSizeListArray[_DataTypeT, None]: ... 
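As a quick illustration (not part of the patch hunks) of the runtime calls the relaxed Array.fill_null and Array.to_pylist annotations above are meant to cover, plain Python values are accepted alongside pyarrow scalars:

    import pyarrow as pa

    arr = pa.array([1, None, 3], type=pa.int64())

    # fill_null takes either a plain Python value or a scalar of the same type
    assert arr.fill_null(0).to_pylist() == [1, 0, 3]
    assert arr.fill_null(pa.scalar(0, type=pa.int64())).to_pylist() == [1, 0, 3]

    # to_pylist round-trips nulls as None
    assert arr.to_pylist() == [1, None, 3]
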
@overload @@ -3661,7 +3663,7 @@ class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): @staticmethod def from_arrays( indices: Indices, - dictionary: Array | np.ndarray | pd.Series, + dictionary: Array | np.ndarray | pd.Series | list[Any], mask: np.ndarray | pd.Series | BooleanArray | None = None, ordered: bool = False, from_pandas: bool = False, @@ -3724,8 +3726,8 @@ class StructArray(Array[scalar.StructScalar]): """ @staticmethod def from_arrays( - arrays: Iterable[Array], - names: list[str] | None = None, + arrays: Iterable[Array] | list[list[Any]], + names: list[str] | list[LiteralString] | None = None, fields: list[Field] | None = None, mask=None, memory_pool: MemoryPool | None = None, @@ -4217,6 +4219,7 @@ __all__ = [ "repeat", "infer_type", "_PandasConvertible", + "_CastAs", "Array", "NullArray", "BooleanArray", diff --git a/python/pyarrow/__lib_pxi/scalar.pyi b/python/pyarrow/__lib_pxi/scalar.pyi index 81ab5012067..77368bb264b 100644 --- a/python/pyarrow/__lib_pxi/scalar.pyi +++ b/python/pyarrow/__lib_pxi/scalar.pyi @@ -297,6 +297,7 @@ class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar def __iter__(self) -> Iterator[str]: ... def __getitem__(self, __key: str) -> Scalar[Any]: ... # type: ignore[override] def _as_py_tuple(self) -> list[tuple[str, Any]]: ... + def tolist(self) -> list[Any]: ... class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): @property @@ -573,7 +574,7 @@ def scalar( @overload def scalar( value: Any, - type: types.BoolType, + type: types.BoolType | Literal["bool"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -581,7 +582,7 @@ def scalar( @overload def scalar( value: Any, - type: types.UInt8Type, + type: types.UInt8Type | Literal["uint8"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -589,7 +590,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Int8Type, + type: types.Int8Type | Literal["int8"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -597,7 +598,7 @@ def scalar( @overload def scalar( value: Any, - type: types.UInt16Type, + type: types.UInt16Type | Literal["uint16"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -605,7 +606,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Int16Type, + type: types.Int16Type | Literal["int16"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -613,7 +614,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Uint32Type, + type: types.Uint32Type | Literal["uint32"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -621,7 +622,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Int32Type, + type: types.Int32Type | Literal["int32"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -629,7 +630,7 @@ def scalar( @overload def scalar( value: Any, - type: types.UInt64Type, + type: types.UInt64Type | Literal["uint64"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -637,7 +638,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Int64Type, + type: types.Int64Type | Literal["int64"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -645,7 +646,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Float16Type, + type: types.Float16Type | Literal["f16"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, 
@@ -653,7 +654,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Float32Type, + type: types.Float32Type | Literal["f32"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -661,7 +662,7 @@ def scalar( @overload def scalar( value: Any, - type: types.Float64Type, + type: types.Float64Type | Literal["f64"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -693,7 +694,7 @@ def scalar( @overload def scalar( value: Any, - type: types.StringType, + type: types.StringType | Literal["string"], *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, @@ -1014,4 +1015,5 @@ __all__ = [ "JsonScalar", "OpaqueScalar", "scalar", + "NullableCollection", ] diff --git a/python/pyarrow/__lib_pxi/table.pyi b/python/pyarrow/__lib_pxi/table.pyi index ffba4262e8c..34960e2b903 100644 --- a/python/pyarrow/__lib_pxi/table.pyi +++ b/python/pyarrow/__lib_pxi/table.pyi @@ -53,11 +53,11 @@ from pyarrow.lib import Device, MemoryManager, MemoryPool, MonthDayNano, Schema from pyarrow.lib import Field as _Field from . import array, scalar, types -from .array import Array, NullableCollection, StructArray, _CastAs, _PandasConvertible +from .array import Array, StructArray, _CastAs, _PandasConvertible from .device import DeviceAllocationType from .io import Buffer from .ipc import RecordBatchReader -from .scalar import Int64Scalar, Scalar +from .scalar import Int64Scalar, Scalar, NullableCollection from .tensor import Tensor from .types import DataType, _AsPyType, _BasicDataType, _DataTypeT @@ -389,7 +389,7 @@ class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): ] ] """ - def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: + def fill_null(self, fill_value: Scalar[_DataTypeT] | _AsPyType | str | None) -> Self: """ Replace each null element in values with fill_value. @@ -423,7 +423,7 @@ class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): ] ] """ - def equals(self, other: Self) -> bool: + def equals(self, other: Self | Array[Any] | Iterable[Any]) -> bool: """ Return whether the contents of two chunked arrays are equal. @@ -1522,6 +1522,11 @@ def chunked_array( type: None = None, ) -> ChunkedArray[scalar.ListScalar[Any]]: ... @overload +def chunked_array( + values: Iterable[NullableCollection[types.Decimal128Type[Any, Any]]], + type: types.Decimal128Type, +) -> ChunkedArray[types.Decimal128Type]: ... +@overload def chunked_array( values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], type: Literal["null"] | types.NullType, @@ -5083,8 +5088,9 @@ class Table(_Tabular[ChunkedArray[Any]]): """ def record_batch( - data: dict[str, list[Any] | Array[Any]] - | Collection[Array[Any]] + data: list[ArrayOrChunkedArray[Any]] + | dict[str, list[Any] | Array[Any]] + | Iterable[Array[Any]] | pd.DataFrame | SupportArrowArray | SupportArrowDeviceArray, diff --git a/python/pyarrow/__lib_pxi/types.pyi b/python/pyarrow/__lib_pxi/types.pyi index 7fe6c36e332..5cac864c3cc 100644 --- a/python/pyarrow/__lib_pxi/types.pyi +++ b/python/pyarrow/__lib_pxi/types.pyi @@ -1532,7 +1532,7 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): **kwargs : optional additional key-value metadata """ - def __init__(self, __arg0__: Mapping[bytes, bytes] | None = None, **kwargs) -> None: ... + def __init__(self, __arg0__: Mapping[bytes, bytes] | Mapping[str, str] | None = None, **kwargs) -> None: ... def equals(self, other: KeyValueMetadata) -> bool: ... def __len__(self) -> int: ... 
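For context (illustrative only, not part of the hunks): the Literal string spellings added to the scalar() overloads earlier in this patch mirror the type aliases pa.scalar() already accepts at runtime, for example:

    import pyarrow as pa

    # string aliases resolve to the same Arrow types as the constructors
    assert pa.scalar(1, type="int64").type == pa.int64()
    assert pa.scalar(True, type="bool").type == pa.bool_()
    assert pa.scalar("x", type="string").type == pa.string()
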
def __contains__(self, __key: object) -> bool: ... @@ -2771,9 +2771,9 @@ def string_to_tzinfo(name: str) -> dt.tzinfo: """ @overload -def timestamp(unit: _Unit) -> TimestampType[_Unit, _Tz]: ... +def timestamp(unit: _Unit | str) -> TimestampType[_Unit, _Tz]: ... @overload -def timestamp(unit: _Unit, tz: _Tz) -> TimestampType[_Unit, _Tz]: ... +def timestamp(unit: _Unit | str, tz: _Tz) -> TimestampType[_Unit, _Tz]: ... def timestamp(*args, **kwargs): """ Create instance of timestamp type with resolution and optional time zone. @@ -4290,7 +4290,9 @@ def is_float_value(obj: Any) -> bool: __all__ = [ "_Weakrefable", "_Metadata", + "_AsPyType", "DataType", + "_DataTypeT", "_BasicDataType", "NullType", "BoolType", diff --git a/python/pyarrow/_compute.pyi b/python/pyarrow/_compute.pyi index 3d61ae42787..61ccb233feb 100644 --- a/python/pyarrow/_compute.pyi +++ b/python/pyarrow/_compute.pyi @@ -10,6 +10,7 @@ from typing import ( ) from . import lib +from .compute import _NumericScalarT _Order: TypeAlias = Literal["ascending", "descending"] _Placement: TypeAlias = Literal["at_start", "at_end"] @@ -75,6 +76,11 @@ class Function(lib._Weakrefable): """ The number of kernels implementing this function. """ + @property + def kernels(self) -> list[ScalarKernel]: + """ + A list of all kernels implementing this function. + """ def call( self, args: Iterable, @@ -307,7 +313,7 @@ class RunEndEncodeOptions(FunctionOptions): Accepted values are pyarrow.{int16(), int32(), int64()}. """ # TODO: default is DataType(int32) - def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + def __init__(self, run_end_type: lib.DataType | Literal["int16","int32","int64"] = Literal["int32"]) -> None: ... class ElementWiseAggregateOptions(FunctionOptions): """ @@ -589,7 +595,7 @@ class QuantileOptions(FunctionOptions): """ def __init__( self, - q: float | Sequence[float], + q: float | Sequence[float] = 0.5, *, interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", skip_nulls: bool = True, @@ -859,7 +865,7 @@ class RoundToMultipleOptions(FunctionOptions): "half_down", "half_up", "half_towards_zero", "half_towards_infinity", "half_to_even", "half_to_odd". """ - def __init__(self, multiple: float = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... + def __init__(self, multiple: int | float | _NumericScalarT = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... class ScalarAggregateOptions(FunctionOptions): """ @@ -1095,6 +1101,19 @@ class Utf8NormalizeOptions(FunctionOptions): def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... +class ZeroFillOptions(FunctionOptions): + """ + Options for utf8_zero_fill. + + Parameters + ---------- + width : int + Desired string length. + padding : str, default "0" + Padding character. Should be one Unicode codepoint. + """ + def __init__(self, width: int, padding: str = '0') -> None: ... + class VarianceOptions(FunctionOptions): """ Options for the `variance` and `stddev` functions. 
@@ -1584,8 +1603,19 @@ class Expression(lib._Weakrefable): ], null_matching_behavior=MATCH})> """ + def equals(self, other: Expression | lib.Array | Iterable) -> bool: + """ + Parameters + ---------- + other : pyarrow.dataset.Expression + + Returns + ------- + bool + """ + @staticmethod - def from_substrait(buffer: bytes | lib.Buffer) -> Expression: + def from_substrait(message: bytes | lib.Buffer) -> Expression: """ Deserialize an expression from Substrait @@ -1678,7 +1708,7 @@ class Expression(lib._Weakrefable): is_nan : Expression """ def cast( - self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None + self, type: lib.DataType | Literal["bool"], safe: bool = True, options: CastOptions | None = None ) -> Expression: """ Explicitly set or change the expression's data type. diff --git a/python/pyarrow/compute.pyi b/python/pyarrow/compute.pyi index 8d8fc35b134..cbbb9b0efcc 100644 --- a/python/pyarrow/compute.pyi +++ b/python/pyarrow/compute.pyi @@ -1,6 +1,23 @@ -# ruff: noqa: I001 -from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence, Hashable from collections.abc import Callable +from numpy.typing import NDArray # Option classes from pyarrow._compute import ArraySortOptions as ArraySortOptions @@ -68,6 +85,7 @@ from pyarrow._compute import TDigestOptions as TDigestOptions from pyarrow._compute import TrimOptions as TrimOptions from pyarrow._compute import UdfContext as UdfContext from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import ZeroFillOptions as ZeroFillOptions from pyarrow._compute import VarianceOptions as VarianceOptions from pyarrow._compute import VectorFunction as VectorFunction from pyarrow._compute import VectorKernel as VectorKernel @@ -90,11 +108,12 @@ from pyarrow._compute import register_vector_function as register_vector_functio from pyarrow._compute import _Order, _Placement from pyarrow._stubs_typing import ArrayLike, ScalarLike from . import lib +from _stubs_typing import Indices _P = ParamSpec("_P") _R = TypeVar("_R") -def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: +def field(*name_or_index: str | bytes | tuple[str | int, ...] | int) -> Expression: """Reference a column of the dataset. Stores only the field's name. Type and other information is known only when @@ -128,7 +147,7 @@ def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: Expression: +def scalar(value: bool | int | float | NumericScalar | None | str | dict[bool | float | str, bool | float | str]) -> Expression: """Expression representing a scalar value. 
Creates an Expression object representing a scalar value that can be used @@ -166,6 +185,10 @@ _ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray) _ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray) ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT] ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT +_ZonedTimestampArrayT: TypeAlias = ArrayOrChunkedArray[lib.Scalar[lib.TimestampType[Any, Any]]] +_ZonelessTimestampArrayT: TypeAlias = ArrayOrChunkedArray[lib.Scalar[lib.TimestampType[Any, None]]] +_ZonedTimestampScalarT: TypeAlias = lib.Scalar[lib.TimestampType[Any, Any]] +_ZonelessTimestampScalarT: TypeAlias = lib.Scalar[lib.TimestampType[Any, None]] SignedIntegerScalar: TypeAlias = ( lib.Scalar[lib.Int8Type] @@ -209,6 +232,7 @@ TemporalScalar: TypeAlias = ( | lib.Time32Scalar[Any] | lib.Time64Scalar[Any] | lib.TimestampScalar[Any] + | lib.TimestampScalar[Any, None] | lib.DurationScalar[Any] | lib.MonthDayNanoIntervalScalar ) @@ -216,9 +240,9 @@ NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar _NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] _NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) -_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) _NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] _NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) @@ -245,6 +269,9 @@ _TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray) _ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]] _LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]] ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]] +_DecimalScalarT = TypeVar("_DecimalScalarT", bound=DecimalScalar) +DecimalArray: TypeAlias = lib.Array[_DecimalScalarT] | lib.ChunkedArray[_DecimalScalarT] +_DecimalArrayT = TypeVar("_DecimalArrayT", bound=DecimalArray) # =============================== 1. Aggregation =============================== # ========================= 1.1 functions ========================= @@ -423,12 +450,12 @@ def first( """ def first_last( - array: lib.Array[Any] | lib.ChunkedArray[Any], + array: lib.Array[Any] | lib.ChunkedArray[Any] | Sequence[Any], /, *, skip_nulls: bool = True, min_count: int = 1, - options: ScalarAggregateOptions | None = None, + options: ScalarAggregateOptions | dict[str, Any] | None = None, memory_pool: lib.MemoryPool | None = None, ) -> lib.StructScalar: """ @@ -742,7 +769,7 @@ def product( def quantile( array: NumericScalar | NumericArray, /, - q: float = 0.5, + q: float | list[float] = 0.5, *, interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", skip_nulls: bool = True, @@ -823,8 +850,64 @@ def stddev( If not passed, will allocate memory from the default memory pool. """ +def skew( + array: NumericArray | Sequence[int | None], + /, + *, + skip_nulls: bool = True, + biased: bool = True, + min_count: int = 0, + options: SkewOptions | None = None, +) -> NumericScalar: + """ + Calculate the skewness of a numeric array + Nulls are ignored by default. 
If there are not enough non-null values + in the array to satisfy `min_count`, null is returned. + The behavior of nulls and the `min_count` parameter can be changed. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + biased : bool, default True + Whether the calculated value is biased. + If False, the value computed includes a correction factor to reduce bias. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : SkewOptions, optional + Options for the `skew` and `kurtosis` functions. + """ + +kurtosis = _clone_signature(skew) +""" +Calculate the kurtosis of a numeric array +Nulls are ignored by default. If there are not enough non-null values +in the array to satisfy `min_count`, null is returned. +The behavior of nulls and the `min_count` parameter can be changed. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +biased : bool, default True + Whether the calculated value is biased. + If False, the value computed includes a correction factor to reduce bias. +min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : SkewOptions, optional + Options for the `skew` and `kurtosis` functions. +""" + def sum( - array: _NumericScalarT | NumericArray[_NumericScalarT], + array: _NumericScalarT | NumericArray[_NumericScalarT] | _DecimalArrayT, /, *, skip_nulls: bool = True, @@ -858,7 +941,7 @@ def sum( def tdigest( array: NumericScalar | NumericArray, /, - q: float = 0.5, + q: float | list[float] = 0.5, *, delta: int = 100, buffer_size: int = 500, @@ -899,7 +982,7 @@ def tdigest( """ def variance( - array: NumericScalar | NumericArray, + array: NumericScalar | NumericArray | list[int] | list[int | None], /, *, ddof: int = 0, @@ -1022,6 +1105,113 @@ def bottom_k_unstable( ] """ +def winsorize( + values: lib.Array | lib.ChunkedArray, + lower_limit: float | None = None, + upper_limit: float | None = None, + /, + *, + options: WinsorizeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Apply a winsorization transform to the input array so as to reduce the influence of potential outliers. + NaNs and nulls in the input are ignored for the purpose of computing the lower and upper quantiles. + The quantile limits can be changed in WinsorizeOptions. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get bottom indices from. + + lower_limit : float, between 0 and 1 + The quantile below which all values are replaced with the quantile's value. + For example, if lower_limit = 0.05, then all values in the lower 5% percentile will be replaced with the 5% percentile value. + + upper_limit : float, between 0 and 1 + The quantile above which all values are replaced with the quantile’s value. + For example, if upper_limit = 0.95, then all values in the upper 95% percentile will be replaced with the 95% percentile value. + + options : pyarrow.compute.WinsorizeOptions, optional + Alternative way of passing options. 
+ + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array of indices + Winsorized array + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array([10, 4, 9, 8, 5, 3, 7, 2, 1, 6]) + >>> pc.winsorize(arr, 0.1, 0.8) + + [ + 8, + 4, + 8, + 8, + 5, + 3, + 7, + 2, + 2, + 6 + ] + """ + +def pivot_wider( + pivot_keys: lib.Array | lib.ChunkedArray | list[Any], + pivot_values: lib.Array | lib.ChunkedArray | list[Any], + /, + key_names: list[Any] | None = None, + *, + unexpected_key_behavior: str | None = None, + options: PivotWiderOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar: + """ + Pivot values according to a pivot key column. + + Output is a struct with as many fields as PivotWiderOptions.key_names. + All output struct fields have the same type as pivot_values. Each pivot + key decides in which output field the corresponding pivot value is emitted. + If a pivot key doesn’t appear, null is emitted. If more than one non-null + value is encountered for a given pivot key, Invalid is raised. The pivot + key column can be string, binary or integer. The key_names will be cast + to the pivot key column type for matching. Behavior of unexpected pivot + keys is controlled by unexpected_key_behavior. + + Parameters + ---------- + pivot_keys : sequence + Array, ChunkedArray, list + pivot_values : sequence + Array, ChunkedArray, list + key_names : sequence of str + The pivot key names expected in the pivot key column. + For each entry in `key_names`, a column with the same name is emitted + in the struct output. + unexpected_key_behavior : str, default "ignore" + The behavior when pivot keys not in `key_names` are encountered. + Accepted values are "ignore", "raise". + If "ignore", unexpected keys are silently ignored. + If "raise", unexpected keys raise a KeyError. + options : pyarrow.compute.PivotWiderOptions, optional + Alternative way of passing options. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array of indices + Pivoted struct array + """ + # ========================= 2. Element-wise (“scalar”) functions ========================= # ========================= 2.1 Arithmetic ========================= @@ -1076,8 +1266,8 @@ def add( ) -> _NumericOrTemporalScalarT: ... @overload def add( - x: _NumericOrTemporalArrayT, - y: _NumericOrTemporalArrayT, + x: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], + y: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], /, *, memory_pool: lib.MemoryPool | None = None, @@ -1088,27 +1278,27 @@ def add( ) -> Expression: ... @overload def add( - x: NumericOrTemporalScalar, - y: _NumericOrTemporalArrayT, + x: NumericOrTemporalScalar | lib._AsPyType, + y: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], /, *, memory_pool: lib.MemoryPool | None = None, ) -> _NumericOrTemporalArrayT: ... @overload def add( - x: _NumericOrTemporalArrayT, - y: NumericOrTemporalScalar, + x: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], + y: NumericOrTemporalScalar | lib._AsPyType, /, *, memory_pool: lib.MemoryPool | None = None, ) -> _NumericOrTemporalArrayT: ... 
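An illustrative sketch (not part of the diff) of why the add() overloads above also admit plain Python sequences and scalars: pyarrow compute converts such inputs on the fly.

    import pyarrow as pa
    import pyarrow.compute as pc

    # a Python list and a Python int are converted to an array and a scalar
    assert pc.add([1, 2, 3], 1).to_pylist() == [2, 3, 4]

    # mixing a pyarrow array with a Python scalar works the same way
    assert pc.add(pa.array([1.5, 2.5]), 0.5).to_pylist() == [2.0, 3.0]
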
@overload def add( - x: NumericOrTemporalScalar, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None + x: NumericOrTemporalScalar | lib._AsPyType, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None ) -> Expression: ... @overload def add( - x: Expression, y: NumericOrTemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None + x: Expression, y: NumericOrTemporalScalar | lib._AsPyType, /, *, memory_pool: lib.MemoryPool | None = None ) -> Expression: ... def add(*args, **kwargs): """ @@ -1772,7 +1962,7 @@ memory_pool : pyarrow.MemoryPool, optional @overload def round( - x: _NumericScalarT, + x: _NumericScalarT | int | float, /, ndigits: int = 0, round_mode: Literal[ @@ -1793,7 +1983,7 @@ def round( ) -> _NumericScalarT: ... @overload def round( - x: _NumericArrayT, + x: _NumericArrayT | Sequence[int | float | None], /, ndigits: int = 0, round_mode: Literal[ @@ -1860,9 +2050,9 @@ def round(*args, **kwargs): @overload def round_to_multiple( - x: _NumericScalarT, + x: int | float | _NumericScalarT, /, - multiple: int = 0, + multiple: int | float | _NumericScalarT = 0, round_mode: Literal[ "down", "up", @@ -1881,9 +2071,9 @@ def round_to_multiple( ) -> _NumericScalarT: ... @overload def round_to_multiple( - x: _NumericArrayT, + x: _NumericArrayT | Sequence[int | float | None], /, - multiple: int = 0, + multiple: int | float | _NumericScalarT = 0, round_mode: Literal[ "down", "up", @@ -1904,7 +2094,7 @@ def round_to_multiple( def round_to_multiple( x: Expression, /, - multiple: int = 0, + multiple: int | float | _NumericScalarT = 0, round_mode: Literal[ "down", "up", @@ -1949,7 +2139,7 @@ def round_to_multiple(*args, **kwargs): @overload def round_binary( - x: _NumericScalarT, + x: _NumericScalarT | float, s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar, /, round_mode: Literal[ @@ -1970,7 +2160,7 @@ def round_binary( ) -> _NumericScalarT: ... @overload def round_binary( - x: _NumericScalarT, + x: _NumericScalarT | float, s: Iterable, /, round_mode: Literal[ @@ -1991,7 +2181,7 @@ def round_binary( ) -> lib.NumericArray[_NumericScalarT]: ... @overload def round_binary( - x: _NumericArrayT, + x: _NumericArrayT | Sequence[float], s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, /, round_mode: Literal[ @@ -2298,6 +2488,18 @@ Compute the inverse sine. NaN is returned for invalid input values; to raise an error instead, see "asin_checked". +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asinh = _clone_signature(ln) +""" +Compute the inverse hyperbolic sine. +NaN is returned for invalid input values. + Parameters ---------- x : Array-like or scalar-like @@ -2326,6 +2528,19 @@ Compute the inverse tangent of x. The return value is in the range [-pi/2, pi/2]; for a full return range [-pi, pi], see "atan2". +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +atanh = _clone_signature(ln) +""" +Compute the inverse hyperbolic tangent of x. +The return value is in the range [-1, 1]. +NaN is returned for invalid input values. + Parameters ---------- x : Array-like or scalar-like @@ -2340,6 +2555,30 @@ Compute the cosine. NaN is returned for invalid input values; to raise an error instead, see "cos_checked". 
+Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cosh = _clone_signature(ln) +""" +Compute the hyperbolic cosine. +NaN is returned for invalid input values. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +acosh = _clone_signature(ln) +""" +Compute the inverse hyperbolic cosine. +NaN is returned for invalid input values. + Parameters ---------- x : Array-like or scalar-like @@ -2382,6 +2621,18 @@ Compute the sine. Invalid input values raise an error; to return NaN instead, see "sin". +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sinh = _clone_signature(ln) +""" +Compute the hyperbolic sine. +NaN is returned for invalid input values. + Parameters ---------- x : Array-like or scalar-like @@ -2410,6 +2661,18 @@ Compute the tangent. Infinite values raise an error; to return NaN instead, see "tan". +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tanh = _clone_signature(ln) +""" +Compute the hyperbolic tangent. +NaN is returned for invalid input values. + Parameters ---------- x : Array-like or scalar-like @@ -2473,16 +2736,16 @@ def equal( ) -> lib.BooleanScalar: ... @overload def equal( - x: lib.Scalar, - y: lib.Array | lib.ChunkedArray, + x: lib.Scalar | lib._AsPyType, + y: lib.Array | lib.ChunkedArray | list[lib._AsPyType], /, *, memory_pool: lib.MemoryPool | None = None, ) -> lib.BooleanArray: ... @overload def equal( - x: lib.Array | lib.ChunkedArray, - y: lib.Scalar, + x: lib.Array | lib.ChunkedArray | list[lib._AsPyType], + y: lib.Scalar | lib._AsPyType, /, *, memory_pool: lib.MemoryPool | None = None, @@ -2613,11 +2876,11 @@ memory_pool : pyarrow.MemoryPool, optional @overload def max_element_wise( - *args: ScalarOrArray[_Scalar_CoT], + *args: ScalarOrArray[_Scalar_CoT] | NDArray[Any] | float, skip_nulls: bool = True, options: ElementWiseAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _Scalar_CoT: ... +) -> lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]: ... @overload def max_element_wise( *args: Expression, @@ -3774,6 +4037,25 @@ memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory poo """ +def utf8_normalize( + strings: _StringArrayT, /, form: str, *, options: Utf8NormalizeOptions | None = None, memory_pool: lib.MemoryPool | None = None +) -> _StringArrayT: + """ + Utf8-normalize input + + For each string in `strings`, return the normal form. + The normalization form must be given in the Utf8NormalizeOptions. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + form : str + Unicode normalization form. + Accepted values are "NFC", "NFKC", "NFD", NFKD". + """ + # ========================= 2.12 String padding ========================= @overload def ascii_center( @@ -3960,6 +4242,60 @@ memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. 
""" +@overload +def utf8_zero_fill( + strings: _StringScalarT, + /, + width: int, + padding: str = '0', + *, + options: ZeroFillOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT: ... +@overload +def utf8_zero_fill( + strings: _StringArrayT, + /, + width: int | None = None, + padding: str | None = '0', + *, + options: ZeroFillOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringArrayT: ... +@overload +def utf8_zero_fill( + strings: Expression, + /, + width: int, + padding: str = '0', + *, + options: ZeroFillOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +def utf8_zero_fill(*args, **kwargs): + """ + Left-pad strings to a given width, preserving leading sign characters + + For each string in `strings`, emit a string of length `width` by + prepending the given padding character (defaults to '0' if not specified). + If the string starts with '+' or '-', the sign is preserved and padding + occurs after the sign. Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + width : int + Desired string length. + padding : str, default "0" + Padding character. Should be one Unicode codepoint. + options : pyarrow.compute.ZeroFillOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ +utf8_zfill = _clone_signature(utf8_zero_fill) + # ========================= 2.13 String trimming ========================= @overload def ascii_ltrim( @@ -4448,38 +4784,74 @@ def extract_regex(*args, **kwargs): If not passed, will allocate memory from the default memory pool. """ -# ========================= 2.16 String join ========================= -def binary_join( - strings, separator, /, *, memory_pool: lib.MemoryPool | None = None -) -> StringScalar | StringArray: +def extract_regex_span( + strings: StringOrBinaryArray, + /, + pattern: str, + *, + options: ExtractRegexSpanOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: """ - Join a list of strings together with a separator. + Extract string spans captured by a regex pattern + + For each string in `strings`, match the regular expression and, if + successful, emit a struct with field names and values coming from the + regular expression's named capture groups. Each struct field value + will be a fixed_size_list(offset_type, 2) where offset_type is int32 + or int64, depending on the input string type. The two elements in + each fixed-size list are the index and the length of the substring + matched by the corresponding named capture group. + + If the input is null or the regular expression fails matching, + a null output value is emitted. - Concatenate the strings in `list`. The `separator` is inserted - between each given string. - Any null input and any null `list` element emits a null output. + Regular expression matching is done using the Google RE2 library. Parameters ---------- strings : Array-like or scalar-like Argument to compute function. - separator : Array-like or scalar-like - Argument to compute function. + pattern : str + Regular expression with named capture fields. + options : pyarrow.compute.ExtractRegexSpanOptions, optional + Alternative way of passing options. memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. 
""" +# ========================= 2.16 String join ========================= +def binary_join( + strings: ArrayOrChunkedArray[lib.ListType[lib.BinaryType]], separator, /, *, memory_pool: lib.MemoryPool | None = None, +) -> StringArray | BinaryArray: ... +""" +Join a list of strings together with a separator. + +Concatenate the strings in `list`. The `separator` is inserted +between each given string. +Any null input and any null `list` element emits a null output. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +separator : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + @overload def binary_join_element_wise( - *strings: _StringOrBinaryScalarT, + *strings: _StringOrBinaryScalarT | str, null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", null_replacement: str = "", options: JoinOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryScalarT: ... +) -> _StringScalarT | _BinaryScalarT: ... @overload def binary_join_element_wise( - *strings: _StringOrBinaryArrayT, + *strings: _StringOrBinaryArrayT | Sequence[str | None], null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", null_replacement: str = "", options: JoinOptions | None = None, @@ -4646,55 +5018,30 @@ def utf8_slice_codeunits(*args, **kwargs): # ========================= 2.18 Containment tests ========================= @overload def count_substring( - strings: lib.StringScalar | lib.BinaryScalar, - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Scalar: ... -@overload -def count_substring( - strings: lib.LargeStringScalar | lib.LargeBinaryScalar, - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: ... -@overload -def count_substring( - strings: lib.StringArray - | lib.BinaryArray - | lib.ChunkedArray[lib.StringScalar] - | lib.ChunkedArray[lib.BinaryScalar], + strings: lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType], /, pattern: str, *, ignore_case: bool = False, options: MatchSubstringOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array: ... +) -> lib.Int32Scalar | lib.Int64Scalar: ... @overload def count_substring( - strings: lib.LargeStringArray - | lib.LargeBinaryArray - | lib.ChunkedArray[lib.LargeStringScalar] - | lib.ChunkedArray[lib.LargeBinaryScalar], + strings: lib.Array[lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType]] + | lib.ChunkedArray[lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType]], /, pattern: str, *, ignore_case: bool = False, options: MatchSubstringOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... +) -> lib.Int32Array | lib.Int64Array: ... 
@overload def count_substring( strings: Expression, /, - pattern: str, + pattern: Any, *, ignore_case: bool = False, options: MatchSubstringOptions | None = None, @@ -5236,7 +5583,7 @@ def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): """ def coalesce( - *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None + *values: _ScalarOrArrayT | Expression, memory_pool: lib.MemoryPool | None = None ) -> _ScalarOrArrayT: """ Select the first non-null value. @@ -5380,7 +5727,7 @@ def list_value_length(*args, **kwargs): @overload def make_struct( - *args: lib.Scalar, + *args: lib.Scalar | lib._AsPyType, field_names: list[str] | tuple[str, ...] = (), field_nullability: bool | None = None, field_metadata: list[lib.KeyValueMetadata] | None = None, @@ -5389,7 +5736,7 @@ def make_struct( ) -> lib.StructScalar: ... @overload def make_struct( - *args: lib.Array | lib.ChunkedArray, + *args: lib.Array | lib.ChunkedArray | list[lib._AsPyType], field_names: list[str] | tuple[str, ...] = (), field_nullability: bool | None = None, field_metadata: list[lib.KeyValueMetadata] | None = None, @@ -5430,6 +5777,59 @@ def make_struct(*args, **kwargs): """ # ========================= 2.22 Conversions ========================= + +def run_end_decode( + array: lib.Array, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Decode run-end encoded array. + + Return a decoded version of a run-end encoded input array. + + Parameters + ---------- + array : Array-like + Argument to compute function. + + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + + +def run_end_encode( + array: lib.Array, + /, + run_end_type: lib.Type_INT16 | lib.Type_INT32 | lib.Type_INT64 = lib.Type_INT32, + *, + options: RunEndEncodeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Run-end encode array. + + Return a run-end encoded version of the input array. + + Parameters + ---------- + + array : Array-like + Argument to compute function. + + run_end_type : DataType, default pyarrow.int32() + The data type of the run_ends array. + + Accepted values are pyarrow.{int16(), int32(), int64()}. + + options : pyarrow.compute.RunEndEncodeOptions, optional + Alternative way of passing options. + + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + @overload def ceil_temporal( timestamps: _TemporalScalarT, @@ -5666,7 +6066,7 @@ memory_pool : pyarrow.MemoryPool, optional @overload def cast( arr: lib.Scalar, - target_type: _DataTypeT, + target_type: _DataTypeT | None = None, safe: bool | None = None, options: CastOptions | None = None, memory_pool: lib.MemoryPool | None = None, @@ -5674,7 +6074,7 @@ def cast( @overload def cast( arr: lib.Array, - target_type: _DataTypeT, + target_type: _DataTypeT | str | None = None, safe: bool | None = None, options: CastOptions | None = None, memory_pool: lib.MemoryPool | None = None, @@ -5682,7 +6082,7 @@ def cast( @overload def cast( arr: lib.ChunkedArray, - target_type: _DataTypeT, + target_type: _DataTypeT | None = None, safe: bool | None = None, options: CastOptions | None = None, memory_pool: lib.MemoryPool | None = None, @@ -5744,7 +6144,7 @@ def cast(*args, **kwargs): @overload def strftime( - timestamps: TemporalScalar, + timestamps: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C", @@ -5754,7 +6154,7 @@ def strftime( ) -> lib.StringScalar: ... @overload def strftime( - timestamps: TemporalArray, + timestamps: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C", @@ -5866,11 +6266,11 @@ def strptime(*args, **kwargs): # ========================= 2.23 Temporal component extraction ========================= @overload def day( - values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None ) -> lib.Int64Scalar: ... @overload def day( - values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, memory_pool: lib.MemoryPool | None = None ) -> lib.Int64Array: ... @overload def day(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... @@ -5892,7 +6292,7 @@ def day(*args, **kwargs): @overload def day_of_week( - values: TemporalScalar, + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, count_from_zero: bool = True, @@ -5902,7 +6302,7 @@ def day_of_week( ) -> lib.Int64Scalar: ... @overload def day_of_week( - values: TemporalArray, + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, count_from_zero: bool = True, @@ -5967,17 +6367,17 @@ memory_pool : pyarrow.MemoryPool, optional @overload def hour( - values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any], + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT | lib.Time32Scalar[Any] | lib.Time64Scalar[Any], /, *, memory_pool: lib.MemoryPool | None = None, ) -> lib.Int64Scalar: ... @overload def hour( - values: lib.TimestampArray[Any] + values: _ZonedTimestampArrayT + | _ZonelessTimestampArrayT | lib.Time32Array[Any] | lib.Time64Array[Any] - | lib.ChunkedArray[lib.TimestampScalar[Any]] | lib.ChunkedArray[lib.Time32Scalar[Any]] | lib.ChunkedArray[lib.Time64Scalar[Any]], /, @@ -6009,11 +6409,11 @@ def hour(*args, **kwargs): @overload def is_dst( - values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None ) -> lib.BooleanScalar: ... 
@overload def is_dst( - values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, memory_pool: lib.MemoryPool | None = None, @@ -6039,11 +6439,11 @@ def is_dst(*args, **kwargs): @overload def iso_week( - values: lib.TimestampScalar[Any], /, *, memory_pool: lib.MemoryPool | None = None + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None ) -> lib.Int64Scalar: ... @overload def iso_week( - values: lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]], + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, memory_pool: lib.MemoryPool | None = None, @@ -6088,16 +6488,48 @@ memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """ +@overload +def iso_calendar( + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructScalar: ... +@overload +def iso_calendar( + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: ... +@overload +def iso_calendar( + values: Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> Expression: ... +def iso_calendar(*args, **kwargs): + """ + Extract (ISO year, ISO week, ISO day of week) struct. + + ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7. + Null values emit null. An error is returned if the values have a defined + timezone, but it cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + @overload def is_leap_year( - values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar, + values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT | lib.Date32Scalar | lib.Date64Scalar, /, *, memory_pool: lib.MemoryPool | None = None, ) -> lib.BooleanScalar: ... @overload def is_leap_year( - values: lib.TimestampArray + values: _ZonedTimestampArrayT + | _ZonelessTimestampArrayT | lib.Date32Array | lib.Date64Array | lib.ChunkedArray[lib.TimestampScalar] @@ -6310,7 +6742,7 @@ memory_pool : pyarrow.MemoryPool, optional @overload def week( - values: lib.TimestampScalar, + values: lib.Scalar[lib.TimestampType[Any, Any]], /, *, week_starts_monday: bool = True, @@ -6321,7 +6753,7 @@ def week( ) -> lib.Int64Scalar: ... @overload def week( - values: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, week_starts_monday: bool = True, @@ -6677,7 +7109,7 @@ memory_pool : pyarrow.MemoryPool, optional # ========================= 2.25 Timezone handling ========================= @overload def assume_timezone( - timestamps: lib.TimestampScalar, + timestamps: _ZonelessTimestampScalarT, /, timezone: str, *, @@ -6685,10 +7117,10 @@ def assume_timezone( nonexistent: Literal["raise", "earliest", "latest"] = "raise", options: AssumeTimezoneOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampScalar: ... +) -> _ZonedTimestampScalarT: ... 
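A small sketch of the temporal component-extraction kernels documented above, using a timezone-naive timestamp array; the commented values are illustrative.

import datetime as dt
import pyarrow as pa
import pyarrow.compute as pc

ts = pa.array([dt.datetime(2024, 2, 29, 12, 0)])
leap = pc.is_leap_year(ts)    # [True]
week = pc.iso_week(ts)        # [9]
cal = pc.iso_calendar(ts)     # struct array with iso_year / iso_week / iso_day_of_week fields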
@overload def assume_timezone( - timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + timestamps: _ZonelessTimestampArrayT, /, timezone: str, *, @@ -6696,7 +7128,23 @@ def assume_timezone( nonexistent: Literal["raise", "earliest", "latest"] = "raise", options: AssumeTimezoneOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampArray: ... +) -> _ZonedTimestampArrayT: ... +@overload +def assume_timezone( + timestamps: _ZonelessTimestampScalarT, + /, + *, + options: AssumeTimezoneOptions, + memory_pool: lib.MemoryPool | None = None, +) -> _ZonedTimestampScalarT: ... +@overload +def assume_timezone( + timestamps: _ZonelessTimestampArrayT, + /, + *, + options: AssumeTimezoneOptions, + memory_pool: lib.MemoryPool | None = None, +) -> _ZonedTimestampArrayT: ... @overload def assume_timezone( timestamps: Expression, @@ -6741,15 +7189,15 @@ def assume_timezone(*args, **kwargs): @overload def local_timestamp( - timestamps: lib.TimestampScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.TimestampScalar: ... + timestamps: _ZonedTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None +) -> _ZonelessTimestampScalarT: ... @overload def local_timestamp( - timestamps: lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar], + timestamps: _ZonedTimestampArrayT, /, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampArray: ... +) -> _ZonelessTimestampArrayT: ... @overload def local_timestamp( timestamps: Expression, /, *, memory_pool: lib.MemoryPool | None = None @@ -6777,7 +7225,7 @@ def local_timestamp(*args, **kwargs): def random( n: int, *, - initializer: Literal["system"] | int = "system", + initializer: Hashable = "system", options: RandomOptions | None = None, memory_pool: lib.MemoryPool | None = None, ) -> lib.DoubleArray: @@ -6810,7 +7258,7 @@ def random( def cumulative_sum( values: _NumericArrayT, /, - start: lib.Scalar | None = None, + start: lib.Scalar | int | None = None, *, skip_nulls: bool = False, options: CumulativeSumOptions | None = None, @@ -7009,6 +7457,22 @@ def dictionary_encode( memory_pool: lib.MemoryPool | None = None, ) -> Expression: ... @overload +def dictionary_decode(array: _ScalarOrArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ScalarOrArrayT: ... +@overload +def dictionary_decode(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... +def dictionary_decode(*args, **kwargs): + """ + Decodes a DictionaryArray to an Array + + Return a plain-encoded version of the array input. + This function does nothing if the input is not a dictionary. + + Parameters + ---------- + array : Array-like + Argument to compute function. + """ +@overload def unique(array: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... @overload def unique(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... 
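A minimal sketch of the dictionary-encoding and uniqueness kernels documented above; dictionary_decode assumes a pyarrow release that exposes that kernel, and the commented results are examples only.

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array(["a", "b", "a", None, "b"])
encoded = pc.dictionary_encode(arr)     # DictionaryArray with dictionary ["a", "b"]
decoded = pc.dictionary_decode(encoded)
assert decoded.equals(arr)              # plain-encoded round trip

distinct = pc.unique(arr)               # ["a", "b", None]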
@@ -7045,14 +7509,7 @@ def array_filter( @overload def array_take( array: _ArrayT, - indices: list[int] - | list[int | None] - | lib.Int16Array - | lib.Int32Array - | lib.Int64Array - | lib.ChunkedArray[lib.Int16Scalar] - | lib.ChunkedArray[lib.Int32Scalar] - | lib.ChunkedArray[lib.Int64Scalar], + indices: Indices | list[int | None], /, *, boundscheck: bool = True, @@ -7210,7 +7667,7 @@ def array_sort_indices(*args, **kwargs): @overload def partition_nth_indices( - array: lib.Array | lib.ChunkedArray, + array: lib.Array | lib.ChunkedArray | Sequence[int | float | str | None], /, pivot: int, *, @@ -7315,12 +7772,92 @@ def rank( If not passed, will allocate memory from the default memory pool. """ +def rank_quantile( + input: lib.Array | lib.ChunkedArray, + /, + sort_keys: _Order = "ascending", + *, + null_placement: _Placement = "at_end", + options: RankQuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: + """ + Compute quantile ranks of an array (1-based). + + This function computes a quantile rank of the input array. + By default, null values are considered greater than any other value and + are therefore sorted at the end of the input. For floating-point types, + NaNs are considered greater than any other non-null value, but smaller + than null values. + + The results are real values strictly between 0 and 1. They are + computed as in https://en.wikipedia.org/wiki/Quantile_rank + but without multiplying by 100. + + The handling of nulls and NaNs can be changed in RankQuantileOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.RankQuantileOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + + +rank_normal = _clone_signature(rank_quantile) +""" +Compute normal (gaussian) ranks of an array (1-based). + +This function computes a normal (gaussian) rank of the input array. +By default, null values are considered greater than any other value and +are therefore sorted at the end of the input. For floating-point types, +NaNs are considered greater than any other non-null value, but smaller +than null values. +The results are finite real values. They are obtained as if first +calling the "rank_quantile" function and then applying the normal +percent-point function (PPF) to the resulting quantile values. + +The handling of nulls and NaNs can be changed in RankQuantileOptions. + +Parameters +---------- +input : Array-like or scalar-like + Argument to compute function. +sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. 
+ Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. +null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". +options : pyarrow.compute.RankQuantileOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + @overload def select_k_unstable( - input: lib.Array | lib.ChunkedArray, + input: lib.Array | lib.ChunkedArray | lib.Table, /, k: int, - sort_keys: list[tuple[str, _Order]], + sort_keys: list[tuple[str | Expression, _Order]] | None = None, *, options: SelectKOptions | None = None, memory_pool: lib.MemoryPool | None = None, @@ -7330,11 +7867,27 @@ def select_k_unstable( input: Expression, /, k: int, - sort_keys: list[tuple[str, _Order]], + sort_keys: list[tuple[str | Expression, _Order]] | None = None, *, options: SelectKOptions | None = None, memory_pool: lib.MemoryPool | None = None, ) -> Expression: ... +@overload +def select_k_unstable( + input: lib.Array | lib.ChunkedArray | lib.Table, + /, + options: SelectKOptions, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array: ... +@overload +def select_k_unstable( + input: Expression, + /, + options: SelectKOptions, + *, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... def select_k_unstable(*args, **kwargs): """ Select the indices of the first `k` ordered elements from the input. @@ -7369,7 +7922,7 @@ def select_k_unstable(*args, **kwargs): def sort_indices( input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, /, - sort_keys: Sequence[tuple[str, _Order]] = (), + sort_keys: Sequence[tuple[str|Expression, _Order]] = (), *, null_placement: _Placement = "at_end", options: SortOptions | None = None, @@ -7379,7 +7932,7 @@ def sort_indices( def sort_indices( input: Expression, /, - sort_keys: Sequence[tuple[str, _Order]] = (), + sort_keys: Sequence[tuple[str|Expression, _Order]] = (), *, null_placement: _Placement = "at_end", options: SortOptions | None = None, @@ -7423,7 +7976,7 @@ def list_element( ) -> Expression: ... 
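A sketch of the ordering kernels documented above; rank_quantile and rank_normal are only present in recent pyarrow releases, and the commented index values are illustrative.

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([30, 10, None, 20])
order = pc.sort_indices(arr)        # [1, 3, 0, 2]; nulls sort last by default
quantiles = pc.rank_quantile(arr, sort_keys="ascending")  # real ranks strictly between 0 and 1

tbl = pa.table({"x": [30, 10, None, 20]})
top2 = pc.select_k_unstable(tbl, k=2, sort_keys=[("x", "descending")])  # indices of the 2 largest "x"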
@overload def list_element( - lists: lib.Array[ListScalar[_DataTypeT]], + lists: lib.Array[ListScalar[_DataTypeT]] | lib.Array[lib.Scalar[lib.ListType[lib.StructType]]], index: ScalarLike, /, *, From 965910662d4b5b25c551d23efbad83870b94ca4c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 21 Aug 2025 19:55:10 +0200 Subject: [PATCH 04/26] Add Apache 2.0 license --- python/pyarrow/__init__.pyi | 18 +++++++++++++++++- python/pyarrow/__lib_pxi/__init__.pyi | 16 ++++++++++++++++ python/pyarrow/__lib_pxi/array.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/benchmark.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/builder.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/compat.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/config.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/device.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/error.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/io.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/ipc.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/memory.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/pandas_shim.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/scalar.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/table.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/tensor.pyi | 17 +++++++++++++++++ python/pyarrow/__lib_pxi/types.pyi | 17 +++++++++++++++++ python/pyarrow/_azurefs.pyi | 17 +++++++++++++++++ python/pyarrow/_compute.pyi | 17 +++++++++++++++++ python/pyarrow/_csv.pyi | 17 +++++++++++++++++ python/pyarrow/_cuda.pyi | 17 +++++++++++++++++ python/pyarrow/_dataset.pyi | 17 +++++++++++++++++ python/pyarrow/_dataset_orc.pyi | 17 +++++++++++++++++ python/pyarrow/_dataset_parquet.pyi | 17 +++++++++++++++++ .../pyarrow/_dataset_parquet_encryption.pyi | 17 +++++++++++++++++ python/pyarrow/_feather.pyi | 17 +++++++++++++++++ python/pyarrow/_flight.pyi | 17 +++++++++++++++++ python/pyarrow/_fs.pyi | 17 +++++++++++++++++ python/pyarrow/_gcsfs.pyi | 17 +++++++++++++++++ python/pyarrow/_hdfs.pyi | 17 +++++++++++++++++ python/pyarrow/_json.pyi | 17 +++++++++++++++++ python/pyarrow/_orc.pyi | 17 +++++++++++++++++ python/pyarrow/_parquet.pyi | 17 +++++++++++++++++ python/pyarrow/_parquet_encryption.pyi | 17 +++++++++++++++++ python/pyarrow/_s3fs.pyi | 17 +++++++++++++++++ python/pyarrow/_stubs_typing.pyi | 17 +++++++++++++++++ python/pyarrow/_substrait.pyi | 17 +++++++++++++++++ python/pyarrow/acero.pyi | 17 +++++++++++++++++ python/pyarrow/benchmark.pyi | 17 +++++++++++++++++ python/pyarrow/cffi.pyi | 17 +++++++++++++++++ python/pyarrow/csv.pyi | 17 +++++++++++++++++ python/pyarrow/cuda.pyi | 17 +++++++++++++++++ python/pyarrow/dataset.pyi | 17 +++++++++++++++++ python/pyarrow/feather.pyi | 17 +++++++++++++++++ python/pyarrow/flight.pyi | 17 +++++++++++++++++ python/pyarrow/fs.pyi | 17 +++++++++++++++++ python/pyarrow/gandiva.pyi | 17 +++++++++++++++++ python/pyarrow/interchange/__init__.pyi | 16 ++++++++++++++++ python/pyarrow/interchange/buffer.pyi | 17 +++++++++++++++++ python/pyarrow/interchange/column.pyi | 17 +++++++++++++++++ python/pyarrow/interchange/dataframe.pyi | 17 +++++++++++++++++ python/pyarrow/interchange/from_dataframe.pyi | 17 +++++++++++++++++ python/pyarrow/ipc.pyi | 17 +++++++++++++++++ python/pyarrow/json.pyi | 17 +++++++++++++++++ python/pyarrow/lib.pyi | 17 +++++++++++++++++ python/pyarrow/orc.pyi | 17 +++++++++++++++++ python/pyarrow/pandas_compat.pyi | 17 +++++++++++++++++ python/pyarrow/parquet/__init__.pyi | 19 ++++++++++++++++++- python/pyarrow/parquet/core.pyi | 17 
+++++++++++++++++ python/pyarrow/parquet/encryption.pyi | 17 +++++++++++++++++ python/pyarrow/substrait.pyi | 17 +++++++++++++++++ python/pyarrow/types.pyi | 17 +++++++++++++++++ python/pyarrow/util.pyi | 17 +++++++++++++++++ 63 files changed, 1070 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/__init__.pyi b/python/pyarrow/__init__.pyi index 8a0d1e870c5..ed1cad1bf80 100644 --- a/python/pyarrow/__init__.pyi +++ b/python/pyarrow/__init__.pyi @@ -1,4 +1,20 @@ -# ruff: noqa: F401, I001, E402 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + __version__: str import pyarrow.lib as _lib diff --git a/python/pyarrow/__lib_pxi/__init__.pyi b/python/pyarrow/__lib_pxi/__init__.pyi index e69de29bb2d..13a83393a91 100644 --- a/python/pyarrow/__lib_pxi/__init__.pyi +++ b/python/pyarrow/__lib_pxi/__init__.pyi @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyarrow/__lib_pxi/array.pyi b/python/pyarrow/__lib_pxi/array.pyi index 9283f57b69f..c14cd1b8c44 100644 --- a/python/pyarrow/__lib_pxi/array.pyi +++ b/python/pyarrow/__lib_pxi/array.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ import datetime as dt import sys diff --git a/python/pyarrow/__lib_pxi/benchmark.pyi b/python/pyarrow/__lib_pxi/benchmark.pyi index 66981bf0f51..592561636af 100644 --- a/python/pyarrow/__lib_pxi/benchmark.pyi +++ b/python/pyarrow/__lib_pxi/benchmark.pyi @@ -1 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + def benchmark_PandasObjectIsNull(list) -> None: ... # noqa: N802 diff --git a/python/pyarrow/__lib_pxi/builder.pyi b/python/pyarrow/__lib_pxi/builder.pyi index 4a0e9ca4708..39372f8e512 100644 --- a/python/pyarrow/__lib_pxi/builder.pyi +++ b/python/pyarrow/__lib_pxi/builder.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Iterable from pyarrow.lib import MemoryPool, _Weakrefable diff --git a/python/pyarrow/__lib_pxi/compat.pyi b/python/pyarrow/__lib_pxi/compat.pyi index ae667be453e..2ea013555c0 100644 --- a/python/pyarrow/__lib_pxi/compat.pyi +++ b/python/pyarrow/__lib_pxi/compat.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + def encode_file_path(path: str | bytes) -> bytes: ... def tobytes(o: str | bytes) -> bytes: ... def frombytes(o: bytes, *, safe: bool = False): ... 
diff --git a/python/pyarrow/__lib_pxi/config.pyi b/python/pyarrow/__lib_pxi/config.pyi index 166e10c9734..7c2eb8a9c98 100644 --- a/python/pyarrow/__lib_pxi/config.pyi +++ b/python/pyarrow/__lib_pxi/config.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import NamedTuple class VersionInfo(NamedTuple): diff --git a/python/pyarrow/__lib_pxi/device.pyi b/python/pyarrow/__lib_pxi/device.pyi index d1b9f39eedd..6c4f1fdeeea 100644 --- a/python/pyarrow/__lib_pxi/device.pyi +++ b/python/pyarrow/__lib_pxi/device.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from pyarrow.lib import _Weakrefable diff --git a/python/pyarrow/__lib_pxi/error.pyi b/python/pyarrow/__lib_pxi/error.pyi index 981ed51e680..c1e1a04ee40 100644 --- a/python/pyarrow/__lib_pxi/error.pyi +++ b/python/pyarrow/__lib_pxi/error.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/__lib_pxi/io.pyi b/python/pyarrow/__lib_pxi/io.pyi index d882fd79d57..dca26a52940 100644 --- a/python/pyarrow/__lib_pxi/io.pyi +++ b/python/pyarrow/__lib_pxi/io.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys from collections.abc import Callable diff --git a/python/pyarrow/__lib_pxi/ipc.pyi b/python/pyarrow/__lib_pxi/ipc.pyi index 3d72892061e..819326443cf 100644 --- a/python/pyarrow/__lib_pxi/ipc.pyi +++ b/python/pyarrow/__lib_pxi/ipc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum import sys diff --git a/python/pyarrow/__lib_pxi/memory.pyi b/python/pyarrow/__lib_pxi/memory.pyi index 57a3bb4f1b3..e969e3738b8 100644 --- a/python/pyarrow/__lib_pxi/memory.pyi +++ b/python/pyarrow/__lib_pxi/memory.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow.lib import _Weakrefable class MemoryPool(_Weakrefable): diff --git a/python/pyarrow/__lib_pxi/pandas_shim.pyi b/python/pyarrow/__lib_pxi/pandas_shim.pyi index 0e80fae4ebf..ae8460cc2b3 100644 --- a/python/pyarrow/__lib_pxi/pandas_shim.pyi +++ b/python/pyarrow/__lib_pxi/pandas_shim.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from types import ModuleType from typing import Any, Iterable, TypeGuard diff --git a/python/pyarrow/__lib_pxi/scalar.pyi b/python/pyarrow/__lib_pxi/scalar.pyi index 77368bb264b..c6819f7e863 100644 --- a/python/pyarrow/__lib_pxi/scalar.pyi +++ b/python/pyarrow/__lib_pxi/scalar.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import collections.abc import datetime as dt import sys diff --git a/python/pyarrow/__lib_pxi/table.pyi b/python/pyarrow/__lib_pxi/table.pyi index 34960e2b903..5ad66f9d06c 100644 --- a/python/pyarrow/__lib_pxi/table.pyi +++ b/python/pyarrow/__lib_pxi/table.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt import sys diff --git a/python/pyarrow/__lib_pxi/tensor.pyi b/python/pyarrow/__lib_pxi/tensor.pyi index d849abd0f1f..5ad950c84d0 100644 --- a/python/pyarrow/__lib_pxi/tensor.pyi +++ b/python/pyarrow/__lib_pxi/tensor.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/__lib_pxi/types.pyi b/python/pyarrow/__lib_pxi/types.pyi index 5cac864c3cc..aa965e3506c 100644 --- a/python/pyarrow/__lib_pxi/types.pyi +++ b/python/pyarrow/__lib_pxi/types.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt import sys diff --git a/python/pyarrow/_azurefs.pyi b/python/pyarrow/_azurefs.pyi index 317943ce20f..b9a83f01c56 100644 --- a/python/pyarrow/_azurefs.pyi +++ b/python/pyarrow/_azurefs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Literal from ._fs import FileSystem diff --git a/python/pyarrow/_compute.pyi b/python/pyarrow/_compute.pyi index 61ccb233feb..fa80304cf91 100644 --- a/python/pyarrow/_compute.pyi +++ b/python/pyarrow/_compute.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import ( Any, Callable, diff --git a/python/pyarrow/_csv.pyi b/python/pyarrow/_csv.pyi index 2f49f8c9a6c..c490d6be93a 100644 --- a/python/pyarrow/_csv.pyi +++ b/python/pyarrow/_csv.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from dataclasses import dataclass, field from typing import IO, Any, Callable, Literal diff --git a/python/pyarrow/_cuda.pyi b/python/pyarrow/_cuda.pyi index ad52b2f380f..6bcd9868d7f 100644 --- a/python/pyarrow/_cuda.pyi +++ b/python/pyarrow/_cuda.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any import cuda # type: ignore[import-not-found] diff --git a/python/pyarrow/_dataset.pyi b/python/pyarrow/_dataset.pyi index 114bf625983..4980cb0420f 100644 --- a/python/pyarrow/_dataset.pyi +++ b/python/pyarrow/_dataset.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/_dataset_orc.pyi b/python/pyarrow/_dataset_orc.pyi index 9c4ac04198f..d4e5784750f 100644 --- a/python/pyarrow/_dataset_orc.pyi +++ b/python/pyarrow/_dataset_orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ._dataset import FileFormat class OrcFileFormat(FileFormat): diff --git a/python/pyarrow/_dataset_parquet.pyi b/python/pyarrow/_dataset_parquet.pyi index cbcc17235f1..007d3404a18 100644 --- a/python/pyarrow/_dataset_parquet.pyi +++ b/python/pyarrow/_dataset_parquet.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from dataclasses import dataclass from typing import IO, Any, Iterable, TypedDict diff --git a/python/pyarrow/_dataset_parquet_encryption.pyi b/python/pyarrow/_dataset_parquet_encryption.pyi index 7623275b865..be40c0b39b3 100644 --- a/python/pyarrow/_dataset_parquet_encryption.pyi +++ b/python/pyarrow/_dataset_parquet_encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions from ._parquet import FileDecryptionProperties from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig diff --git a/python/pyarrow/_feather.pyi b/python/pyarrow/_feather.pyi index 8bb914ba45d..373fe38cdce 100644 --- a/python/pyarrow/_feather.pyi +++ b/python/pyarrow/_feather.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO from _typeshed import StrPath diff --git a/python/pyarrow/_flight.pyi b/python/pyarrow/_flight.pyi index 4450c42df49..a79475a8796 100644 --- a/python/pyarrow/_flight.pyi +++ b/python/pyarrow/_flight.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import asyncio import enum import sys diff --git a/python/pyarrow/_fs.pyi b/python/pyarrow/_fs.pyi index 7670ef5230d..45d4d922ac2 100644 --- a/python/pyarrow/_fs.pyi +++ b/python/pyarrow/_fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt import enum import sys diff --git a/python/pyarrow/_gcsfs.pyi b/python/pyarrow/_gcsfs.pyi index 4fc7ea68e48..0ced106615a 100644 --- a/python/pyarrow/_gcsfs.pyi +++ b/python/pyarrow/_gcsfs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt from ._fs import FileSystem diff --git a/python/pyarrow/_hdfs.pyi b/python/pyarrow/_hdfs.pyi index 200f669379b..ed367379171 100644 --- a/python/pyarrow/_hdfs.pyi +++ b/python/pyarrow/_hdfs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from _typeshed import StrPath from ._fs import FileSystem diff --git a/python/pyarrow/_json.pyi b/python/pyarrow/_json.pyi index 43d2ae83cd8..f416b4b29c6 100644 --- a/python/pyarrow/_json.pyi +++ b/python/pyarrow/_json.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Any, Literal from _typeshed import StrPath diff --git a/python/pyarrow/_orc.pyi b/python/pyarrow/_orc.pyi index 71bf0dde9ba..7587cc121c3 100644 --- a/python/pyarrow/_orc.pyi +++ b/python/pyarrow/_orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import IO, Literal from .lib import ( diff --git a/python/pyarrow/_parquet.pyi b/python/pyarrow/_parquet.pyi index a9187df0428..c75337cbf3b 100644 --- a/python/pyarrow/_parquet.pyi +++ b/python/pyarrow/_parquet.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict from _typeshed import StrPath diff --git a/python/pyarrow/_parquet_encryption.pyi b/python/pyarrow/_parquet_encryption.pyi index c707edb844a..e1228cbdb5a 100644 --- a/python/pyarrow/_parquet_encryption.pyi +++ b/python/pyarrow/_parquet_encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt from typing import Callable diff --git a/python/pyarrow/_s3fs.pyi b/python/pyarrow/_s3fs.pyi index fc13c498bd9..e2f5f147096 100644 --- a/python/pyarrow/_s3fs.pyi +++ b/python/pyarrow/_s3fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ import enum from typing import Literal, NotRequired, Required, TypedDict diff --git a/python/pyarrow/_stubs_typing.pyi b/python/pyarrow/_stubs_typing.pyi index c259513f1ea..549dc4059c3 100644 --- a/python/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow/_stubs_typing.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt from collections.abc import Sequence diff --git a/python/pyarrow/_substrait.pyi b/python/pyarrow/_substrait.pyi index ff226e9521b..ee78e9720fe 100644 --- a/python/pyarrow/_substrait.pyi +++ b/python/pyarrow/_substrait.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, Callable from ._compute import Expression diff --git a/python/pyarrow/acero.pyi b/python/pyarrow/acero.pyi index 8a520bdc24a..2abb608b32c 100644 --- a/python/pyarrow/acero.pyi +++ b/python/pyarrow/acero.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/benchmark.pyi b/python/pyarrow/benchmark.pyi index 048973301dc..3ea8f70bc34 100644 --- a/python/pyarrow/benchmark.pyi +++ b/python/pyarrow/benchmark.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow.lib import benchmark_PandasObjectIsNull __all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/pyarrow/cffi.pyi b/python/pyarrow/cffi.pyi index 2ae945c5974..e4f077d7155 100644 --- a/python/pyarrow/cffi.pyi +++ b/python/pyarrow/cffi.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import cffi c_source: str diff --git a/python/pyarrow/csv.pyi b/python/pyarrow/csv.pyi index 510229d7e72..a7abd413aab 100644 --- a/python/pyarrow/csv.pyi +++ b/python/pyarrow/csv.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._csv import ( ISO8601, ConvertOptions, diff --git a/python/pyarrow/cuda.pyi b/python/pyarrow/cuda.pyi index e11baf7d4e7..0394965bb73 100644 --- a/python/pyarrow/cuda.pyi +++ b/python/pyarrow/cuda.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._cuda import ( BufferReader, BufferWriter, diff --git a/python/pyarrow/dataset.pyi b/python/pyarrow/dataset.pyi index 98f1a38aa85..6cb7fed43e6 100644 --- a/python/pyarrow/dataset.pyi +++ b/python/pyarrow/dataset.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload from _typeshed import StrPath diff --git a/python/pyarrow/feather.pyi b/python/pyarrow/feather.pyi index 9451ee15763..ce8d83dbcd9 100644 --- a/python/pyarrow/feather.pyi +++ b/python/pyarrow/feather.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Literal import pandas as pd diff --git a/python/pyarrow/flight.pyi b/python/pyarrow/flight.pyi index 9b806ccf305..dcc6ee2244b 100644 --- a/python/pyarrow/flight.pyi +++ b/python/pyarrow/flight.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._flight import ( Action, ActionType, diff --git a/python/pyarrow/fs.pyi b/python/pyarrow/fs.pyi index 6bf75616c13..6c5a0af8d19 100644 --- a/python/pyarrow/fs.pyi +++ b/python/pyarrow/fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._fs import ( # noqa FileSelector, FileType, diff --git a/python/pyarrow/gandiva.pyi b/python/pyarrow/gandiva.pyi index a344f885b29..bc07e15c4a6 100644 --- a/python/pyarrow/gandiva.pyi +++ b/python/pyarrow/gandiva.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Iterable, Literal from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable diff --git a/python/pyarrow/interchange/__init__.pyi b/python/pyarrow/interchange/__init__.pyi index e69de29bb2d..13a83393a91 100644 --- a/python/pyarrow/interchange/__init__.pyi +++ b/python/pyarrow/interchange/__init__.pyi @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/python/pyarrow/interchange/buffer.pyi b/python/pyarrow/interchange/buffer.pyi index 46673961a75..78d1dabb8b7 100644 --- a/python/pyarrow/interchange/buffer.pyi +++ b/python/pyarrow/interchange/buffer.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from pyarrow.lib import Buffer diff --git a/python/pyarrow/interchange/column.pyi b/python/pyarrow/interchange/column.pyi index e6662867b6b..ce7e169bfb5 100644 --- a/python/pyarrow/interchange/column.pyi +++ b/python/pyarrow/interchange/column.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from typing import Any, Iterable, TypeAlias, TypedDict diff --git a/python/pyarrow/interchange/dataframe.pyi b/python/pyarrow/interchange/dataframe.pyi index 526a58926a9..a7ea6aeac74 100644 --- a/python/pyarrow/interchange/dataframe.pyi +++ b/python/pyarrow/interchange/dataframe.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/interchange/from_dataframe.pyi b/python/pyarrow/interchange/from_dataframe.pyi index b04b6268975..aa6217b6181 100644 --- a/python/pyarrow/interchange/from_dataframe.pyi +++ b/python/pyarrow/interchange/from_dataframe.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, Protocol, TypeAlias from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table diff --git a/python/pyarrow/ipc.pyi b/python/pyarrow/ipc.pyi index c7f2af004d4..985cf0678f9 100644 --- a/python/pyarrow/ipc.pyi +++ b/python/pyarrow/ipc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from io import IOBase import pandas as pd diff --git a/python/pyarrow/json.pyi b/python/pyarrow/json.pyi index db1d35e0b8b..67768db42e4 100644 --- a/python/pyarrow/json.pyi +++ b/python/pyarrow/json.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json __all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow/lib.pyi b/python/pyarrow/lib.pyi index 1698b55520b..3292c52b2c0 100644 --- a/python/pyarrow/lib.pyi +++ b/python/pyarrow/lib.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + # ruff: noqa: F403 from typing import NamedTuple diff --git a/python/pyarrow/orc.pyi b/python/pyarrow/orc.pyi index 2eba8d40a11..557f38a2b9e 100644 --- a/python/pyarrow/orc.pyi +++ b/python/pyarrow/orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow/pandas_compat.pyi b/python/pyarrow/pandas_compat.pyi index efbd05ac2fe..82fcb19ad97 100644 --- a/python/pyarrow/pandas_compat.pyi +++ b/python/pyarrow/pandas_compat.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import Any, TypedDict, TypeVar import numpy as np diff --git a/python/pyarrow/parquet/__init__.pyi b/python/pyarrow/parquet/__init__.pyi index 4ef88705809..8d0b5374ea0 100644 --- a/python/pyarrow/parquet/__init__.pyi +++ b/python/pyarrow/parquet/__init__.pyi @@ -1 +1,18 @@ -from .core import * # noqa +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .core import * # noqa diff --git a/python/pyarrow/parquet/core.pyi b/python/pyarrow/parquet/core.pyi index 56b2c8447d9..f5ac0510ffc 100644 --- a/python/pyarrow/parquet/core.pyi +++ b/python/pyarrow/parquet/core.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys from pathlib import Path diff --git a/python/pyarrow/parquet/encryption.pyi b/python/pyarrow/parquet/encryption.pyi index 5a77dae7ef7..fe9a454e593 100644 --- a/python/pyarrow/parquet/encryption.pyi +++ b/python/pyarrow/parquet/encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from pyarrow._parquet_encryption import ( CryptoFactory, DecryptionConfiguration, diff --git a/python/pyarrow/substrait.pyi b/python/pyarrow/substrait.pyi index a56a8a5b40f..b78bbd8aebd 100644 --- a/python/pyarrow/substrait.pyi +++ b/python/pyarrow/substrait.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._substrait import ( BoundExpressions, SubstraitSchema, diff --git a/python/pyarrow/types.pyi b/python/pyarrow/types.pyi index 0cb4f6171d3..3ead6830421 100644 --- a/python/pyarrow/types.pyi +++ b/python/pyarrow/types.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys from typing import Any diff --git a/python/pyarrow/util.pyi b/python/pyarrow/util.pyi index c2ecf7d6b61..5c9687bb83f 100644 --- a/python/pyarrow/util.pyi +++ b/python/pyarrow/util.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from collections.abc import Callable from os import PathLike from typing import Any, Protocol, Sequence, TypeVar From dd34a49bdece15b643255ee3a172803df519d8cf Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 21 Aug 2025 21:29:07 +0200 Subject: [PATCH 05/26] Add a pyright check to CI --- .github/workflows/python.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0d12accda4e..a4aa53e5cdc 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -138,6 +138,11 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} + - name: Type check with pyright + run: |- + python -m pip install pyright + pushd python; python -m pyright + macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 runs-on: macos-${{ matrix.macos-version }} From e3172115d39558594b789940579e3fed3b10c37a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 2 Sep 2025 23:02:42 +0200 Subject: [PATCH 06/26] automate docstring updates for stubfiles --- dev/update_stub_docstrings.py | 118 ++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 dev/update_stub_docstrings.py diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py new file mode 100644 index 00000000000..72db8b0d000 --- /dev/null +++ b/dev/update_stub_docstrings.py @@ -0,0 +1,118 @@ +# Utility to extract docstrings from pyarrow and update +# docstrings in stubfiles. +# +# Usage +# ===== +# +# python ../dev/update_stub_docstrings.py -s ./pyarrow/compute.pyi + + +import os +from pathlib import Path +from textwrap import indent + +import click +import griffe +import libcst as cst + +docstrings_map = {} + + +def extract_docstrings(pckg, path=""): + if "filepath" in pckg and pckg["filepath"].endswith(".pyi"): + return + if "docstring" in pckg: + docstrings_map[path] = pckg["docstring"].value + + for name, pckg in pckg.get("members", {}).items(): + extract_docstrings(pckg, path=f"{path}.{name}") + + +def _is_docstring_node(node): + """Checks if a node is a docstring.""" + return ( + isinstance(node, cst.SimpleStatementLine) and + isinstance(node.body[0], cst.Expr) and + isinstance(node.body[0].value, cst.SimpleString) + ) + + +class ClonedSignatureDocstringTransformer(cst.CSTTransformer): + def __init__(self, docstrings_map, module_name): + self.docstrings_map = docstrings_map + self.module_name = module_name + self.name_of_function = None + + def leave_Assign(self, original_node, updated_node): + target = original_node.targets[0].target + value = original_node.value + + if isinstance(target, cst.Name) and isinstance(value, cst.Call) and \ + value.func.value == "_clone_signature": + self.name_of_function = f"{self.module_name}.{target.value}" + return updated_node + + def leave_SimpleStatementLine(self, original_node, updated_node): + if self.name_of_function: + if len(updated_node.body) > 0 and _is_docstring_node(updated_node): + comment_content = self.docstrings_map[self.name_of_function].strip() + self.name_of_function = None + + new_string_node = cst.SimpleString(value=f'"""\n{comment_content}\n"""') + new_expr_node = updated_node.body[0].with_changes(value=new_string_node) + new_body = [new_expr_node] + list(updated_node.body[1:]) + updated_node = updated_node.with_changes(body=new_body) + + return updated_node + + +class FunctionDocstringTransformer(cst.CSTTransformer): + def __init__(self, docstrings_map, module_name): + self.docstrings_map = docstrings_map + 
self.module_name = module_name + + def leave_FunctionDef(self, original_node, updated_node): + full_name = f"{self.module_name}.{original_node.name.value}" + + # Check if we have a docstring for this function + if full_name in self.docstrings_map: + # Check if the function already has a docstring + body_list = list(updated_node.body.body) + has_docstring = len(body_list) > 0 and _is_docstring_node(body_list[0]) + + if has_docstring: + # Replace existing docstring + docstring = indent(self.docstrings_map[full_name], " ").strip() + docstring_value = f'"""\n {docstring}\n """' + new_docstring_node = cst.SimpleStatementLine( + body=[cst.Expr(value=cst.SimpleString(value=docstring_value))] + ) + new_body = [new_docstring_node] + body_list[1:] + return updated_node.with_changes( + body=updated_node.body.with_changes(body=new_body) + ) + + return updated_node + +@click.command() +@click.option('--stub_file', '-s', type=click.Path(resolve_path=True)) +def update_stub_file(stub_file): + package = griffe.load("pyarrow", try_relative_path=False, force_inspection=True, resolve_aliases=True) + extract_docstrings(package.as_dict(), "pyarrow") + + with open(stub_file, 'r') as f: + tree = cst.parse_module(f.read()) + + cloned_signature_transformer = ClonedSignatureDocstringTransformer(docstrings_map, "pyarrow.compute") + function_docstring_transformer = FunctionDocstringTransformer(docstrings_map, "pyarrow.compute") + + modified_tree = tree.visit(function_docstring_transformer) + modified_tree = modified_tree.visit(cloned_signature_transformer) + + + # Write the modified code + with open(stub_file, "w") as f: + f.write(modified_tree.code) + +if __name__ == "__main__": + update_stub_file(obj={}) From 30d0ca02e0d3c4af21065e2b78db9ef6cea9afc4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 12 Sep 2025 15:30:33 +0200 Subject: [PATCH 07/26] Remove some stubs --- python/pyarrow/__lib_pxi/benchmark.pyi | 18 - python/pyarrow/__lib_pxi/builder.pyi | 106 - python/pyarrow/__lib_pxi/compat.pyi | 22 - python/pyarrow/__lib_pxi/config.pyi | 58 - python/pyarrow/__lib_pxi/device.pyi | 105 - python/pyarrow/__lib_pxi/error.pyi | 70 - python/pyarrow/__lib_pxi/ipc.pyi | 722 -- python/pyarrow/__lib_pxi/pandas_shim.pyi | 68 - python/pyarrow/__lib_pxi/table.pyi | 5640 --------------- python/pyarrow/_azurefs.pyi | 91 - python/pyarrow/_compute.pyi | 1768 ----- python/pyarrow/_csv.pyi | 658 -- python/pyarrow/_cuda.pyi | 573 -- python/pyarrow/_dataset.pyi | 2318 ------ python/pyarrow/_dataset_orc.pyi | 23 - python/pyarrow/_dataset_parquet.pyi | 331 - python/pyarrow/_flight.pyi | 1397 ---- python/pyarrow/_fs.pyi | 1022 --- python/pyarrow/_hdfs.pyi | 92 - python/pyarrow/_json.pyi | 186 - python/pyarrow/_orc.pyi | 73 - python/pyarrow/_parquet.pyi | 462 -- python/pyarrow/_s3fs.pyi | 91 - python/pyarrow/_substrait.pyi | 56 - python/pyarrow/acero.pyi | 102 - python/pyarrow/benchmark.pyi | 20 - python/pyarrow/cffi.pyi | 21 - python/pyarrow/compute.pyi | 8332 ---------------------- python/pyarrow/cuda.py | 25 - python/pyarrow/dataset.pyi | 246 - python/pyarrow/feather.pyi | 67 - python/pyarrow/gandiva.pyi | 82 - python/pyarrow/ipc.pyi | 140 - python/pyarrow/json.pyi | 20 - python/pyarrow/orc.pyi | 296 - python/pyarrow/pandas_compat.pyi | 71 - python/pyarrow/substrait.pyi | 38 - python/pyarrow/util.pyi | 44 - 38 files changed, 25454 deletions(-) delete mode 100644 python/pyarrow/__lib_pxi/benchmark.pyi delete mode 100644 python/pyarrow/__lib_pxi/builder.pyi delete mode 100644 python/pyarrow/__lib_pxi/compat.pyi delete mode 100644 
python/pyarrow/__lib_pxi/config.pyi delete mode 100644 python/pyarrow/__lib_pxi/device.pyi delete mode 100644 python/pyarrow/__lib_pxi/error.pyi delete mode 100644 python/pyarrow/__lib_pxi/ipc.pyi delete mode 100644 python/pyarrow/__lib_pxi/pandas_shim.pyi delete mode 100644 python/pyarrow/__lib_pxi/table.pyi delete mode 100644 python/pyarrow/_azurefs.pyi delete mode 100644 python/pyarrow/_compute.pyi delete mode 100644 python/pyarrow/_csv.pyi delete mode 100644 python/pyarrow/_cuda.pyi delete mode 100644 python/pyarrow/_dataset.pyi delete mode 100644 python/pyarrow/_dataset_orc.pyi delete mode 100644 python/pyarrow/_dataset_parquet.pyi delete mode 100644 python/pyarrow/_flight.pyi delete mode 100644 python/pyarrow/_fs.pyi delete mode 100644 python/pyarrow/_hdfs.pyi delete mode 100644 python/pyarrow/_json.pyi delete mode 100644 python/pyarrow/_orc.pyi delete mode 100644 python/pyarrow/_parquet.pyi delete mode 100644 python/pyarrow/_s3fs.pyi delete mode 100644 python/pyarrow/_substrait.pyi delete mode 100644 python/pyarrow/acero.pyi delete mode 100644 python/pyarrow/benchmark.pyi delete mode 100644 python/pyarrow/cffi.pyi delete mode 100644 python/pyarrow/compute.pyi delete mode 100644 python/pyarrow/cuda.py delete mode 100644 python/pyarrow/dataset.pyi delete mode 100644 python/pyarrow/feather.pyi delete mode 100644 python/pyarrow/gandiva.pyi delete mode 100644 python/pyarrow/ipc.pyi delete mode 100644 python/pyarrow/json.pyi delete mode 100644 python/pyarrow/orc.pyi delete mode 100644 python/pyarrow/pandas_compat.pyi delete mode 100644 python/pyarrow/substrait.pyi delete mode 100644 python/pyarrow/util.pyi diff --git a/python/pyarrow/__lib_pxi/benchmark.pyi b/python/pyarrow/__lib_pxi/benchmark.pyi deleted file mode 100644 index 592561636af..00000000000 --- a/python/pyarrow/__lib_pxi/benchmark.pyi +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -def benchmark_PandasObjectIsNull(list) -> None: ... # noqa: N802 diff --git a/python/pyarrow/__lib_pxi/builder.pyi b/python/pyarrow/__lib_pxi/builder.pyi deleted file mode 100644 index 39372f8e512..00000000000 --- a/python/pyarrow/__lib_pxi/builder.pyi +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Iterable - -from pyarrow.lib import MemoryPool, _Weakrefable - -from .array import StringArray, StringViewArray - -class StringBuilder(_Weakrefable): - """ - Builder class for UTF8 strings. - - This class exposes facilities for incrementally adding string values and - building the null bitmap for a pyarrow.Array (type='string'). - """ - def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def append(self, value: str | bytes | None): - """ - Append a single value to the builder. - - The value can either be a string/bytes object or a null value - (np.nan or None). - - Parameters - ---------- - value : string/bytes or np.nan/None - The value to append to the string array builder. - """ - def append_values(self, values: Iterable[str | bytes | None]): - """ - Append all the values from an iterable. - - Parameters - ---------- - values : iterable of string/bytes or np.nan/None values - The values to append to the string array builder. - """ - def finish(self) -> StringArray: - """ - Return result of builder as an Array object; also resets the builder. - - Returns - ------- - array : pyarrow.Array - """ - @property - def null_count(self) -> int: ... - def __len__(self) -> int: ... - -class StringViewBuilder(_Weakrefable): - """ - Builder class for UTF8 string views. - - This class exposes facilities for incrementally adding string values and - building the null bitmap for a pyarrow.Array (type='string_view'). - """ - def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def append(self, value: str | bytes | None): - """ - Append a single value to the builder. - - The value can either be a string/bytes object or a null value - (np.nan or None). - - Parameters - ---------- - value : string/bytes or np.nan/None - The value to append to the string array builder. - """ - def append_values(self, values: Iterable[str | bytes | None]): - """ - Append all the values from an iterable. - - Parameters - ---------- - values : iterable of string/bytes or np.nan/None values - The values to append to the string array builder. - """ - def finish(self) -> StringViewArray: - """ - Return result of builder as an Array object; also resets the builder. - - Returns - ------- - array : pyarrow.Array - """ - @property - def null_count(self) -> int: ... - def __len__(self) -> int: ... - -__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow/__lib_pxi/compat.pyi b/python/pyarrow/__lib_pxi/compat.pyi deleted file mode 100644 index 2ea013555c0..00000000000 --- a/python/pyarrow/__lib_pxi/compat.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -def encode_file_path(path: str | bytes) -> bytes: ... -def tobytes(o: str | bytes) -> bytes: ... -def frombytes(o: bytes, *, safe: bool = False): ... - -__all__ = ["encode_file_path", "tobytes", "frombytes"] diff --git a/python/pyarrow/__lib_pxi/config.pyi b/python/pyarrow/__lib_pxi/config.pyi deleted file mode 100644 index 7c2eb8a9c98..00000000000 --- a/python/pyarrow/__lib_pxi/config.pyi +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import NamedTuple - -class VersionInfo(NamedTuple): - major: int - minor: int - patch: int - -class BuildInfo(NamedTuple): - version: str - version_info: VersionInfo - so_version: str - full_so_version: str - compiler_id: str - compiler_version: str - compiler_flags: str - git_id: str - git_description: str - package_kind: str - build_type: str - -class RuntimeInfo(NamedTuple): - simd_level: str - detected_simd_level: str - -cpp_build_info: BuildInfo -cpp_version: str -cpp_version_info: VersionInfo - -def runtime_info() -> RuntimeInfo: ... -def set_timezone_db_path(path: str) -> None: ... - -__all__ = [ - "VersionInfo", - "BuildInfo", - "RuntimeInfo", - "cpp_build_info", - "cpp_version", - "cpp_version_info", - "runtime_info", - "set_timezone_db_path", -] diff --git a/python/pyarrow/__lib_pxi/device.pyi b/python/pyarrow/__lib_pxi/device.pyi deleted file mode 100644 index 6c4f1fdeeea..00000000000 --- a/python/pyarrow/__lib_pxi/device.pyi +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
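For reference, the builder stubs removed a few hunks above (`__lib_pxi/builder.pyi`) describe an incremental string-building API. A minimal usage sketch, assuming `StringBuilder` is importable from `pyarrow.lib` as that stub declares; the values and printed output are illustrative only:

from pyarrow.lib import StringBuilder

b = StringBuilder()
b.append("foo")                  # append a single value
b.append(None)                   # None (or np.nan) appends a null
b.append_values(["bar", "baz"])  # bulk-append from an iterable
print(len(b), b.null_count)      # 4 1
arr = b.finish()                 # returns a string Array and resets the builder
print(arr.to_pylist())           # ['foo', None, 'bar', 'baz']
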
- -import enum - -from pyarrow.lib import _Weakrefable - -class DeviceAllocationType(enum.Flag): - CPU = enum.auto() - CUDA = enum.auto() - CUDA_HOST = enum.auto() - OPENCL = enum.auto() - VULKAN = enum.auto() - METAL = enum.auto() - VPI = enum.auto() - ROCM = enum.auto() - ROCM_HOST = enum.auto() - EXT_DEV = enum.auto() - CUDA_MANAGED = enum.auto() - ONEAPI = enum.auto() - WEBGPU = enum.auto() - HEXAGON = enum.auto() - -class Device(_Weakrefable): - """ - Abstract interface for hardware devices - - This object represents a device with access to some memory spaces. - When handling a Buffer or raw memory address, it allows deciding in which - context the raw memory address should be interpreted - (e.g. CPU-accessible memory, or embedded memory on some particular GPU). - """ - - @property - def type_name(self) -> str: - """ - A shorthand for this device's type. - """ - @property - def device_id(self) -> int: - """ - A device ID to identify this device if there are multiple of this type. - - If there is no "device_id" equivalent (such as for the main CPU device on - non-numa systems) returns -1. - """ - @property - def is_cpu(self) -> bool: - """ - Whether this device is the main CPU device. - - This shorthand method is very useful when deciding whether a memory address - is CPU-accessible. - """ - @property - def device_type(self) -> DeviceAllocationType: - """ - Return the DeviceAllocationType of this device. - """ - -class MemoryManager(_Weakrefable): - """ - An object that provides memory management primitives. - - A MemoryManager is always tied to a particular Device instance. - It can also have additional parameters (such as a MemoryPool to - allocate CPU memory). - - """ - @property - def device(self) -> Device: - """ - The device this MemoryManager is tied to. - """ - @property - def is_cpu(self) -> bool: - """ - Whether this MemoryManager is tied to the main CPU device. - - This shorthand method is very useful when deciding whether a memory - address is CPU-accessible. - """ - -def default_cpu_memory_manager() -> MemoryManager: - """ - Return the default CPU MemoryManager instance. - - The returned singleton instance uses the default MemoryPool. - """ - -__all__ = ["DeviceAllocationType", "Device", "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow/__lib_pxi/error.pyi b/python/pyarrow/__lib_pxi/error.pyi deleted file mode 100644 index c1e1a04ee40..00000000000 --- a/python/pyarrow/__lib_pxi/error.pyi +++ /dev/null @@ -1,70 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - -class ArrowException(Exception): ... -class ArrowInvalid(ValueError, ArrowException): ... 
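The device stubs deleted just above type a small introspection surface for devices and memory managers. A minimal sketch of how it can be exercised, assuming `default_cpu_memory_manager` is re-exported at the package level as in recent PyArrow releases:

import pyarrow as pa

mm = pa.default_cpu_memory_manager()  # MemoryManager tied to the main CPU device
print(mm.is_cpu)                      # True
dev = mm.device
print(dev.is_cpu)                     # True
print(dev.device_id)                  # -1: the main CPU device has no per-device id
print(dev.type_name)                  # shorthand name for this device's type
print(dev.device_type)                # the DeviceAllocationType member for CPU
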
-class ArrowMemoryError(MemoryError, ArrowException): ... -class ArrowKeyError(KeyError, ArrowException): ... -class ArrowTypeError(TypeError, ArrowException): ... -class ArrowNotImplementedError(NotImplementedError, ArrowException): ... -class ArrowCapacityError(ArrowException): ... -class ArrowIndexError(IndexError, ArrowException): ... -class ArrowSerializationError(ArrowException): ... - -class ArrowCancelled(ArrowException): - signum: int | None - def __init__(self, message: str, signum: int | None = None) -> None: ... - -ArrowIOError = IOError - -class StopToken: ... - -def enable_signal_handlers(enable: bool) -> None: ... - -have_signal_refcycle: bool - -class SignalStopHandler: - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... - def __dealloc__(self) -> None: ... - @property - def stop_token(self) -> StopToken: ... - -__all__ = [ - "ArrowException", - "ArrowInvalid", - "ArrowMemoryError", - "ArrowKeyError", - "ArrowTypeError", - "ArrowNotImplementedError", - "ArrowCapacityError", - "ArrowIndexError", - "ArrowSerializationError", - "ArrowCancelled", - "ArrowIOError", - "StopToken", - "enable_signal_handlers", - "have_signal_refcycle", - "SignalStopHandler", -] diff --git a/python/pyarrow/__lib_pxi/ipc.pyi b/python/pyarrow/__lib_pxi/ipc.pyi deleted file mode 100644 index 819326443cf..00000000000 --- a/python/pyarrow/__lib_pxi/ipc.pyi +++ /dev/null @@ -1,722 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import enum -import sys - -from io import IOBase - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import Iterable, Iterator, Literal, Mapping, NamedTuple - -import pandas as pd - -from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer -from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable - -from .io import Buffer, Codec, NativeFile -from .types import DictionaryMemo, KeyValueMetadata - -class MetadataVersion(enum.IntEnum): - V1 = enum.auto() - V2 = enum.auto() - V3 = enum.auto() - V4 = enum.auto() - V5 = enum.auto() - -class WriteStats(NamedTuple): - """IPC write statistics - - Parameters - ---------- - num_messages : int - Number of messages. - num_record_batches : int - Number of record batches. - num_dictionary_batches : int - Number of dictionary batches. - num_dictionary_deltas : int - Delta of dictionaries. - num_replaced_dictionaries : int - Number of replaced dictionaries. - """ - - num_messages: int - num_record_batches: int - num_dictionary_batches: int - num_dictionary_deltas: int - num_replaced_dictionaries: int - -class ReadStats(NamedTuple): - """IPC read statistics - - Parameters - ---------- - num_messages : int - Number of messages. 
- num_record_batches : int - Number of record batches. - num_dictionary_batches : int - Number of dictionary batches. - num_dictionary_deltas : int - Delta of dictionaries. - num_replaced_dictionaries : int - Number of replaced dictionaries. - """ - - num_messages: int - num_record_batches: int - num_dictionary_batches: int - num_dictionary_deltas: int - num_replaced_dictionaries: int - -class IpcReadOptions(_Weakrefable): - """ - Serialization options for reading IPC format. - - Parameters - ---------- - ensure_native_endian : bool, default True - Whether to convert incoming data to platform-native endianness. - use_threads : bool - Whether to use the global CPU thread pool to parallelize any - computational tasks like decompression - included_fields : list - If empty (the default), return all deserialized fields. - If non-empty, the values are the indices of fields to read on - the top-level schema - """ - - ensure_native_endian: bool - use_threads: bool - included_fields: list[int] - def __init__( - self, - *, - ensure_native_endian: bool = True, - use_threads: bool = True, - included_fields: list[int] | None = None, - ) -> None: ... - -class IpcWriteOptions(_Weakrefable): - """ - Serialization options for the IPC format. - - Parameters - ---------- - metadata_version : MetadataVersion, default MetadataVersion.V5 - The metadata version to write. V5 is the current and latest, - V4 is the pre-1.0 metadata version (with incompatible Union layout). - allow_64bit : bool, default False - If true, allow field lengths that don't fit in a signed 32-bit int. - use_legacy_format : bool, default False - Whether to use the pre-Arrow 0.15 IPC format. - compression : str, Codec, or None - compression codec to use for record batch buffers. - If None then batch buffers will be uncompressed. - Must be "lz4", "zstd" or None. - To specify a compression_level use `pyarrow.Codec` - use_threads : bool - Whether to use the global CPU thread pool to parallelize any - computational tasks like compression. - emit_dictionary_deltas : bool - Whether to emit dictionary deltas. Default is false for maximum - stream compatibility. - unify_dictionaries : bool - If true then calls to write_table will attempt to unify dictionaries - across all batches in the table. This can help avoid the need for - replacement dictionaries (which the file format does not support) - but requires computing the unified dictionary and then remapping - the indices arrays. - - This parameter is ignored when writing to the IPC stream format as - the IPC stream format can support replacement dictionaries. - """ - - metadata_version: MetadataVersion - allow_64bit: bool - use_legacy_format: bool - compression: Codec | Literal["lz4", "zstd"] | None - use_threads: bool - emit_dictionary_deltas: bool - unify_dictionaries: bool - def __init__( - self, - *, - metadata_version: MetadataVersion = MetadataVersion.V5, - allow_64bit: bool = False, - use_legacy_format: bool = False, - compression: Codec | Literal["lz4", "zstd"] | None = None, - use_threads: bool = True, - emit_dictionary_deltas: bool = False, - unify_dictionaries: bool = False, - ) -> None: ... - -class Message(_Weakrefable): - """ - Container for an Arrow IPC message with metadata and optional body - """ - - @property - def type(self) -> str: ... - @property - def metadata(self) -> Buffer: ... - @property - def metadata_version(self) -> MetadataVersion: ... - @property - def body(self) -> Buffer | None: ... - def equals(self, other: Message) -> bool: ... 
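# A minimal round-trip sketch showing how the IpcWriteOptions declared above are
# passed to a stream writer, assuming the usual pyarrow.ipc helpers and an Arrow
# build with zstd support.
import pyarrow as pa
import pyarrow.ipc as ipc

batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
options = ipc.IpcWriteOptions(compression="zstd")   # compress record batch buffers

sink = pa.BufferOutputStream()
with ipc.new_stream(sink, batch.schema, options=options) as writer:
    writer.write_batch(batch)
    print(writer.stats.num_record_batches)          # WriteStats, as declared above

# The reader detects the compression itself; IpcReadOptions only controls
# endianness conversion, threading and column projection.
restored = ipc.open_stream(pa.BufferReader(sink.getvalue())).read_all()
assert restored.column("x").to_pylist() == [1, 2, 3]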
- def serialize_to( - self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None - ): - """ - Write message to generic OutputStream - - Parameters - ---------- - sink : NativeFile - alignment : int, default 8 - Byte alignment for metadata and body - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - """ - def serialize(self, alignment: int = 8, memory_pool: MemoryPool | None = None) -> Buffer: - """ - Write message as encapsulated IPC message - - Parameters - ---------- - alignment : int, default 8 - Byte alignment for metadata and body - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - - Returns - ------- - serialized : Buffer - """ - -class MessageReader(_Weakrefable): - """ - Interface for reading Message objects from some source (like an - InputStream) - """ - @classmethod - def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: - """ - Open stream from source, if you want to use memory map use - MemoryMappedFile as source. - - Parameters - ---------- - source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object - A readable source, like an InputStream - """ - def __iter__(self) -> Self: ... - def read_next_message(self) -> Message: - """ - Read next Message from the stream. - - Raises - ------ - StopIteration - At end of stream - """ - __next__ = read_next_message - -# ---------------------------------------------------------------------- -# File and stream readers and writers - -class _CRecordBatchWriter(_Weakrefable): - """The base RecordBatchWriter wrapper. - - Provides common implementations of convenience methods. Should not - be instantiated directly by user code. - """ - def write(self, table_or_batch: Table | RecordBatch): - """ - Write RecordBatch or Table to stream. - - Parameters - ---------- - table_or_batch : {RecordBatch, Table} - """ - def write_batch( - self, - batch: RecordBatch, - custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None, - ): - """ - Write RecordBatch to stream. - - Parameters - ---------- - batch : RecordBatch - custom_metadata : mapping or KeyValueMetadata - Keys and values must be string-like / coercible to bytes - """ - def write_table(self, table: Table, max_chunksize: int | None = None) -> None: - """ - Write Table to stream in (contiguous) RecordBatch objects. - - Parameters - ---------- - table : Table - max_chunksize : int, default None - Maximum number of rows for RecordBatch chunks. Individual chunks may - be smaller depending on the chunk layout of individual columns. - """ - def close(self) -> None: - """ - Close stream and write end-of-stream 0 marker. - """ - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_val, exc_tb): ... - @property - def stats(self) -> WriteStats: - """ - Current IPC write statistics. - """ - -class _RecordBatchStreamWriter(_CRecordBatchWriter): - def __dealloc__(self) -> None: ... - def _open(self, sink, schema: Schema, options: IpcWriteOptions = IpcWriteOptions()): ... - -class _ReadPandasMixin: - def read_pandas(self, **options) -> pd.DataFrame: - """ - Read contents of stream to a pandas.DataFrame. - - Read all record batches as a pyarrow.Table then convert it to a - pandas.DataFrame using Table.to_pandas. - - Parameters - ---------- - **options - Arguments to forward to :meth:`Table.to_pandas`. - - Returns - ------- - df : pandas.DataFrame - """ - -class RecordBatchReader(_Weakrefable): - """Base class for reading stream of record batches. 
- - Record batch readers function as iterators of record batches that also - provide the schema (without the need to get any batches). - - Warnings - -------- - Do not call this class's constructor directly, use one of the - ``RecordBatchReader.from_*`` functions instead. - - Notes - ----- - To import and export using the Arrow C stream interface, use the - ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this - interface is intended for expert users. - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([("x", pa.int64())]) - >>> def iter_record_batches(): - ... for i in range(2): - ... yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema) - >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches()) - >>> print(reader.schema) - x: int64 - >>> for batch in reader: - ... print(batch) - pyarrow.RecordBatch - x: int64 - ---- - x: [1,2,3] - pyarrow.RecordBatch - x: int64 - ---- - x: [1,2,3] - """ - - def __iter__(self) -> Self: ... - def read_next_batch(self) -> RecordBatch: - """ - Read next RecordBatch from the stream. - - Raises - ------ - StopIteration: - At end of stream. - - Returns - ------- - RecordBatch - """ - __next__ = read_next_batch - @property - def schema(self) -> Schema: - """ - Shared schema of the record batches in the stream. - - Returns - ------- - Schema - """ - def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: - """ - Read next RecordBatch from the stream along with its custom metadata. - - Raises - ------ - StopIteration: - At end of stream. - - Returns - ------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ - def iter_batches_with_custom_metadata( - self, - ) -> Iterator[RecordBatchWithMetadata]: - """ - Iterate over record batches from the stream along with their custom - metadata. - - Yields - ------ - RecordBatchWithMetadata - """ - def read_all(self) -> Table: - """ - Read all record batches as a pyarrow.Table. - - Returns - ------- - Table - """ - read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] - def close(self) -> None: - """ - Release any resources associated with the reader. - """ - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_val, exc_tb): ... - def cast(self, target_schema: Schema) -> Self: - """ - Wrap this reader with one that casts each batch lazily as it is pulled. - Currently only a safe cast to target_schema is implemented. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - - Returns - ------- - RecordBatchReader - """ - def _export_to_c(self, out_ptr: int) -> None: - """ - Export to a C ArrowArrayStream struct, given its pointer. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowArrayStream struct. - - Be careful: if you don't pass the ArrowArrayStream struct to a - consumer, array memory will leak. This is a low-level function - intended for expert users. - """ - @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: - """ - Import RecordBatchReader from a C ArrowArrayStream struct, - given its pointer. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowArrayStream struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export to a C ArrowArrayStream PyCapsule. 
- - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - - Returns - ------- - PyCapsule - A capsule containing a C ArrowArrayStream struct. - """ - @classmethod - def _import_from_c_capsule(cls, stream) -> Self: - """ - Import RecordBatchReader from a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - stream: PyCapsule - A capsule containing a C ArrowArrayStream PyCapsule. - - Returns - ------- - RecordBatchReader - """ - @classmethod - def from_stream(cls, data: SupportArrowStream, schema: Schema | None = None) -> Self: - """ - Create RecordBatchReader from a Arrow-compatible stream object. - - This accepts objects implementing the Arrow PyCapsule Protocol for - streams, i.e. objects that have a ``__arrow_c_stream__`` method. - - Parameters - ---------- - data : Arrow-compatible stream object - Any object that implements the Arrow PyCapsule Protocol for - streams. - schema : Schema, default None - The schema to which the stream should be casted, if supported - by the stream object. - - Returns - ------- - RecordBatchReader - """ - @classmethod - def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: - """ - Create RecordBatchReader from an iterable of batches. - - Parameters - ---------- - schema : Schema - The shared schema of the record batches - batches : Iterable[RecordBatch] - The batches that this reader will return. - - Returns - ------- - reader : RecordBatchReader - """ - -class _RecordBatchStreamReader(RecordBatchReader): - @property - def stats(self) -> ReadStats: - """ - Current IPC read statistics. - """ - -class _RecordBatchFileWriter(_RecordBatchStreamWriter): ... - -class RecordBatchWithMetadata(NamedTuple): - """RecordBatch with its custom metadata - - Parameters - ---------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ - - batch: RecordBatch - custom_metadata: KeyValueMetadata - -class _RecordBatchFileReader(_Weakrefable): - @property - def num_record_batches(self) -> int: - """ - The number of record batches in the IPC file. - """ - def get_batch(self, i: int) -> RecordBatch: - """ - Read the record batch with the given index. - - Parameters - ---------- - i : int - The index of the record batch in the IPC file. - - Returns - ------- - batch : RecordBatch - """ - get_record_batch = get_batch - def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: - """ - Read the record batch with the given index along with - its custom metadata - - Parameters - ---------- - i : int - The index of the record batch in the IPC file. - - Returns - ------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ - def read_all(self) -> Table: - """ - Read all record batches as a pyarrow.Table - """ - read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_val, exc_tb): ... - @property - def schema(self) -> Schema: ... - @property - def stats(self) -> ReadStats: ... - -def get_tensor_size(tensor: Tensor) -> int: - """ - Return total size of serialized Tensor including metadata and padding. - - Parameters - ---------- - tensor : Tensor - The tensor for which we want to known the size. 
- """ - -def get_record_batch_size(batch: RecordBatch) -> int: - """ - Return total size of serialized RecordBatch including metadata and padding. - - Parameters - ---------- - batch : RecordBatch - The recordbatch for which we want to know the size. - """ - -def write_tensor(tensor: Tensor, dest: NativeFile) -> int: - """ - Write pyarrow.Tensor to pyarrow.NativeFile object its current position. - - Parameters - ---------- - tensor : pyarrow.Tensor - dest : pyarrow.NativeFile - - Returns - ------- - bytes_written : int - Total number of bytes written to the file - """ - -def read_tensor(source: NativeFile) -> Tensor: - """Read pyarrow.Tensor from pyarrow.NativeFile object from current - position. If the file source supports zero copy (e.g. a memory map), then - this operation does not allocate any memory. This function not assume that - the stream is aligned - - Parameters - ---------- - source : pyarrow.NativeFile - - Returns - ------- - tensor : Tensor - - """ - -def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: - """ - Read length-prefixed message from file or buffer-like object - - Parameters - ---------- - source : pyarrow.NativeFile, file-like object, or buffer-like object - - Returns - ------- - message : Message - """ - -def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | None = None) -> Schema: - """ - Read Schema from message or buffer - - Parameters - ---------- - obj : buffer or Message - dictionary_memo : DictionaryMemo, optional - Needed to be able to reconstruct dictionary-encoded fields - with read_record_batch - - Returns - ------- - schema : Schema - """ - -def read_record_batch( - obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None -) -> RecordBatch: - """ - Read RecordBatch from message, given a known schema. If reading data from a - complete IPC stream, use ipc.open_stream instead - - Parameters - ---------- - obj : Message or Buffer-like - schema : Schema - dictionary_memo : DictionaryMemo, optional - If message contains dictionaries, must pass a populated - DictionaryMemo - - Returns - ------- - batch : RecordBatch - """ - -__all__ = [ - "MetadataVersion", - "WriteStats", - "ReadStats", - "IpcReadOptions", - "IpcWriteOptions", - "Message", - "MessageReader", - "_CRecordBatchWriter", - "_RecordBatchStreamWriter", - "_ReadPandasMixin", - "RecordBatchReader", - "_RecordBatchStreamReader", - "_RecordBatchFileWriter", - "RecordBatchWithMetadata", - "_RecordBatchFileReader", - "get_tensor_size", - "get_record_batch_size", - "write_tensor", - "read_tensor", - "read_message", - "read_schema", - "read_record_batch", -] diff --git a/python/pyarrow/__lib_pxi/pandas_shim.pyi b/python/pyarrow/__lib_pxi/pandas_shim.pyi deleted file mode 100644 index ae8460cc2b3..00000000000 --- a/python/pyarrow/__lib_pxi/pandas_shim.pyi +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from types import ModuleType -from typing import Any, Iterable, TypeGuard - -import pandas as pd - -from numpy import dtype -from pandas.core.dtypes.base import ExtensionDtype - -class _PandasAPIShim: - has_sparse: bool - - def series(self, *args, **kwargs) -> pd.Series: ... - def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... - @property - def have_pandas(self) -> bool: ... - @property - def compat(self) -> ModuleType: ... - @property - def pd(self) -> ModuleType: ... - def infer_dtype(self, obj: Iterable) -> str: ... - def pandas_dtype(self, dtype: str) -> dtype: ... - @property - def loose_version(self) -> Any: ... - @property - def version(self) -> str: ... - def is_v1(self) -> bool: ... - def is_ge_v21(self) -> bool: ... - def is_ge_v23(self) -> bool: ... - def is_ge_v3(self) -> bool: ... - @property - def categorical_type(self) -> type[pd.Categorical]: ... - @property - def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... - @property - def extension_dtype(self) -> type[ExtensionDtype]: ... - def is_array_like( - self, obj: Any - ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... - def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... - def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... - def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... - def is_sparse(self, obj: Any) -> bool: ... - def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... - def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... - def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... - def get_values(self, obj: Any) -> bool: ... - def get_rangeindex_attribute(self, level, name): ... - -_pandas_api: _PandasAPIShim - -__all__ = ["_PandasAPIShim", "_pandas_api"] diff --git a/python/pyarrow/__lib_pxi/table.pyi b/python/pyarrow/__lib_pxi/table.pyi deleted file mode 100644 index 5ad66f9d06c..00000000000 --- a/python/pyarrow/__lib_pxi/table.pyi +++ /dev/null @@ -1,5640 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
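# A short sketch of the tensor IPC helpers declared in the ipc stub above
# (get_tensor_size / write_tensor / read_tensor), using in-memory buffers.
import numpy as np
import pyarrow as pa
import pyarrow.ipc as ipc

tensor = pa.Tensor.from_numpy(np.arange(6, dtype=np.int64).reshape(2, 3))

sink = pa.BufferOutputStream()
written = ipc.write_tensor(tensor, sink)       # bytes written at the current position
print(written, ipc.get_tensor_size(tensor))    # serialized size incl. metadata and padding

# Reading back is zero-copy when the source supports it (e.g. a memory map).
restored = ipc.read_tensor(pa.BufferReader(sink.getvalue()))
assert restored.equals(tensor)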
- -import datetime as dt -import sys - -from decimal import Decimal - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -if sys.version_info >= (3, 10): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias -from typing import ( - Any, - Collection, - Generator, - Generic, - Iterable, - Iterator, - Literal, - Mapping, - Sequence, - TypeVar, - overload, -) - -import numpy as np -import pandas as pd - -from numpy.typing import NDArray -from pyarrow._compute import ( - CastOptions, - CountOptions, - FunctionOptions, - ScalarAggregateOptions, - TDigestOptions, - VarianceOptions, -) -from pyarrow._stubs_typing import ( - Indices, - Mask, - NullEncoding, - NullSelectionBehavior, - Order, - SupportArrowArray, - SupportArrowDeviceArray, - SupportArrowStream, -) -from pyarrow.compute import ArrayOrChunkedArray, Expression -from pyarrow.interchange.dataframe import _PyArrowDataFrame -from pyarrow.lib import Device, MemoryManager, MemoryPool, MonthDayNano, Schema -from pyarrow.lib import Field as _Field - -from . import array, scalar, types -from .array import Array, StructArray, _CastAs, _PandasConvertible -from .device import DeviceAllocationType -from .io import Buffer -from .ipc import RecordBatchReader -from .scalar import Int64Scalar, Scalar, NullableCollection -from .tensor import Tensor -from .types import DataType, _AsPyType, _BasicDataType, _DataTypeT - -Field: TypeAlias = _Field[DataType] -_ScalarT = TypeVar("_ScalarT", bound=Scalar) -_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) - -_Aggregation: TypeAlias = Literal[ - "all", - "any", - "approximate_median", - "count", - "count_all", - "count_distinct", - "distinct", - "first", - "first_last", - "last", - "list", - "max", - "mean", - "min", - "min_max", - "one", - "product", - "stddev", - "sum", - "tdigest", - "variance", -] -_AggregationPrefixed: TypeAlias = Literal[ - "hash_all", - "hash_any", - "hash_approximate_median", - "hash_count", - "hash_count_all", - "hash_count_distinct", - "hash_distinct", - "hash_first", - "hash_first_last", - "hash_last", - "hash_list", - "hash_max", - "hash_mean", - "hash_min", - "hash_min_max", - "hash_one", - "hash_product", - "hash_stddev", - "hash_sum", - "hash_tdigest", - "hash_variance", -] -Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed -AggregateOptions: TypeAlias = ( - ScalarAggregateOptions | CountOptions | TDigestOptions | VarianceOptions | FunctionOptions -) - -UnarySelector: TypeAlias = str -NullarySelector: TypeAlias = tuple[()] -NarySelector: TypeAlias = list[str] | tuple[str, ...] -ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector - -class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): - """ - An array-like composed from a (possibly empty) collection of pyarrow.Arrays - - Warnings - -------- - Do not call this class's constructor directly. - - Examples - -------- - To construct a ChunkedArray object use :func:`pyarrow.chunked_array`: - - >>> import pyarrow as pa - >>> pa.chunked_array([], type=pa.int8()) - - [ - ... - ] - - >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> isinstance(pa.chunked_array([[2, 2, 4], [4, 5, 100]]), pa.ChunkedArray) - True - """ - - @property - def data(self) -> Self: ... - @property - def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: - """ - Return data type of a ChunkedArray. 
- - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.type - DataType(int64) - """ - def length(self) -> int: - """ - Return length of a ChunkedArray. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.length() - 6 - """ - __len__ = length - def to_string( - self, - *, - indent: int = 0, - window: int = 5, - container_window: int = 2, - skip_new_lines: bool = False, - ) -> str: - """ - Render a "pretty-printed" string representation of the ChunkedArray - - Parameters - ---------- - indent : int - How much to indent right the content of the array, - by default ``0``. - window : int - How many items to preview within each chunk at the begin and end - of the chunk when the chunk is bigger than the window. - The other elements will be ellipsed. - container_window : int - How many chunks to preview at the begin and end - of the array when the array is bigger than the window. - The other elements will be ellipsed. - This setting also applies to list columns. - skip_new_lines : bool - If the array should be rendered as a single line of text - or if each element should be on its own line. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.to_string(skip_new_lines=True) - '[[2,2,4],[4,5,100]]' - """ - format = to_string - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ - @property - def null_count(self) -> int: - """ - Number of null entries - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.null_count - 1 - """ - @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the chunked array. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.nbytes - 49 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the chunked array. - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.get_total_buffer_size() - 49 - """ - def __sizeof__(self) -> int: ... - @overload - def __getitem__(self, key: slice) -> Self: ... - @overload - def __getitem__(self, key: int) -> _Scalar_co: ... 
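# The __getitem__ overloads above distinguish integer indexing (a Scalar) from
# slicing (a ChunkedArray); a quick illustration of both:
import pyarrow as pa

arr = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
print(arr[1])       # Int64Scalar with value 2
print(arr[2:4])     # zero-copy ChunkedArray slice containing [4, 4]
print(arr[::2])     # a step other than 1 produces a copy rather than a view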
- def __getitem__(self, key): - """ - Slice or return value at given index - - Parameters - ---------- - key : integer or slice - Slices with step not equal to 1 (or None) will produce a copy - rather than a zero-copy view - - Returns - ------- - value : Scalar (index) or ChunkedArray (slice) - """ - def getitem(self, i: int) -> Scalar: ... - def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[scalar.BooleanScalar]: - """ - Return boolean array indicating the null values. - - Parameters - ---------- - nan_is_null : bool (optional, default False) - Whether floating-point NaN values should also be considered null. - - Returns - ------- - array : boolean Array or ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.is_null() - - [ - [ - false, - false, - false, - false, - true, - false - ] - ] - """ - def is_nan(self) -> ChunkedArray[scalar.BooleanScalar]: - """ - Return boolean array indicating the NaN values. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) - >>> arr.is_nan() - - [ - [ - false, - true, - false, - false, - null, - false - ] - ] - """ - def is_valid(self) -> ChunkedArray[scalar.BooleanScalar]: - """ - Return boolean array indicating the non-null values. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.is_valid() - - [ - [ - true, - true, - true - ], - [ - true, - false, - true - ] - ] - """ - def fill_null(self, fill_value: Scalar[_DataTypeT] | _AsPyType | str | None) -> Self: - """ - Replace each null element in values with fill_value. - - See :func:`pyarrow.compute.fill_null` for full usage. - - Parameters - ---------- - fill_value : any - The replacement value for null entries. - - Returns - ------- - result : Array or ChunkedArray - A new array with nulls replaced by the given value. - - Examples - -------- - >>> import pyarrow as pa - >>> fill_value = pa.scalar(5, type=pa.int8()) - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.fill_null(fill_value) - - [ - [ - 2, - 2, - 4, - 4, - 5, - 100 - ] - ] - """ - def equals(self, other: Self | Array[Any] | Iterable[Any]) -> bool: - """ - Return whether the contents of two chunked arrays are equal. - - Parameters - ---------- - other : pyarrow.ChunkedArray - Chunked array to compare against. - - Returns - ------- - are_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) - ... ) - >>> n_legs.equals(n_legs) - True - >>> n_legs.equals(animals) - False - """ - def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: - """ - Return a NumPy copy of this array (experimental). - - Parameters - ---------- - zero_copy_only : bool, default False - Introduced for signature consistence with pyarrow.Array.to_numpy. - This must be False here since NumPy arrays' buffer must be contiguous. - - Returns - ------- - array : numpy.ndarray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.to_numpy() - array([ 2, 2, 4, 4, 5, 100]) - """ - def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... 
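# __array__ above lets NumPy consume a ChunkedArray directly; np.asarray behaves
# like to_numpy() for CPU data (always a copy, since chunks are not contiguous).
import numpy as np
import pyarrow as pa

arr = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
print(np.asarray(arr))                    # array([  2,   2,   4,   4,   5, 100])
print(np.asarray(arr, dtype="float64"))   # the dtype request is forwarded to __array__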
- @overload - def cast( - self, - target_type: None = None, - safe: bool | None = None, - options: CastOptions | None = None, - ) -> Self: ... - @overload - def cast( - self, target_type: _CastAs, safe: bool | None = None, options: CastOptions | None = None - ) -> ChunkedArray[Scalar[_CastAs]]: ... - def cast(self, *args, **kwargs): - """ - Cast array values to another data type - - See :func:`pyarrow.compute.cast` for usage. - - Parameters - ---------- - target_type : DataType, None - Type to cast array to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - cast : Array or ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.type - DataType(int64) - - Change the data type of an array: - - >>> n_legs_seconds = n_legs.cast(pa.duration("s")) - >>> n_legs_seconds.type - DurationType(duration[s]) - """ - def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: - """ - Compute dictionary-encoded representation of array. - - See :func:`pyarrow.compute.dictionary_encode` for full usage. - - Parameters - ---------- - null_encoding : str, default "mask" - How to handle null entries. - - Returns - ------- - encoded : ChunkedArray - A dictionary-encoded version of this array. - - Examples - -------- - >>> import pyarrow as pa - >>> animals = pa.chunked_array( - ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) - ... ) - >>> animals.dictionary_encode() - - [ - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 3, - 4, - 5 - ] - ] - """ - def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: - """ - Flatten this ChunkedArray. If it has a struct type, the column is - flattened into one array per struct field. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : list of ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> c_arr = pa.chunked_array(n_legs.value_counts()) - >>> c_arr - - [ - -- is_valid: all not null - -- child 0 type: int64 - [ - 2, - 4, - 5, - 100 - ] - -- child 1 type: int64 - [ - 2, - 2, - 1, - 1 - ] - ] - >>> c_arr.flatten() - [ - [ - [ - 2, - 4, - 5, - 100 - ] - ], - [ - [ - 2, - 2, - 1, - 1 - ] - ]] - >>> c_arr.type - StructType(struct) - >>> n_legs.type - DataType(int64) - """ - def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: - """ - Flatten this ChunkedArray into a single non-chunked array. 
- - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.combine_chunks() - - [ - 2, - 2, - 4, - 4, - 5, - 100 - ] - """ - def unique(self) -> ChunkedArray[_Scalar_co]: - """ - Compute distinct elements in array - - Returns - ------- - pyarrow.Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.unique() - - [ - 2, - 4, - 5, - 100 - ] - """ - def value_counts(self) -> StructArray: - """ - Compute counts of unique elements in array. - - Returns - ------- - An array of structs - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.value_counts() - - -- is_valid: all not null - -- child 0 type: int64 - [ - 2, - 4, - 5, - 100 - ] - -- child 1 type: int64 - [ - 2, - 2, - 1, - 1 - ] - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this ChunkedArray - - Parameters - ---------- - offset : int, default 0 - Offset from start of array to slice - length : int, default None - Length of slice (default is until end of batch starting from - offset) - - Returns - ------- - sliced : ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.slice(2, 2) - - [ - [ - 4 - ], - [ - 4 - ] - ] - """ - def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: - """ - Select values from the chunked array. - - See :func:`pyarrow.compute.filter` for full usage. - - Parameters - ---------- - mask : Array or array-like - The boolean mask to filter the chunked array with. - null_selection_behavior : str, default "drop" - How nulls in the mask should be handled. - - Returns - ------- - filtered : Array or ChunkedArray - An array of the same type, with only the elements selected by - the boolean mask. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> mask = pa.array([True, False, None, True, False, True]) - >>> n_legs.filter(mask) - - [ - [ - 2 - ], - [ - 4, - 100 - ] - ] - >>> n_legs.filter(mask, null_selection_behavior="emit_null") - - [ - [ - 2, - null - ], - [ - 4, - 100 - ] - ] - """ - @overload - def index( - self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], - value: Scalar[_DataTypeT] | _AsPyType, - start: int | None = None, - end: int | None = None, - *, - memory_pool: MemoryPool | None = None, - ) -> Int64Scalar: ... - @overload - def index( - self, - value: Scalar[_DataTypeT], - start: int | None = None, - end: int | None = None, - *, - memory_pool: MemoryPool | None = None, - ) -> Int64Scalar: ... - def index(self, *args, **kwargs): - """ - Find the first index of a value. - - See :func:`pyarrow.compute.index` for full usage. - - Parameters - ---------- - value : Scalar or object - The value to look for in the array. 
- start : int, optional - The start index where to look for `value`. - end : int, optional - The end index where to look for `value`. - memory_pool : MemoryPool, optional - A memory pool for potential memory allocations. - - Returns - ------- - index : Int64Scalar - The index of the value in the array (-1 if not found). - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.index(4) - - >>> n_legs.index(4, start=3) - - """ - def take(self, indices: Indices) -> Self: - """ - Select values from the chunked array. - - See :func:`pyarrow.compute.take` for full usage. - - Parameters - ---------- - indices : Array or array-like - The indices in the array whose values will be returned. - - Returns - ------- - taken : Array or ChunkedArray - An array with the same datatype, containing the taken values. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.take([1, 4, 5]) - - [ - [ - 2, - 5, - 100 - ] - ] - """ - def drop_null(self) -> Self: - """ - Remove missing values from a chunked array. - See :func:`pyarrow.compute.drop_null` for full description. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.drop_null() - - [ - [ - 2, - 2 - ], - [ - 4, - 5, - 100 - ] - ] - """ - def sort(self, order: Order = "ascending", **kwargs) -> Self: - """ - Sort the ChunkedArray - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - result : ChunkedArray - """ - def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Unify dictionaries across all chunks. - - This method returns an equivalent chunked array, but where all - chunks share the same dictionary values. Dictionary indices are - transposed accordingly. - - If there are no dictionaries in the chunked array, it is returned - unchanged. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() - >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() - >>> c_arr = pa.chunked_array([arr_1, arr_2]) - >>> c_arr - - [ - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ] - ] - >>> c_arr.unify_dictionaries() - - [ - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 3, - 4, - 5 - ] - ] - """ - @property - def num_chunks(self) -> int: - """ - Number of underlying chunks. 
- - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs.num_chunks - 2 - """ - def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: - """ - Select a chunk by its index. - - Parameters - ---------- - i : int - - Returns - ------- - pyarrow.Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs.chunk(1) - - [ - 4, - 5, - 100 - ] - """ - @property - def chunks(self) -> list[Array[_Scalar_co]]: - """ - Convert to a list of single-chunked arrays. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.chunks - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ]] - """ - @overload - def iterchunks( - self: ChunkedArray[scalar.NullScalar], - ) -> Generator[array.NullArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.BooleanScalar], - ) -> Generator[array.BooleanArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.UInt8Scalar], - ) -> Generator[array.UInt8Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Int8Scalar], - ) -> Generator[array.Int8Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.UInt16Scalar], - ) -> Generator[array.UInt16Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Int16Scalar], - ) -> Generator[array.Int16Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.UInt32Scalar], - ) -> Generator[array.UInt32Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Int32Scalar], - ) -> Generator[array.Int32Array, None, None]: - """ - Convert to an iterator of ChunkArrays. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> for i in n_legs.iterchunks(): - ... print(i.null_count) - 0 - 1 - - """ - @overload - def iterchunks( - self: ChunkedArray[scalar.UInt64Scalar], - ) -> Generator[array.UInt64Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Int64Scalar], - ) -> Generator[array.Int64Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.HalfFloatScalar], - ) -> Generator[array.HalfFloatArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.FloatScalar], - ) -> Generator[array.FloatArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.DoubleScalar], - ) -> Generator[array.DoubleArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Decimal32Scalar], - ) -> Generator[array.Decimal32Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Decimal64Scalar], - ) -> Generator[array.Decimal64Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Decimal128Scalar], - ) -> Generator[array.Decimal128Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Decimal256Scalar], - ) -> Generator[array.Decimal256Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Date32Scalar], - ) -> Generator[array.Date32Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Date64Scalar], - ) -> Generator[array.Date64Array, None, None]: ... 
- @overload - def iterchunks( - self: ChunkedArray[scalar.Time32Scalar[types._Time32Unit]], - ) -> Generator[array.Time32Array[types._Time32Unit], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Time64Scalar[types._Time64Unit]], - ) -> Generator[array.Time64Array[types._Time64Unit], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.DurationScalar[types._Unit]], - ) -> Generator[array.DurationArray[types._Unit], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.MonthDayNanoIntervalScalar], - ) -> Generator[array.MonthDayNanoIntervalArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.BinaryScalar], - ) -> Generator[array.BinaryArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.LargeBinaryScalar], - ) -> Generator[array.LargeBinaryArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.FixedSizeBinaryScalar], - ) -> Generator[array.FixedSizeBinaryArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.StringScalar], - ) -> Generator[array.StringArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.LargeStringScalar], - ) -> Generator[array.LargeStringArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.BinaryViewScalar], - ) -> Generator[array.BinaryViewArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.StringViewScalar], - ) -> Generator[array.StringViewArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.ListScalar[_DataTypeT]], - ) -> Generator[array.ListArray[scalar.ListScalar[_DataTypeT]], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.FixedSizeListScalar[_DataTypeT, types._Size]], - ) -> Generator[array.FixedSizeListArray[_DataTypeT, types._Size], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.LargeListScalar[_DataTypeT]], - ) -> Generator[array.LargeListArray[_DataTypeT], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.LargeListViewScalar[_DataTypeT]], - ) -> Generator[array.LargeListViewArray[_DataTypeT], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.StructScalar], - ) -> Generator[array.StructArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.MapScalar[array._MapKeyT, array._MapItemT]], - ) -> Generator[array.MapArray[array._MapKeyT, array._MapItemT], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.DictionaryScalar[types._IndexT, types._BasicValueT]], - ) -> Generator[array.DictionaryArray[types._IndexT, types._BasicValueT], None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.RunEndEncodedScalar], - ) -> Generator[array.RunEndEncodedArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.UnionScalar], - ) -> Generator[array.UnionArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.Bool8Scalar], - ) -> Generator[array.Bool8Array, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.UuidScalar], - ) -> Generator[array.UuidArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.JsonScalar], - ) -> Generator[array.JsonArray, None, None]: ... - @overload - def iterchunks( - self: ChunkedArray[scalar.OpaqueScalar], - ) -> Generator[array.OpaqueArray, None, None]: ... 
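# The typed overloads above mean iterchunks() yields the concrete Array subclass
# matching the chunked array's scalar type, e.g. StringArray for string data:
import pyarrow as pa

animals = pa.chunked_array([["Flamingo", "Parrot"], ["Dog"]])
for chunk in animals.iterchunks():
    print(type(chunk).__name__, len(chunk))   # StringArray 2 / StringArray 1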
- def iterchunks(self): - """ - Convert to an iterator of ChunkArrays. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> for i in n_legs.iterchunks(): - ... print(i.null_count) - 0 - 1 - - """ - def __iter__(self) -> Iterator[_Scalar_co]: ... - def to_pylist( - self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[_AsPyType | None]: - """ - Convert to a list of native Python objects. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.to_pylist() - [2, 2, 4, 4, None, 100] - """ - def __arrow_c_stream__(self, requested_schema=None) -> Any: - """ - Export to a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - - Returns - ------- - PyCapsule - A capsule containing a C ArrowArrayStream struct. - """ - @classmethod - def _import_from_c_capsule(cls, stream) -> Self: - """ - Import ChunkedArray from a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - stream: PyCapsule - A capsule containing a C ArrowArrayStream PyCapsule. - - Returns - ------- - ChunkedArray - """ - @property - def is_cpu(self) -> bool: - """ - Whether all chunks in the ChunkedArray are CPU-accessible. - """ - -@overload -def chunked_array( - values: Iterable[NullableCollection[bool]], - type: None = None, -) -> ChunkedArray[scalar.BooleanScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[int]], - type: None = None, -) -> ChunkedArray[scalar.Int64Scalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[float]], - type: None = None, -) -> ChunkedArray[scalar.DoubleScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[Decimal]], - type: None = None, -) -> ChunkedArray[scalar.Decimal128Scalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[dict[str, Any]]], - type: None = None, -) -> ChunkedArray[scalar.StructScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[dt.datetime]], - type: None = None, -) -> ChunkedArray[scalar.TimestampScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[dt.date]], - type: None = None, -) -> ChunkedArray[scalar.Date32Scalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[dt.time]], - type: None = None, -) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[dt.timedelta]], - type: None = None, -) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... 
-@overload -def chunked_array( - values: Iterable[NullableCollection[MonthDayNano]], - type: None = None, -) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[str]], - type: None = None, -) -> ChunkedArray[scalar.StringScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[bytes]], - type: None = None, -) -> ChunkedArray[scalar.BinaryScalar]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[list[Any]]], - type: None = None, -) -> ChunkedArray[scalar.ListScalar[Any]]: ... -@overload -def chunked_array( - values: Iterable[NullableCollection[types.Decimal128Type[Any, Any]]], - type: types.Decimal128Type, -) -> ChunkedArray[types.Decimal128Type]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["null"] | types.NullType, -) -> ChunkedArray[scalar.NullScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["bool", "boolean"] | types.BoolType, -) -> ChunkedArray[scalar.BooleanScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["i1", "int8"] | types.Int8Type, -) -> ChunkedArray[scalar.Int8Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["i2", "int16"] | types.Int16Type, -) -> ChunkedArray[scalar.Int16Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["i4", "int32"] | types.Int32Type, -) -> ChunkedArray[scalar.Int32Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["i8", "int64"] | types.Int64Type, -) -> ChunkedArray[scalar.Int64Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["u1", "uint8"] | types.UInt8Type, -) -> ChunkedArray[scalar.UInt8Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["u2", "uint16"] | types.UInt16Type, -) -> ChunkedArray[scalar.UInt16Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["u4", "uint32"] | types.Uint32Type, -) -> ChunkedArray[scalar.UInt32Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["u8", "uint64"] | types.UInt64Type, -) -> ChunkedArray[scalar.UInt64Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["f2", "halffloat", "float16"] | types.Float16Type, -) -> ChunkedArray[scalar.HalfFloatScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["f4", "float", "float32"] | types.Float32Type, -) -> ChunkedArray[scalar.FloatScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["f8", "double", "float64"] | types.Float64Type, -) -> ChunkedArray[scalar.DoubleScalar]: ... 
-@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["string", "str", "utf8"] | types.StringType, -) -> ChunkedArray[scalar.StringScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["binary"] | types.BinaryType, -) -> ChunkedArray[scalar.BinaryScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, -) -> ChunkedArray[scalar.LargeStringScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["large_binary"] | types.LargeBinaryType, -) -> ChunkedArray[scalar.LargeBinaryScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["binary_view"] | types.BinaryViewType, -) -> ChunkedArray[scalar.BinaryViewScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["string_view"] | types.StringViewType, -) -> ChunkedArray[scalar.StringViewScalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["date32", "date32[day]"] | types.Date32Type, -) -> ChunkedArray[scalar.Date32Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["date64", "date64[ms]"] | types.Date64Type, -) -> ChunkedArray[scalar.Date64Scalar]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], -) -> ChunkedArray[scalar.Time32Scalar[Literal["s"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], -) -> ChunkedArray[scalar.Time32Scalar[Literal["ms"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], -) -> ChunkedArray[scalar.Time64Scalar[Literal["us"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], -) -> ChunkedArray[scalar.Time64Scalar[Literal["ns"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], -) -> ChunkedArray[scalar.TimestampScalar[Literal["s"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], -) -> ChunkedArray[scalar.TimestampScalar[Literal["ms"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], -) -> ChunkedArray[scalar.TimestampScalar[Literal["us"]]]: ... 
-@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], -) -> ChunkedArray[scalar.TimestampScalar[Literal["ns"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["duration[s]"] | types.DurationType[Literal["s"]], -) -> ChunkedArray[scalar.DurationScalar[Literal["s"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], -) -> ChunkedArray[scalar.DurationScalar[Literal["ms"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["duration[us]"] | types.DurationType[Literal["us"]], -) -> ChunkedArray[scalar.DurationScalar[Literal["us"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray], - type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], -) -> ChunkedArray[scalar.DurationScalar[Literal["ns"]]]: ... -@overload -def chunked_array( - values: Iterable[Iterable[Any]] | SupportArrowStream | SupportArrowArray, - type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, -) -> ChunkedArray[scalar.MonthDayNanoIntervalScalar]: ... -@overload -def chunked_array( - values: Iterable[Array[_ScalarT]], - type: None = None, -) -> ChunkedArray[_ScalarT]: ... -def chunked_array(value, type=None): - """ - Construct chunked array from list of array-like objects - - Parameters - ---------- - arrays : Array, list of Array, or array-like - Must all be the same data type. Can be empty only if type also passed. - Any Arrow-compatible array that implements the Arrow PyCapsule Protocol - (has an ``__arrow_c_array__`` or ``__arrow_c_stream__`` method) can be - passed as well. - type : DataType or string coercible to DataType - - Returns - ------- - ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> pa.chunked_array([], type=pa.int8()) - - [ - ... - ] - - >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - """ - -_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) - -class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): - def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... - def __dataframe__( - self, nan_as_null: bool = False, allow_copy: bool = True - ) -> _PyArrowDataFrame: - """ - Return the dataframe interchange object implementing the interchange protocol. - - Parameters - ---------- - nan_as_null : bool, default False - Whether to tell the DataFrame to overwrite null values in the data - with ``NaN`` (or ``NaT``). - allow_copy : bool, default True - Whether to allow memory copying when exporting. If set to False - it would cause non-zero-copy exports to fail. - - Returns - ------- - DataFrame interchange object - The object which consuming library can use to ingress the dataframe. - - Notes - ----- - Details on the interchange protocol: - https://data-apis.org/dataframe-protocol/latest/index.html - `nan_as_null` currently has no effect; once support for nullable extension - dtypes is added, this value should be propagated to columns. - """ - @overload - def __getitem__(self, key: int | str) -> _ColumnT: ... - @overload - def __getitem__(self, key: slice) -> Self: ... 
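# Illustrative sketch of the __getitem__ overloads declared above: an int or str key
# selects a column, while a slice returns the same tabular type sliced by rows.
# The table contents are made up for demonstration.
import pyarrow as pa

t = pa.table({"n_legs": [2, 4, 5, 100],
              "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
col = t["n_legs"]   # column by name -> ChunkedArray
same = t[0]         # column by index -> ChunkedArray
rows = t[1:3]       # slice -> Table containing rows 1 and 2
assert col.equals(same)
assert rows.num_rows == 2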
- def __getitem__(self, key): - """ - Slice or return column at given index or column name - - Parameters - ---------- - key : integer, str, or slice - Slices with step not equal to 1 (or None) will produce a copy - rather than a zero-copy view - - Returns - ------- - Array (from RecordBatch) or ChunkedArray (from Table) for column input. - RecordBatch or Table for slice input. - """ - def __len__(self) -> int: ... - def column(self, i: int | str) -> _ColumnT: - """ - Select single column from Table or RecordBatch. - - Parameters - ---------- - i : int or string - The index or name of the column to retrieve. - - Returns - ------- - column : Array (for RecordBatch) or ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Select a column by numeric index: - - >>> table.column(0) - - [ - [ - 2, - 4, - 5, - 100 - ] - ] - - Select a column by its name: - - >>> table.column("animals") - - [ - [ - "Flamingo", - "Horse", - "Brittle stars", - "Centipede" - ] - ] - """ - @property - def column_names(self) -> list[str]: - """ - Names of the Table or RecordBatch columns. - - Returns - ------- - list of str - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> table = pa.Table.from_arrays( - ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]], - ... names=["n_legs", "animals"], - ... ) - >>> table.column_names - ['n_legs', 'animals'] - """ - @property - def columns(self) -> list[_ColumnT]: - """ - List of all columns in numerical order. - - Returns - ------- - columns : list of Array (for RecordBatch) or list of ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.columns - [ - [ - [ - null, - 4, - 5, - null - ] - ], - [ - [ - "Flamingo", - "Horse", - null, - "Centipede" - ] - ]] - """ - def drop_null(self) -> Self: - """ - Remove rows that contain missing values from a Table or RecordBatch. - - See :func:`pyarrow.compute.drop_null` for full usage. - - Returns - ------- - Table or RecordBatch - A tabular object with the same schema, with rows containing - no missing values. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [None, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", None, "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.drop_null() - pyarrow.Table - year: double - n_legs: int64 - animals: string - ---- - year: [[2022,2021]] - n_legs: [[4,100]] - animals: [["Horse","Centipede"]] - """ - def field(self, i: int | str) -> Field: - """ - Select a schema field by its column name or numeric index. - - Parameters - ---------- - i : int or string - The index or name of the field to retrieve. - - Returns - ------- - Field - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... 
"n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.field(0) - pyarrow.Field - >>> table.field(1) - pyarrow.Field - """ - @classmethod - def from_pydict( - cls, - mapping: Mapping[str, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray], - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table or RecordBatch from Arrow arrays or columns. - - Parameters - ---------- - mapping : dict or Mapping - A mapping of strings to Arrays or Python lists. - schema : Schema, default None - If not passed, will be inferred from the Mapping values. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - Table or RecordBatch - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> pydict = {"n_legs": n_legs, "animals": animals} - - Construct a Table from a dictionary of arrays: - - >>> pa.Table.from_pydict(pydict) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_pydict(pydict).schema - n_legs: int64 - animals: string - - Construct a Table from a dictionary of arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a dictionary of arrays with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.Table.from_pydict(pydict, schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - @classmethod - def from_pylist( - cls, - mapping: Sequence[Mapping[str, Any]], - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table or RecordBatch from list of rows / dictionaries. - - Parameters - ---------- - mapping : list of dicts of rows - A mapping of strings to row values. - schema : Schema, default None - If not passed, will be inferred from the first row of the - mapping values. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - Table or RecordBatch - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] - - Construct a Table from a list of rows: - - >>> pa.Table.from_pylist(pylist) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4]] - animals: [["Flamingo","Dog"]] - - Construct a Table from a list of rows with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pylist(pylist, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a list of rows with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... 
metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.Table.from_pylist(pylist, schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - def itercolumns(self) -> Generator[_ColumnT, None, None]: - """ - Iterator over all columns in their numerical order. - - Yields - ------ - Array (for RecordBatch) or ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> for i in table.itercolumns(): - ... print(i.null_count) - 2 - 1 - """ - @property - def num_columns(self) -> int: ... - @property - def num_rows(self) -> int: ... - @property - def shape(self) -> tuple[int, int]: - """ - Dimensions of the table or record batch: (#rows, #columns). - - Returns - ------- - (int, int) - Number of rows and number of columns. - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table.shape - (4, 2) - """ - @property - def schema(self) -> Schema: ... - @property - def nbytes(self) -> int: ... - def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: - """ - Sort the Table or RecordBatch by one or multiple columns. - - Parameters - ---------- - sorting : str or list[tuple(name, order)] - Name of the column to use to sort (ascending), or - a list of multiple sorting conditions where - each entry is a tuple with column name - and sorting order ("ascending" or "descending") - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - Table or RecordBatch - A new tabular object sorted according to the sort keys. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.sort_by("animal") - pyarrow.Table - year: int64 - n_legs: int64 - animal: string - ---- - year: [[2019,2021,2021,2020,2022,2022]] - n_legs: [[5,100,4,2,4,2]] - animal: [["Brittle stars","Centipede","Dog","Flamingo","Horse","Parrot"]] - """ - def take(self, indices: Indices) -> Self: - """ - Select rows from a Table or RecordBatch. - - See :func:`pyarrow.compute.take` for full usage. - - Parameters - ---------- - indices : Array or array-like - The indices in the tabular object whose rows will be returned. - - Returns - ------- - Table or RecordBatch - A tabular object with the same schema, containing the taken rows. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> table = pa.Table.from_pandas(df) - >>> table.take([1, 3]) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2022,2021]] - n_legs: [[4,100]] - animals: [["Horse","Centipede"]] - """ - def filter( - self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" - ) -> Self: - """ - Select rows from the table or record batch based on a boolean mask. - - The Table can be filtered based on a mask, which will be passed to - :func:`pyarrow.compute.filter` to perform the filtering, or it can - be filtered through a boolean :class:`.Expression` - - Parameters - ---------- - mask : Array or array-like or .Expression - The boolean mask or the :class:`.Expression` to filter the table with. - null_selection_behavior : str, default "drop" - How nulls in the mask should be handled, does nothing if - an :class:`.Expression` is used. - - Returns - ------- - filtered : Table or RecordBatch - A tabular object of the same schema, with only the rows selected - by applied filtering - - Examples - -------- - Using a Table (works similarly for RecordBatch): - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Define an expression and select rows: - - >>> import pyarrow.compute as pc - >>> expr = pc.field("year") <= 2020 - >>> table.filter(expr) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2019]] - n_legs: [[2,5]] - animals: [["Flamingo","Brittle stars"]] - - Define a mask and select rows: - - >>> mask = [True, True, False, None] - >>> table.filter(mask) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022]] - n_legs: [[2,4]] - animals: [["Flamingo","Horse"]] - >>> table.filter(mask, null_selection_behavior="emit_null") - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,null]] - n_legs: [[2,4,null]] - animals: [["Flamingo","Horse",null]] - """ - def to_pydict( - self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None - ) -> dict[str, list[Any]]: - """ - Convert the Table or RecordBatch to a dict or OrderedDict. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Returns - ------- - dict - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... 
) - >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> table.to_pydict() - {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']} - """ - def to_pylist( - self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None - ) -> list[dict[str, Any]]: - """ - Convert the Table or RecordBatch to a list of rows / dictionaries. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Returns - ------- - list - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> data = [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]] - >>> table = pa.table(data, names=["n_legs", "animals"]) - >>> table.to_pylist() - [{'n_legs': 2, 'animals': 'Flamingo'}, {'n_legs': 4, 'animals': 'Horse'}, ... - """ - def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: - """ - Return human-readable string representation of Table or RecordBatch. - - Parameters - ---------- - show_metadata : bool, default False - Display Field-level and Schema-level KeyValueMetadata. - preview_cols : int, default 0 - Display values of the columns for the first N columns. - - Returns - ------- - str - """ - def remove_column(self, i: int) -> Self: ... - def drop_columns(self, columns: str | list[str]) -> Self: - """ - Drop one or more columns and return a new Table or RecordBatch. - - Parameters - ---------- - columns : str or list[str] - Field name(s) referencing existing column(s). - - Raises - ------ - KeyError - If any of the passed column names do not exist. - - Returns - ------- - Table or RecordBatch - A tabular object without the column(s). - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Drop one column: - - >>> table.drop_columns("animals") - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,4,5,100]] - - Drop one or more columns: - - >>> table.drop_columns(["n_legs", "animals"]) - pyarrow.Table - ... - ---- - """ - def add_column( - self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: ... - def append_column( - self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Append column at end of columns. - - Parameters - ---------- - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. - - Returns - ------- - Table or RecordBatch - New table or record batch with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... 
"animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Append column at the end: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.append_column("year", [year]) - pyarrow.Table - n_legs: int64 - animals: string - year: int64 - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - year: [[2021,2022,2019,2021]] - """ - -class RecordBatch(_Tabular[Array]): - """ - Batch of rows of columns of equal length - - Warnings - -------- - Do not call this class's constructor directly, use one of the - ``RecordBatch.from_*`` functions instead. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Constructing a RecordBatch from arrays: - - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Constructing a RecordBatch from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.RecordBatch.from_pandas(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_pandas(df).to_pandas() - year month day n_legs animals - 0 2020 3 1 2 Flamingo - 1 2022 5 5 4 Horse - 2 2021 7 9 5 Brittle stars - 3 2022 9 13 100 Centipede - - Constructing a RecordBatch from pylist: - - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] - >>> pa.RecordBatch.from_pylist(pylist).to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Dog - - You can also construct a RecordBatch using :func:`pyarrow.record_batch`: - - >>> pa.record_batch([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - >>> pa.record_batch(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. 
- - Raises - ------ - ArrowInvalid - """ - def replace_schema_metadata( - self, metadata: dict[str | bytes, str | bytes] | None = None - ) -> Self: - """ - Create shallow copy of record batch by replacing schema - key-value metadata with the indicated new metadata (which may be None, - which deletes any existing metadata - - Parameters - ---------- - metadata : dict, default None - - Returns - ------- - shallow_copy : RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - - Constructing a RecordBatch with schema and metadata: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64())], metadata={"n_legs": "Number of legs per animal"} - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs], schema=my_schema) - >>> batch.schema - n_legs: int64 - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Shallow copy of a RecordBatch with deleted schema metadata: - - >>> batch.replace_schema_metadata().schema - n_legs: int64 - """ - @property - def num_columns(self) -> int: - """ - Number of columns - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.num_columns - 2 - """ - - @property - def num_rows(self) -> int: - """ - Number of rows - - Due to the definition of a RecordBatch, all columns have the same - number of rows. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.num_rows - 6 - """ - @property - def schema(self) -> Schema: - """ - Schema of the RecordBatch and its columns - - Returns - ------- - pyarrow.Schema - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.schema - n_legs: int64 - animals: string - """ - @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the record batch. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.nbytes - 116 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the record batch - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. 
- - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.get_total_buffer_size() - 120 - """ - - def __sizeof__(self) -> int: ... - def add_column( - self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list - ) -> Self: - """ - Add column to RecordBatch at position i. - - A new record batch is returned with the column added, the original record batch - object is left unchanged. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. - - Returns - ------- - RecordBatch - New record batch with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - - Add column: - - >>> year = [2021, 2022, 2019, 2021] - >>> batch.add_column(0, "year", year) - pyarrow.RecordBatch - year: int64 - n_legs: int64 - animals: string - ---- - year: [2021,2022,2019,2021] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Original record batch is left unchanged: - - >>> batch - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - def remove_column(self, i: int) -> Self: - """ - Create new RecordBatch with the indicated column removed. - - Parameters - ---------- - i : int - Index of column to remove. - - Returns - ------- - Table - New record batch without the column. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> batch.remove_column(1) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,4,5,100] - """ - def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: - """ - Replace column in RecordBatch at position. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. - - Returns - ------- - RecordBatch - New record batch with the passed column set. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - - Replace a column: - - >>> year = [2021, 2022, 2019, 2021] - >>> batch.set_column(1, "year", year) - pyarrow.RecordBatch - n_legs: int64 - year: int64 - ---- - n_legs: [2,4,5,100] - year: [2021,2022,2019,2021] - """ - @overload - def rename_columns(self, names: list[str]) -> Self: ... - @overload - def rename_columns(self, names: dict[str, str]) -> Self: ... 
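# Illustrative sketch of the two rename_columns overloads above, assuming a pyarrow
# version that accepts the dict form (as these stubs declare). The batch contents
# are made up for demonstration.
import pyarrow as pa

batch = pa.record_batch({"n_legs": [2, 4], "animals": ["Flamingo", "Dog"]})
by_position = batch.rename_columns(["n", "name"])                       # list: positional
by_mapping = batch.rename_columns({"n_legs": "n", "animals": "name"})   # dict: by old name
assert by_position.column_names == ["n", "name"]
assert by_mapping.column_names == ["n", "name"]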
- def rename_columns(self, names): - """ - Create new record batch with columns renamed to provided names. - - Parameters - ---------- - names : list[str] or dict[str, str] - List of new column names or mapping of old column names to new column names. - - If a mapping of old to new column names is passed, then all columns which are - found to match a provided old column name will be renamed to the new column name. - If any column names are not found in the mapping, a KeyError will be raised. - - Raises - ------ - KeyError - If any of the column names passed in the names mapping do not exist. - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> new_names = ["n", "name"] - >>> batch.rename_columns(new_names) - pyarrow.RecordBatch - n: int64 - name: string - ---- - n: [2,4,5,100] - name: ["Flamingo","Horse","Brittle stars","Centipede"] - >>> new_names = {"n_legs": "n", "animals": "name"} - >>> batch.rename_columns(new_names) - pyarrow.RecordBatch - n: int64 - name: string - ---- - n: [2,4,5,100] - name: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: - """ - Write RecordBatch to Buffer as encapsulated IPC message, which does not - include a Schema. - - To reconstruct a RecordBatch from the encapsulated IPC message Buffer - returned by this function, a Schema must be passed separately. See - Examples. - - Parameters - ---------- - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - - Returns - ------- - serialized : Buffer - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> buf = batch.serialize() - >>> buf - - - Reconstruct RecordBatch from IPC message Buffer and original Schema - - >>> pa.ipc.read_record_batch(buf, batch.schema) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this RecordBatch - - Parameters - ---------- - offset : int, default 0 - Offset from start of record batch to slice - length : int, default None - Length of slice (default is until end of batch starting from - offset) - - Returns - ------- - sliced : RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... 
) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - >>> batch.slice(offset=3).to_pandas() - n_legs animals - 0 4 Horse - 1 5 Brittle stars - 2 100 Centipede - >>> batch.slice(length=2).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - >>> batch.slice(offset=3, length=1).to_pandas() - n_legs animals - 0 4 Horse - """ - def equals(self, other: Self, check_metadata: bool = False) -> bool: - """ - Check if contents of two record batches are equal. - - Parameters - ---------- - other : pyarrow.RecordBatch - RecordBatch to compare against. - check_metadata : bool, default False - Whether schema metadata equality should be checked as well. - - Returns - ------- - are_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch_0 = pa.record_batch([]) - >>> batch_1 = pa.RecordBatch.from_arrays( - ... [n_legs, animals], - ... names=["n_legs", "animals"], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> batch.equals(batch) - True - >>> batch.equals(batch_0) - False - >>> batch.equals(batch_1) - True - >>> batch.equals(batch_1, check_metadata=True) - False - """ - def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: - """ - Select columns of the RecordBatch. - - Returns a new RecordBatch with the specified columns, and metadata - preserved. - - Parameters - ---------- - columns : list-like - The column names or integer indices to select. - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) - - Select columns my indices: - - >>> batch.select([1]) - pyarrow.RecordBatch - animals: string - ---- - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - - Select columns by names: - - >>> batch.select(["n_legs"]) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,2,4,4,5,100] - """ - def cast( - self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None - ) -> Self: - """ - Cast record batch values to another schema. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - safe : bool, default True - Check for overflows or other unsafe conversions. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> batch.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... - - Define new schema and cast batch values: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] - ... 
) - >>> batch.cast(target_schema=my_schema) - pyarrow.RecordBatch - n_legs: duration[s] - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - @classmethod - def from_arrays( - cls, - arrays: Collection[Array], - names: list[str] | None = None, - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a RecordBatch from multiple pyarrow.Arrays - - Parameters - ---------- - arrays : list of pyarrow.Array - One for each field in RecordBatch - names : list of str, optional - Names for the batch fields. If not passed, schema must be passed - schema : Schema, default None - Schema for the created batch. If not passed, names must be passed - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - pyarrow.RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> names = ["n_legs", "animals"] - - Construct a RecordBatch from pyarrow Arrays using names: - - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Construct a RecordBatch from pyarrow Arrays using schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - @classmethod - def from_pandas( - cls, - df: pd.DataFrame, - schema: Schema | None = None, - preserve_index: bool | None = None, - nthreads: int | None = None, - columns: list[str] | None = None, - ) -> Self: - """ - Convert pandas.DataFrame to an Arrow RecordBatch - - Parameters - ---------- - df : pandas.DataFrame - schema : pyarrow.Schema, optional - The expected schema of the RecordBatch. This can be used to - indicate the type of columns if we cannot infer it automatically. - If passed, the output will have exactly this schema. Columns - specified in the schema that are not found in the DataFrame columns - or its index will raise an error. Additional columns or index - levels in the DataFrame which are not specified in the schema will - be ignored. - preserve_index : bool, optional - Whether to store the index as an additional column in the resulting - ``RecordBatch``. The default of None will store the index as a - column, except for RangeIndex which is stored as metadata only. Use - ``preserve_index=True`` to force it to be stored as a column. - nthreads : int, default None - If greater than 1, convert columns to Arrow in parallel using - indicated number of threads. By default, this follows - :func:`pyarrow.cpu_count` (may use up to system CPU count threads). 
- columns : list, optional - List of column to be converted. If None, use all columns. - - Returns - ------- - pyarrow.RecordBatch - - - Examples - -------- - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Convert pandas DataFrame to RecordBatch: - - >>> import pyarrow as pa - >>> pa.RecordBatch.from_pandas(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Convert pandas DataFrame to RecordBatch using schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.RecordBatch.from_pandas(df, schema=my_schema) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Convert pandas DataFrame to RecordBatch specifying columns: - - >>> pa.RecordBatch.from_pandas(df, columns=["n_legs"]) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,4,5,100] - """ - @classmethod - def from_struct_array( - cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] - ) -> Self: - """ - Construct a RecordBatch from a StructArray. - - Each field in the StructArray will become a column in the resulting - ``RecordBatch``. - - Parameters - ---------- - struct_array : StructArray - Array to construct the record batch from. - - Returns - ------- - pyarrow.RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> pa.RecordBatch.from_struct_array(struct).to_pandas() - animals n_legs year - 0 Parrot 2 NaN - 1 None 4 2022.0 - """ - def to_struct_array(self) -> StructArray: - """ - Convert to a struct array. - """ - def to_tensor( - self, - null_to_nan: bool = False, - row_major: bool = True, - memory_pool: MemoryPool | None = None, - ) -> Tensor: - """ - Convert to a :class:`~pyarrow.Tensor`. - - RecordBatches that can be converted have fields of type signed or unsigned - integer or float, including all bit-widths. - - ``null_to_nan`` is ``False`` by default and this method will raise an error in case - any nulls are present. RecordBatches with nulls can be converted with ``null_to_nan`` - set to ``True``. In this case null values are converted to ``NaN`` and integer type - arrays are promoted to the appropriate float type. - - Parameters - ---------- - null_to_nan : bool, default False - Whether to write null values in the result as ``NaN``. - row_major : bool, default True - Whether resulting Tensor is row-major or column-major - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Examples - -------- - >>> import pyarrow as pa - >>> batch = pa.record_batch( - ... [ - ... pa.array([1, 2, 3, 4, None], type=pa.int32()), - ... pa.array([10, 20, 30, 40, None], type=pa.float32()), - ... ], - ... names=["a", "b"], - ... 
) - - >>> batch - pyarrow.RecordBatch - a: int32 - b: float - ---- - a: [1,2,3,4,null] - b: [10,20,30,40,null] - - Convert a RecordBatch to row-major Tensor with null values - written as ``NaN``s - - >>> batch.to_tensor(null_to_nan=True) - - type: double - shape: (5, 2) - strides: (16, 8) - >>> batch.to_tensor(null_to_nan=True).to_numpy() - array([[ 1., 10.], - [ 2., 20.], - [ 3., 30.], - [ 4., 40.], - [nan, nan]]) - - Convert a RecordBatch to column-major Tensor - - >>> batch.to_tensor(null_to_nan=True, row_major=False) - - type: double - shape: (5, 2) - strides: (8, 40) - >>> batch.to_tensor(null_to_nan=True, row_major=False).to_numpy() - array([[ 1., 10.], - [ 2., 20.], - [ 3., 30.], - [ 4., 40.], - [nan, nan]]) - """ - def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): - """ - Export to a C ArrowArray struct, given its pointer. - - If a C ArrowSchema struct pointer is also given, the record batch - schema is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ - @classmethod - def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: - """ - Import RecordBatch from a C ArrowArray struct, given its pointer - and the imported schema. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowArray struct. - type: Schema or int - Either a Schema object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_array__(self, requested_schema=None): - """ - Get a pair of PyCapsules containing a C ArrowArray representation of the object. - - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the batch to this schema. - If None, the batch will be returned as-is, with a schema matching the - one returned by :meth:`__arrow_c_schema__()`. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowArray, - respectively. - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export the batch as an Arrow C stream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - Currently, this is not supported and will raise a - NotImplementedError if the schema doesn't match the current schema. - - Returns - ------- - PyCapsule - """ - @classmethod - def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: - """ - Import RecordBatch from a pair of PyCapsules containing a C ArrowSchema - and ArrowArray, respectively. - - Parameters - ---------- - schema_capsule : PyCapsule - A PyCapsule containing a C ArrowSchema representation of the schema. - array_capsule : PyCapsule - A PyCapsule containing a C ArrowArray representation of the array. - - Returns - ------- - pyarrow.RecordBatch - """ - def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: - """ - Export to a C ArrowDeviceArray struct, given its pointer. 
- - If a C ArrowSchema struct pointer is also given, the record batch - schema is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ - @classmethod - def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: - """ - Import RecordBatch from a C ArrowDeviceArray struct, given its pointer - and the imported schema. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - type: Schema or int - Either a Schema object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_device_array__(self, requested_schema=None, **kwargs): - """ - Get a pair of PyCapsules containing a C ArrowDeviceArray representation - of the object. - - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the batch to this data type. - If None, the batch will be returned as-is, with a type matching the - one returned by :meth:`__arrow_c_schema__()`. - kwargs - Currently no additional keyword arguments are supported, but - this method will accept any keyword with a value of ``None`` - for compatibility with future keywords. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, - respectively. - """ - @classmethod - def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: - """ - Import RecordBatch from a pair of PyCapsules containing a - C ArrowSchema and ArrowDeviceArray, respectively. - - Parameters - ---------- - schema_capsule : PyCapsule - A PyCapsule containing a C ArrowSchema representation of the schema. - array_capsule : PyCapsule - A PyCapsule containing a C ArrowDeviceArray representation of the array. - - Returns - ------- - pyarrow.RecordBatch - """ - @property - def device_type(self) -> DeviceAllocationType: - """ - The device type where the arrays in the RecordBatch reside. - - Returns - ------- - DeviceAllocationType - """ - @property - def is_cpu(self) -> bool: - """ - Whether the RecordBatch's arrays are CPU-accessible. - """ - def copy_to(self, destination: MemoryManager | Device) -> Self: - """ - Copy the entire RecordBatch to destination device. - - This copies each column of the record batch to create - a new record batch where all underlying buffers for the columns have - been copied to the destination MemoryManager. - - Parameters - ---------- - destination : pyarrow.MemoryManager or pyarrow.Device - The destination device to copy the array to. - - Returns - ------- - RecordBatch - """ - -def table_to_blocks(options, table: Table, categories, extension_columns): ... - -JoinType: TypeAlias = Literal[ - "left semi", - "right semi", - "left anti", - "right anti", - "inner", - "left outer", - "right outer", - "full outer", -] - -class Table(_Tabular[ChunkedArray[Any]]): - """ - A collection of top-level named, equal length Arrow arrays. - - Warnings - -------- - Do not call this class's constructor directly, use one of the ``from_*`` - methods instead. 
- - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from arrays: - - >>> pa.Table.from_arrays([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a RecordBatch: - - >>> batch = pa.record_batch([n_legs, animals], names=names) - >>> pa.Table.from_batches([batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.Table.from_pandas(df) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a dictionary of arrays: - - >>> pydict = {"n_legs": n_legs, "animals": animals} - >>> pa.Table.from_pydict(pydict) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_pydict(pydict).schema - n_legs: int64 - animals: string - - Construct a Table from a dictionary of arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a list of rows: - - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"year": 2021, "animals": "Centipede"}] - >>> pa.Table.from_pylist(pylist) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,null]] - animals: [["Flamingo","Centipede"]] - - Construct a Table from a list of rows with pyarrow schema: - - >>> my_schema = pa.schema( - ... [ - ... pa.field("year", pa.int64()), - ... pa.field("n_legs", pa.int64()), - ... pa.field("animals", pa.string()), - ... ], - ... metadata={"year": "Year of entry"}, - ... ) - >>> pa.Table.from_pylist(pylist, schema=my_schema).schema - year: int64 - n_legs: int64 - animals: string - -- schema metadata -- - year: 'Year of entry' - - Construct a Table with :func:`pyarrow.table`: - - >>> pa.table([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this Table. - - Parameters - ---------- - offset : int, default 0 - Offset from start of table to slice. 
- length : int, default None - Length of slice (default is until end of table starting from - offset). - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.slice(length=3) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019]] - n_legs: [[2,4,5]] - animals: [["Flamingo","Horse","Brittle stars"]] - >>> table.slice(offset=2) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2019,2021]] - n_legs: [[5,100]] - animals: [["Brittle stars","Centipede"]] - >>> table.slice(offset=2, length=1) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2019]] - n_legs: [[5]] - animals: [["Brittle stars"]] - """ - def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: - """ - Select columns of the Table. - - Returns a new Table with the specified columns, and metadata - preserved. - - Parameters - ---------- - columns : list-like - The column names or integer indices to select. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.select([0, 1]) - pyarrow.Table - year: int64 - n_legs: int64 - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - >>> table.select(["year"]) - pyarrow.Table - year: int64 - ---- - year: [[2020,2022,2019,2021]] - """ - def replace_schema_metadata( - self, metadata: dict[str | bytes, str | bytes] | None = None - ) -> Self: - """ - Create shallow copy of table by replacing schema - key-value metadata with the indicated new metadata (which may be None), - which deletes any existing metadata. - - Parameters - ---------- - metadata : dict, default None - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Constructing a Table with pyarrow schema and metadata: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> table = pa.table(df, my_schema) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: ... - - Create a shallow copy of a Table with deleted schema metadata: - - >>> table.replace_schema_metadata().schema - n_legs: int64 - animals: string - - Create a shallow copy of a Table with new schema metadata: - - >>> metadata = {"animals": "Which animal"} - >>> table.replace_schema_metadata(metadata=metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - animals: 'Which animal' - """ - def flatten(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Flatten this Table. - - Each column with a struct type is flattened - into one column per struct field. 
Other columns are left unchanged. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> month = pa.array([4, 6]) - >>> table = pa.Table.from_arrays([struct, month], names=["a", "month"]) - >>> table - pyarrow.Table - a: struct - child 0, animals: string - child 1, n_legs: int64 - child 2, year: int64 - month: int64 - ---- - a: [ - -- is_valid: all not null - -- child 0 type: string - ["Parrot",null] - -- child 1 type: int64 - [2,4] - -- child 2 type: int64 - [null,2022]] - month: [[4,6]] - - Flatten the columns with struct field: - - >>> table.flatten() - pyarrow.Table - a.animals: string - a.n_legs: int64 - a.year: int64 - month: int64 - ---- - a.animals: [["Parrot",null]] - a.n_legs: [[2,4]] - a.year: [[null,2022]] - month: [[4,6]] - """ - def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Make a new table by combining the chunks this table has. - - All the underlying chunks in the ChunkedArray of each column are - concatenated into zero or one chunk. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] - ... ) - >>> names = ["n_legs", "animals"] - >>> table = pa.table([n_legs, animals], names=names) - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4],[4,5,100]] - animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] - >>> table.combine_chunks() - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4,4,5,100]] - animals: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] - """ - def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Unify dictionaries across all chunks. - - This method returns an equivalent table, but where all chunks of - each column share the same dictionary values. Dictionary indices - are transposed accordingly. - - Columns without dictionaries are returned unchanged. 
- - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() - >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() - >>> c_arr = pa.chunked_array([arr_1, arr_2]) - >>> table = pa.table([c_arr], names=["animals"]) - >>> table - pyarrow.Table - animals: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Parrot","Dog"] -- indices: - [0,1,2], -- dictionary: - ["Horse","Brittle stars","Centipede"] -- indices: - [0,1,2]] - - Unify dictionaries across both chunks: - - >>> table.unify_dictionaries() - pyarrow.Table - animals: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: - [0,1,2], -- dictionary: - ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: - [3,4,5]] - """ - def equals(self, other: Self, check_metadata: bool = False) -> Self: - """ - Check if contents of two tables are equal. - - Parameters - ---------- - other : pyarrow.Table - Table to compare against. - check_metadata : bool, default False - Whether schema metadata equality should be checked as well. - - Returns - ------- - bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> names = ["n_legs", "animals"] - >>> table = pa.Table.from_arrays([n_legs, animals], names=names) - >>> table_0 = pa.Table.from_arrays([]) - >>> table_1 = pa.Table.from_arrays( - ... [n_legs, animals], names=names, metadata={"n_legs": "Number of legs per animal"} - ... ) - >>> table.equals(table) - True - >>> table.equals(table_0) - False - >>> table.equals(table_1) - True - >>> table.equals(table_1, check_metadata=True) - False - """ - def cast( - self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None - ) -> Self: - """ - Cast table values to another schema. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - safe : bool, default True - Check for overflows or other unsafe conversions. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... - - Define new schema and cast table values: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] - ... ) - >>> table.cast(target_schema=my_schema) - pyarrow.Table - n_legs: duration[s] - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - @classmethod - def from_pandas( - cls, - df: pd.DataFrame, - schema: Schema | None = None, - preserve_index: bool | None = None, - nthreads: int | None = None, - columns: list[str] | None = None, - safe: bool = True, - ) -> Self: - """ - Convert pandas.DataFrame to an Arrow Table. 
- - The column types in the resulting Arrow Table are inferred from the - dtypes of the pandas.Series in the DataFrame. In the case of non-object - Series, the NumPy dtype is translated to its Arrow equivalent. In the - case of `object`, we need to guess the datatype by looking at the - Python objects in this Series. - - Be aware that Series of the `object` dtype don't carry enough - information to always lead to a meaningful Arrow type. In the case that - we cannot infer a type, e.g. because the DataFrame is of length 0 or - the Series only contains None/nan objects, the type is set to - null. This behavior can be avoided by constructing an explicit schema - and passing it to this function. - - Parameters - ---------- - df : pandas.DataFrame - schema : pyarrow.Schema, optional - The expected schema of the Arrow Table. This can be used to - indicate the type of columns if we cannot infer it automatically. - If passed, the output will have exactly this schema. Columns - specified in the schema that are not found in the DataFrame columns - or its index will raise an error. Additional columns or index - levels in the DataFrame which are not specified in the schema will - be ignored. - preserve_index : bool, optional - Whether to store the index as an additional column in the resulting - ``Table``. The default of None will store the index as a column, - except for RangeIndex which is stored as metadata only. Use - ``preserve_index=True`` to force it to be stored as a column. - nthreads : int, default None - If greater than 1, convert columns to Arrow in parallel using - indicated number of threads. By default, this follows - :func:`pyarrow.cpu_count` (may use up to system CPU count threads). - columns : list, optional - List of column to be converted. If None, use all columns. - safe : bool, default True - Check for overflows or other unsafe conversions. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.Table.from_pandas(df) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - @classmethod - def from_arrays( - cls, - arrays: Collection[ArrayOrChunkedArray[Any]], - names: list[str] | None = None, - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table from Arrow arrays. - - Parameters - ---------- - arrays : list of pyarrow.Array or pyarrow.ChunkedArray - Equal-length arrays that should form the table. - names : list of str, optional - Names for the table columns. If not passed, schema must be passed. - schema : Schema, default None - Schema for the created table. If not passed, names must be passed. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). 
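For reference, a minimal sketch of the two call shapes `Table.from_arrays` accepts as typed above: either column names or an explicit schema (the `metadata` argument only applies when the schema is inferred from names). Column names and values here are purely illustrative.

import pyarrow as pa

n_legs = pa.array([2, 4])
animals = pa.array(["Flamingo", "Horse"])

# Shape 1: pass column names; the schema is inferred from the arrays.
t1 = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"])

# Shape 2: pass an explicit schema; names are then taken from the schema.
schema = pa.schema([("n_legs", pa.int64()), ("animals", pa.string())])
t2 = pa.Table.from_arrays([n_legs, animals], schema=schema)

assert t1.schema.names == t2.schema.names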
- - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from arrays: - - >>> pa.Table.from_arrays([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from arrays with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"animals": "Name of the animal species"}, - ... ) - >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - animals: 'Name of the animal species' - """ - @classmethod - def from_struct_array( - cls, struct_array: StructArray | ChunkedArray[scalar.StructScalar] - ) -> Self: - """ - Construct a Table from a StructArray. - - Each field in the StructArray will become a column in the resulting - ``Table``. - - Parameters - ---------- - struct_array : StructArray or ChunkedArray - Array to construct the table from. - - Returns - ------- - pyarrow.Table - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> pa.Table.from_struct_array(struct).to_pandas() - animals n_legs year - 0 Parrot 2 NaN - 1 None 4 2022.0 - """ - def to_struct_array( - self, max_chunksize: int | None = None - ) -> ChunkedArray[scalar.StructScalar]: - """ - Convert to a chunked array of struct type. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for ChunkedArray chunks. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - ChunkedArray - """ - @classmethod - def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: - """ - Construct a Table from a sequence or iterator of Arrow RecordBatches. - - Parameters - ---------- - batches : sequence or iterator of RecordBatch - Sequence of RecordBatch to be converted, all schemas must be equal. - schema : Schema, default None - If not passed, will be inferred from the first RecordBatch. 
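A small illustrative sketch (made-up column data) of round-tripping between RecordBatches and a Table with `from_batches`/`to_batches`; all input batches must share one schema.

import pyarrow as pa

batch = pa.record_batch({"n_legs": [2, 4], "animals": ["Flamingo", "Horse"]})
table = pa.Table.from_batches([batch, batch])  # 4 rows in 2 chunks

# to_batches is zero-copy: it re-exposes the same chunks as RecordBatch objects.
assert sum(b.num_rows for b in table.to_batches()) == table.num_rows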
- - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - >>> batch = pa.record_batch([n_legs, animals], names=names) - >>> batch.to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - - Construct a Table from a RecordBatch: - - >>> pa.Table.from_batches([batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a sequence of RecordBatches: - - >>> pa.Table.from_batches([batch, batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100],[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: - """ - Convert Table to a list of RecordBatch objects. - - Note that this method is zero-copy, it merely exposes the same data - under a different API. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for each RecordBatch chunk. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - list[RecordBatch] - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Convert a Table to a RecordBatch: - - >>> table.to_batches()[0].to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - - Convert a Table to a list of RecordBatches: - - >>> table.to_batches(max_chunksize=2)[0].to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - >>> table.to_batches(max_chunksize=2)[1].to_pandas() - n_legs animals - 0 5 Brittle stars - 1 100 Centipede - """ - def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: - """ - Convert the Table to a RecordBatchReader. - - Note that this method is zero-copy, it merely exposes the same data - under a different API. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for each RecordBatch chunk. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - RecordBatchReader - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Convert a Table to a RecordBatchReader: - - >>> table.to_reader() - - - >>> reader = table.to_reader() - >>> reader.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... - >>> reader.read_all() - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - @property - def schema(self) -> Schema: - """ - Schema of the table and its columns. - - Returns - ------- - Schema - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... 
"animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' ... - """ - @property - def num_columns(self) -> int: - """ - Number of columns in this table. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.num_columns - 2 - """ - @property - def num_rows(self) -> int: - """ - Number of rows in this table. - - Due to the definition of a table, all columns have the same number of - rows. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.num_rows - 4 - """ - @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the table. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.nbytes - 72 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the table. - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.get_total_buffer_size() - 76 - """ - def __sizeof__(self) -> int: ... - def add_column( - self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Add column to Table at position. - - A new table is returned with the column added, the original table - object is left unchanged. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array, list of Array, or values coercible to arrays - Column data. - - Returns - ------- - Table - New table with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> table = pa.Table.from_pandas(df) - - Add column: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.add_column(0, "year", [year]) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2021,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Original table is left unchanged: - - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def remove_column(self, i: int) -> Self: - """ - Create new Table with the indicated column removed. - - Parameters - ---------- - i : int - Index of column to remove. - - Returns - ------- - Table - New table without the column. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.remove_column(1) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,4,5,100]] - """ - def set_column( - self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Replace column in Table at position. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array, list of Array, or values coercible to arrays - Column data. - - Returns - ------- - Table - New table with the passed column set. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Replace a column: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.set_column(1, "year", [year]) - pyarrow.Table - n_legs: int64 - year: int64 - ---- - n_legs: [[2,4,5,100]] - year: [[2021,2022,2019,2021]] - """ - @overload - def rename_columns(self, names: list[str]) -> Self: ... - @overload - def rename_columns(self, names: dict[str, str]) -> Self: ... - def rename_columns(self, names): - """ - Create new table with columns renamed to provided names. - - Parameters - ---------- - names : list[str] or dict[str, str] - List of new column names or mapping of old column names to new column names. - - If a mapping of old to new column names is passed, then all columns which are - found to match a provided old column name will be renamed to the new column name. - If any column names are not found in the mapping, a KeyError will be raised. - - Raises - ------ - KeyError - If any of the column names passed in the names mapping do not exist. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> table = pa.Table.from_pandas(df) - >>> new_names = ["n", "name"] - >>> table.rename_columns(new_names) - pyarrow.Table - n: int64 - name: string - ---- - n: [[2,4,5,100]] - name: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> new_names = {"n_legs": "n", "animals": "name"} - >>> table.rename_columns(new_names) - pyarrow.Table - n: int64 - name: string - ---- - n: [[2,4,5,100]] - name: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def drop(self, columns: str | list[str]) -> Self: - """ - Drop one or more columns and return a new table. - - Alias of Table.drop_columns, but kept for backwards compatibility. - - Parameters - ---------- - columns : str or list[str] - Field name(s) referencing existing column(s). - - Returns - ------- - Table - New table without the column(s). - """ - def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: - """ - Declare a grouping over the columns of the table. - - Resulting grouping can then be used to perform aggregations - with a subsequent ``aggregate()`` method. - - Parameters - ---------- - keys : str or list[str] - Name of the columns that should be used as the grouping key. - use_threads : bool, default True - Whether to use multithreading or not. When set to True (the - default), no stable ordering of the output is guaranteed. - - Returns - ------- - TableGroupBy - - See Also - -------- - TableGroupBy.aggregate - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.group_by("year").aggregate([("n_legs", "sum")]) - pyarrow.Table - year: int64 - n_legs_sum: int64 - ---- - year: [[2020,2022,2021,2019]] - n_legs_sum: [[2,6,104,5]] - """ - def join( - self, - right_table: Self, - keys: str | list[str], - right_keys: str | list[str] | None = None, - join_type: JoinType = "left outer", - left_suffix: str | None = None, - right_suffix: str | None = None, - coalesce_keys: bool = True, - use_threads: bool = True, - ) -> Self: - """ - Perform a join between this table and another one. - - Result of the join will be a new Table, where further - operations can be applied. - - Parameters - ---------- - right_table : Table - The table to join to the current one, acting as the right table - in the join operation. - keys : str or list[str] - The columns from current table that should be used as keys - of the join operation left side. - right_keys : str or list[str], default None - The columns from the right_table that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left table. - join_type : str, default "left outer" - The kind of join that should be performed, one of - ("left semi", "right semi", "left anti", "right anti", - "inner", "left outer", "right outer", "full outer") - left_suffix : str, default None - Which suffix to add to left column names. This prevents confusion - when the columns in left and right tables have colliding names. - right_suffix : str, default None - Which suffix to add to the right column names. This prevents confusion - when the columns in left and right tables have colliding names. - coalesce_keys : bool, default True - If the duplicated keys should be omitted from one of the sides - in the join result. 
- use_threads : bool, default True - Whether to use multithreading or not. - - Returns - ------- - Table - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> df1 = pd.DataFrame({"id": [1, 2, 3], "year": [2020, 2022, 2019]}) - >>> df2 = pd.DataFrame( - ... {"id": [3, 4], "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]} - ... ) - >>> t1 = pa.Table.from_pandas(df1) - >>> t2 = pa.Table.from_pandas(df2) - - Left outer join: - - >>> t1.join(t2, "id").combine_chunks().sort_by("year") - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[3,1,2]] - year: [[2019,2020,2022]] - n_legs: [[5,null,null]] - animal: [["Brittle stars",null,null]] - - Full outer join: - - >>> t1.join(t2, "id", join_type="full outer").combine_chunks().sort_by("year") - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[3,1,2,4]] - year: [[2019,2020,2022,null]] - n_legs: [[5,null,null,100]] - animal: [["Brittle stars",null,null,"Centipede"]] - - Right outer join: - - >>> t1.join(t2, "id", join_type="right outer").combine_chunks().sort_by("year") - pyarrow.Table - year: int64 - id: int64 - n_legs: int64 - animal: string - ---- - year: [[2019,null]] - id: [[3,4]] - n_legs: [[5,100]] - animal: [["Brittle stars","Centipede"]] - - Right anti join - - >>> t1.join(t2, "id", join_type="right anti") - pyarrow.Table - id: int64 - n_legs: int64 - animal: string - ---- - id: [[4]] - n_legs: [[100]] - animal: [["Centipede"]] - """ - def join_asof( - self, - right_table: Self, - on: str, - by: str | list[str], - tolerance: int, - right_on: str | list[str] | None = None, - right_by: str | list[str] | None = None, - ) -> Self: - """ - Perform an asof join between this table and another one. - - This is similar to a left-join except that we match on nearest key rather - than equal keys. Both tables must be sorted by the key. This type of join - is most useful for time series data that are not perfectly aligned. - - Optionally match on equivalent keys with "by" before searching with "on". - - Result of the join will be a new Table, where further - operations can be applied. - - Parameters - ---------- - right_table : Table - The table to join to the current one, acting as the right table - in the join operation. - on : str - The column from current table that should be used as the "on" key - of the join operation left side. - - An inexact match is used on the "on" key, i.e. a row is considered a - match if and only if left_on - tolerance <= right_on <= left_on. - - The input dataset must be sorted by the "on" key. Must be a single - field of a common type. - - Currently, the "on" key must be an integer, date, or timestamp type. - by : str or list[str] - The columns from current table that should be used as the keys - of the join operation left side. The join operation is then done - only for the matches in these columns. - tolerance : int - The tolerance for inexact "on" key matching. A right row is considered - a match with the left row ``right.on - left.on <= tolerance``. The - ``tolerance`` may be: - - - negative, in which case a past-as-of-join occurs; - - or positive, in which case a future-as-of-join occurs; - - or zero, in which case an exact-as-of-join occurs. - - The tolerance is interpreted in the same units as the "on" key. - right_on : str or list[str], default None - The columns from the right_table that should be used as the on key - on the join operation right side. 
- When ``None`` use the same key name as the left table. - right_by : str or list[str], default None - The columns from the right_table that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left table. - - Returns - ------- - Table - - Example - -------- - >>> import pyarrow as pa - >>> t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]}) - >>> t2 = pa.table( - ... { - ... "id": [3, 4], - ... "year": [2020, 2021], - ... "n_legs": [5, 100], - ... "animal": ["Brittle stars", "Centipede"], - ... } - ... ) - - >>> t1.join_asof(t2, on="year", by="id", tolerance=-2) - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[1,3,2,3,3]] - year: [[2020,2021,2022,2022,2023]] - n_legs: [[null,5,null,5,null]] - animal: [[null,"Brittle stars",null,"Brittle stars",null]] - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export the table as an Arrow C stream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - Currently, this is not supported and will raise a - NotImplementedError if the schema doesn't match the current schema. - - Returns - ------- - PyCapsule - """ - @property - def is_cpu(self) -> bool: - """ - Whether all ChunkedArrays are CPU-accessible. - """ - -def record_batch( - data: list[ArrayOrChunkedArray[Any]] - | dict[str, list[Any] | Array[Any]] - | Iterable[Array[Any]] - | pd.DataFrame - | SupportArrowArray - | SupportArrowDeviceArray, - names: list[str] | None = None, - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, -) -> RecordBatch: - """ - Create a pyarrow.RecordBatch from another Python data structure or sequence - of arrays. - - Parameters - ---------- - data : dict, list, pandas.DataFrame, Arrow-compatible table - A mapping of strings to Arrays or Python lists, a list of Arrays, - a pandas DataFame, or any tabular object implementing the - Arrow PyCapsule Protocol (has an ``__arrow_c_array__`` or - ``__arrow_c_device_array__`` method). - names : list, default None - Column names if list of arrays passed as data. Mutually exclusive with - 'schema' argument. - schema : Schema, default None - The expected schema of the RecordBatch. If not passed, will be inferred - from the data. Mutually exclusive with 'names' argument. - metadata : dict or Mapping, default None - Optional metadata for the schema (if schema not passed). 
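A minimal sketch (illustrative column names and values) of the mutually exclusive `names`/`schema` arguments of `record_batch` described above:

import pyarrow as pa

n_legs = pa.array([2, 4, 5, 100])
animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"])

# Either infer the schema from explicit column names...
by_names = pa.record_batch([n_legs, animals], names=["n_legs", "animals"])

# ...or supply the schema directly (but not both at once).
by_schema = pa.record_batch(
    [n_legs, animals],
    schema=pa.schema([("n_legs", pa.int64()), ("animals", pa.string())]),
)
assert by_names.schema.names == by_schema.schema.names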
- - Returns - ------- - RecordBatch - - See Also - -------- - RecordBatch.from_arrays, RecordBatch.from_pandas, table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a RecordBatch from a python dictionary: - - >>> pa.record_batch({"n_legs": n_legs, "animals": animals}) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.record_batch({"n_legs": n_legs, "animals": animals}).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Creating a RecordBatch from a list of arrays with names: - - >>> pa.record_batch([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - - Creating a RecordBatch from a list of arrays with names and metadata: - - >>> my_metadata = {"n_legs": "How many legs does an animal have?"} - >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'How many legs does an animal have?' - - Creating a RecordBatch from a pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.record_batch(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - >>> pa.record_batch(df).to_pandas() - year month day n_legs animals - 0 2020 3 1 2 Flamingo - 1 2022 5 5 4 Horse - 2 2021 7 9 5 Brittle stars - 3 2022 9 13 100 Centipede - - Creating a RecordBatch from a pandas DataFrame with schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.record_batch(df, my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: ... - >>> pa.record_batch(df, my_schema).to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - """ - -@overload -def table( - data: dict[str, list[Any] | Array[Any]], - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - nthreads: int | None = None, -) -> Table: ... -@overload -def table( - data: Collection[ArrayOrChunkedArray[Any]] - | pd.DataFrame - | SupportArrowArray - | SupportArrowStream - | SupportArrowDeviceArray, - names: list[str] | None = None, - schema: Schema | None = None, - metadata: Mapping[str | bytes, str | bytes] | None = None, - nthreads: int | None = None, -) -> Table: ... 
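A short sketch of how the two `table()` overloads above are selected in practice: a mapping of column names to values matches the first overload (no `names` argument), while a sequence of arrays takes explicit `names`. Values are illustrative only.

import pyarrow as pa

# First overload: mapping of column name -> values.
t1 = pa.table({"n_legs": [2, 4], "animals": ["Flamingo", "Horse"]})

# Second overload: sequence of arrays plus explicit column names.
t2 = pa.table(
    [pa.array([2, 4]), pa.array(["Flamingo", "Horse"])],
    names=["n_legs", "animals"],
)
assert t1.equals(t2)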
-def table(*args, **kwargs): - """ - Create a pyarrow.Table from a Python data structure or sequence of arrays. - - Parameters - ---------- - data : dict, list, pandas.DataFrame, Arrow-compatible table - A mapping of strings to Arrays or Python lists, a list of arrays or - chunked arrays, a pandas DataFame, or any tabular object implementing - the Arrow PyCapsule Protocol (has an ``__arrow_c_array__``, - ``__arrow_c_device_array__`` or ``__arrow_c_stream__`` method). - names : list, default None - Column names if list of arrays passed as data. Mutually exclusive with - 'schema' argument. - schema : Schema, default None - The expected schema of the Arrow Table. If not passed, will be inferred - from the data. Mutually exclusive with 'names' argument. - If passed, the output will have exactly this schema (raising an error - when columns are not found in the data and ignoring additional data not - specified in the schema, when data is a dict or DataFrame). - metadata : dict or Mapping, default None - Optional metadata for the schema (if schema not passed). - nthreads : int, default None - For pandas.DataFrame inputs: if greater than 1, convert columns to - Arrow in parallel using indicated number of threads. By default, - this follows :func:`pyarrow.cpu_count` (may use up to system CPU count - threads). - - Returns - ------- - Table - - See Also - -------- - Table.from_arrays, Table.from_pandas, Table.from_pydict - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from a python dictionary: - - >>> pa.table({"n_legs": n_legs, "animals": animals}) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays: - - >>> pa.table([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.table([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.table(df) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from pandas DataFrame with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.table(df, my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: '{"index_columns": [], "column_indexes": [{"name": null, ... - - Construct a Table from chunked arrays: - - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] - ... 
) - >>> table = pa.table([n_legs, animals], names=names) - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4],[4,5,100]] - animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] - """ - -def concat_tables( - tables: Iterable[Table], - memory_pool: MemoryPool | None = None, - promote_options: Literal["none", "default", "permissive"] = "none", - **kwargs: Any, -) -> Table: - """ - Concatenate pyarrow.Table objects. - - If promote_options="none", a zero-copy concatenation will be performed. The schemas - of all the Tables must be the same (except the metadata), otherwise an - exception will be raised. The result Table will share the metadata with the - first table. - - If promote_options="default", any null type arrays will be casted to the type of other - arrays in the column of the same name. If a table is missing a particular - field, null values of the appropriate type will be generated to take the - place of the missing field. The new schema will share the metadata with the - first table. Each field in the new schema will share the metadata with the - first table which has the field defined. Note that type promotions may - involve additional allocations on the given ``memory_pool``. - - If promote_options="permissive", the behavior of default plus types will be promoted - to the common denominator that fits all the fields. - - Parameters - ---------- - tables : iterable of pyarrow.Table objects - Pyarrow tables to concatenate into a single Table. - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - promote_options : str, default none - Accepts strings "none", "default" and "permissive". - **kwargs : dict, optional - - Examples - -------- - >>> import pyarrow as pa - >>> t1 = pa.table( - ... [ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), - ... ], - ... names=["n_legs", "animals"], - ... ) - >>> t2 = pa.table([pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"]) - >>> pa.concat_tables([t1, t2]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100],[2,4]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Parrot","Dog"]] - - """ - -class TableGroupBy: - """ - A grouping of columns in a table on which to perform aggregations. - - Parameters - ---------- - table : pyarrow.Table - Input table to execute the aggregation on. - keys : str or list[str] - Name of the grouped columns. - use_threads : bool, default True - Whether to use multithreading or not. When set to True (the default), - no stable ordering of the output is guaranteed. - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.table( - ... [ - ... pa.array(["a", "a", "b", "b", "c"]), - ... pa.array([1, 2, 3, 4, 5]), - ... ], - ... names=["keys", "values"], - ... ) - - Grouping of columns: - - >>> pa.TableGroupBy(t, "keys") - - - Perform aggregations: - - >>> pa.TableGroupBy(t, "keys").aggregate([("values", "sum")]) - pyarrow.Table - keys: string - values_sum: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - """ - - keys: str | list[str] - def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... - def aggregate( - self, - aggregations: Iterable[ - tuple[ColumnSelector, Aggregation] - | tuple[ColumnSelector, Aggregation, AggregateOptions | None] - ], - ) -> Table: - """ - Perform an aggregation over the grouped columns of the table. 
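To illustrate the `promote_options` behaviour of `concat_tables` documented above, a minimal sketch with made-up tables whose schemas differ by one column:

import pyarrow as pa

t1 = pa.table({"n_legs": [2, 4]})
t2 = pa.table({"n_legs": [5, 100], "animals": ["Brittle stars", "Centipede"]})

# "none" would raise on the schema mismatch; "default" fills the column
# missing from t1 with nulls of the appropriate type.
combined = pa.concat_tables([t1, t2], promote_options="default")
assert combined.column("animals").null_count == 2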
- - Parameters - ---------- - aggregations : list[tuple(str, str)] or \ -list[tuple(str, str, FunctionOptions)] - List of tuples, where each tuple is one aggregation specification - and consists of: aggregation column name followed - by function name and optionally aggregation function option. - Pass empty list to get a single row for each group. - The column name can be a string, an empty list or a list of - column names, for unary, nullary and n-ary aggregation functions - respectively. - - For the list of function names and respective aggregation - function options see :ref:`py-grouped-aggrs`. - - Returns - ------- - Table - Results of the aggregation functions. - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.table([ - ... pa.array(["a", "a", "b", "b", "c"]), - ... pa.array([1, 2, 3, 4, 5]), - ... ], names=["keys", "values"]) - - Sum the column "values" over the grouped column "keys": - - >>> t.group_by("keys").aggregate([("values", "sum")]) - pyarrow.Table - keys: string - values_sum: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - - Count the rows over the grouped column "keys": - - >>> t.group_by("keys").aggregate([([], "count_all")]) - pyarrow.Table - keys: string - count_all: int64 - ---- - keys: [["a","b","c"]] - count_all: [[2,2,1]] - - Do multiple aggregations: - - >>> t.group_by("keys").aggregate([ - ... ("values", "sum"), - ... ("keys", "count") - ... ]) - pyarrow.Table - keys: string - values_sum: int64 - keys_count: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - keys_count: [[2,2,1]] - - Count the number of non-null values for column "values" - over the grouped column "keys": - - >>> import pyarrow.compute as pc - >>> t.group_by(["keys"]).aggregate([ - ... ("values", "count", pc.CountOptions(mode="only_valid")) - ... ]) - pyarrow.Table - keys: string - values_count: int64 - ---- - keys: [["a","b","c"]] - values_count: [[2,2,1]] - - Get a single row for each group in column "keys": - - >>> t.group_by("keys").aggregate([]) - pyarrow.Table - keys: string - ---- - keys: [["a","b","c"]] - """ - def _table(self) -> Table: ... - @property - def _use_threads(self) -> bool: ... - -def concat_batches( - recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None -) -> RecordBatch: - """ - Concatenate pyarrow.RecordBatch objects. - - All recordbatches must share the same Schema, - the operation implies a copy of the data to merge - the arrays of the different RecordBatches. - - Parameters - ---------- - recordbatches : iterable of pyarrow.RecordBatch objects - Pyarrow record batches to concatenate into a single RecordBatch. - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - - Examples - -------- - >>> import pyarrow as pa - >>> t1 = pa.record_batch( - ... [ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), - ... ], - ... names=["n_legs", "animals"], - ... ) - >>> t2 = pa.record_batch( - ... [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"] - ... 
) - >>> pa.concat_batches([t1, t2]) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100,2,4] - animals: ["Flamingo","Horse","Brittle stars","Centipede","Parrot","Dog"] - - """ - -__all__ = [ - "ChunkedArray", - "chunked_array", - "_Tabular", - "RecordBatch", - "table_to_blocks", - "Table", - "record_batch", - "table", - "concat_tables", - "TableGroupBy", - "concat_batches", -] diff --git a/python/pyarrow/_azurefs.pyi b/python/pyarrow/_azurefs.pyi deleted file mode 100644 index b9a83f01c56..00000000000 --- a/python/pyarrow/_azurefs.pyi +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Literal - -from ._fs import FileSystem - -class AzureFileSystem(FileSystem): - """ - Azure Blob Storage backed FileSystem implementation - - This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a. - Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific - features will be used when they provide a performance advantage. Azurite emulator is - also supported. Note: `/` is the only supported delimiter. - - The storage account is considered the root of the filesystem. When enabled, containers - will be created or deleted during relevant directory operations. Obviously, this also - requires authentication with the additional permissions. - - By default `DefaultAzureCredential `__ - is used for authentication. This means it will try several types of authentication - and go with the first one that works. If any authentication parameters are provided when - initialising the FileSystem, they will be used instead of the default credential. - - Parameters - ---------- - account_name : str - Azure Blob Storage account name. This is the globally unique identifier for the - storage account. - account_key : str, default None - Account key of the storage account. If sas_token and account_key are None the - default credential will be used. The parameters account_key and sas_token are - mutually exclusive. - blob_storage_authority : str, default None - hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful - for connecting to a local emulator, like Azurite. - dfs_storage_authority : str, default None - hostname[:port] of the Data Lake Gen 2 Service. Defaults to - `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite. - blob_storage_scheme : str, default None - Either `http` or `https`. Defaults to `https`. Useful for connecting to a local - emulator, like Azurite. - dfs_storage_scheme : str, default None - Either `http` or `https`. Defaults to `https`. Useful for connecting to a local - emulator, like Azurite. 
- sas_token : str, default None - SAS token for the storage account, used as an alternative to account_key. If sas_token - and account_key are None the default credential will be used. The parameters - account_key and sas_token are mutually exclusive. - - Examples - -------- - >>> from pyarrow import fs - >>> azure_fs = fs.AzureFileSystem(account_name="myaccount") - >>> azurite_fs = fs.AzureFileSystem( - ... account_name="devstoreaccount1", - ... account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==", - ... blob_storage_authority="127.0.0.1:10000", - ... dfs_storage_authority="127.0.0.1:10000", - ... blob_storage_scheme="http", - ... dfs_storage_scheme="http", - ... ) - - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. - """ - - def __init__( - self, - account_name: str, - account_key: str | None = None, - blob_storage_authority: str | None = None, - dfs_storage_authority: str | None = None, - blob_storage_schema: Literal["http", "https"] = "https", - dfs_storage_schema: Literal["http", "https"] = "https", - sas_token: str | None = None, - ) -> None: ... diff --git a/python/pyarrow/_compute.pyi b/python/pyarrow/_compute.pyi deleted file mode 100644 index fa80304cf91..00000000000 --- a/python/pyarrow/_compute.pyi +++ /dev/null @@ -1,1768 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import ( - Any, - Callable, - Iterable, - Literal, - Sequence, - TypeAlias, - TypedDict, - overload, -) - -from . import lib -from .compute import _NumericScalarT - -_Order: TypeAlias = Literal["ascending", "descending"] -_Placement: TypeAlias = Literal["at_start", "at_end"] - -class Kernel(lib._Weakrefable): - """ - A kernel object. - - Kernels handle the execution of a Function for a certain signature. - """ - -class Function(lib._Weakrefable): - """ - A compute function. - - A function implements a certain logical computation over a range of - possible input signatures. Each signature accepts a range of input - types and is implemented by a given Kernel. - - Functions can be of different kinds: - - * "scalar" functions apply an item-wise computation over all items - of their inputs. Each item in the output only depends on the values - of the inputs at the same position. Examples: addition, comparisons, - string predicates... - - * "vector" functions apply a collection-wise computation, such that - each item in the output may depend on the values of several items - in each input. Examples: dictionary encoding, sorting, extracting - unique values... - - * "scalar_aggregate" functions reduce the dimensionality of the inputs by - applying a reduction function. Examples: sum, min_max, mode... 
- - * "hash_aggregate" functions apply a reduction function to an input - subdivided by grouping criteria. They may not be directly called. - Examples: hash_sum, hash_min_max... - - * "meta" functions dispatch to other functions. - """ - @property - def arity(self) -> int: - """ - The function arity. - - If Ellipsis (i.e. `...`) is returned, the function takes a variable - number of arguments. - """ - @property - def kind( - self, - ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: - """ - The function kind. - """ - @property - def name(self) -> str: - """ - The function name. - """ - @property - def num_kernels(self) -> int: - """ - The number of kernels implementing this function. - """ - @property - def kernels(self) -> list[ScalarKernel]: - """ - A list of all kernels implementing this function. - """ - def call( - self, - args: Iterable, - options: FunctionOptions | None = None, - memory_pool: lib.MemoryPool | None = None, - length: int | None = None, - ) -> Any: - """ - Call the function on the given arguments. - - Parameters - ---------- - args : iterable - The arguments to pass to the function. Accepted types depend - on the specific function. - options : FunctionOptions, optional - Options instance for executing this function. This should have - the right concrete options type. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - length : int, optional - Batch size for execution, for nullary (no argument) functions. If - not passed, will be inferred from passed data. - """ - -class FunctionOptions(lib._Weakrefable): - def serialize(self) -> lib.Buffer: ... - @classmethod - def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... - -class FunctionRegistry(lib._Weakrefable): - def get_function(self, name: str) -> Function: - """ - Look up a function by name in the registry. - - Parameters - ---------- - name : str - The name of the function to lookup - """ - - def list_functions(self) -> list[str]: - """ - Return all function names in the registry. - """ - -class HashAggregateFunction(Function): ... -class HashAggregateKernel(Kernel): ... -class ScalarAggregateFunction(Function): ... -class ScalarAggregateKernel(Kernel): ... -class ScalarFunction(Function): ... -class ScalarKernel(Kernel): ... -class VectorFunction(Function): ... -class VectorKernel(Kernel): ... - -# ==================== _compute.pyx Option classes ==================== -class ArraySortOptions(FunctionOptions): - """ - Options for the `array_sort_indices` function. - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - null_placement : str, default "at_end" - Where nulls in the input should be sorted. - Accepted values are "at_start", "at_end". - """ - def __init__( - self, - order: _Order = "ascending", - null_placement: _Placement = "at_end", - ) -> None: ... - -class AssumeTimezoneOptions(FunctionOptions): - """ - Options for the `assume_timezone` function. - - Parameters - ---------- - timezone : str - Timezone to assume for the input. - ambiguous : str, default "raise" - How to handle timestamps that are ambiguous in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - nonexistent : str, default "raise" - How to handle timestamps that don't exist in the assumed timezone. - Accepted values are "raise", "earliest", "latest". 
- """ - - def __init__( - self, - timezone: str, - *, - ambiguous: Literal["raise", "earliest", "latest"] = "raise", - nonexistent: Literal["raise", "earliest", "latest"] = "raise", - ) -> None: ... - -class CastOptions(FunctionOptions): - """ - Options for the `cast` function. - - Parameters - ---------- - target_type : DataType, optional - The PyArrow type to cast to. - allow_int_overflow : bool, default False - Whether integer overflow is allowed when casting. - allow_time_truncate : bool, default False - Whether time precision truncation is allowed when casting. - allow_time_overflow : bool, default False - Whether date/time range overflow is allowed when casting. - allow_decimal_truncate : bool, default False - Whether decimal precision truncation is allowed when casting. - allow_float_truncate : bool, default False - Whether floating-point precision truncation is allowed when casting. - allow_invalid_utf8 : bool, default False - Whether producing invalid utf8 data is allowed when casting. - """ - - allow_int_overflow: bool - allow_time_truncate: bool - allow_time_overflow: bool - allow_decimal_truncate: bool - allow_float_truncate: bool - allow_invalid_utf8: bool - - def __init__( - self, - target_type: lib.DataType | None = None, - *, - allow_int_overflow: bool | None = None, - allow_time_truncate: bool | None = None, - allow_time_overflow: bool | None = None, - allow_decimal_truncate: bool | None = None, - allow_float_truncate: bool | None = None, - allow_invalid_utf8: bool | None = None, - ) -> None: ... - @staticmethod - def safe(target_type: lib.DataType | None = None) -> CastOptions: ... - @staticmethod - def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... - def is_safe(self) -> bool: ... - -class CountOptions(FunctionOptions): - """ - Options for the `count` function. - - Parameters - ---------- - mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - """ - def __init__(self, mode: Literal["only_valid", "only_null", "all"] = "only_valid") -> None: ... - -class CumulativeOptions(FunctionOptions): - """ - Options for `cumulative_*` functions. - - - cumulative_sum - - cumulative_sum_checked - - cumulative_prod - - cumulative_prod_checked - - cumulative_max - - cumulative_min - - Parameters - ---------- - start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. - skip_nulls : bool, default False - When false, the first encountered null is propagated. - """ - def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... - -class CumulativeSumOptions(FunctionOptions): - """ - Options for `cumulative_sum` function. - - Parameters - ---------- - start : Scalar, default None - Starting value for sum computation - skip_nulls : bool, default False - When false, the first encountered null is propagated. - """ - def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... - -class DayOfWeekOptions(FunctionOptions): - """ - Options for the `day_of_week` function. - - Parameters - ---------- - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). - How this value is numbered is unaffected by `count_from_zero`. - """ - - def __init__(self, *, count_from_zero: bool = True, week_start: int = 1) -> None: ... 
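A brief sketch of how these option classes are used with pyarrow.compute, taking `DayOfWeekOptions` as an example (the dates are chosen for illustration); passing the equivalent keyword arguments to the compute wrapper builds the same options object.

import datetime
import pyarrow as pa
import pyarrow.compute as pc

days = pa.array([datetime.date(2024, 1, 1), datetime.date(2024, 1, 7)])  # Monday, Sunday

# Number days 1..7 with the week starting on Sunday: Monday -> 2, Sunday -> 1.
opts = pc.DayOfWeekOptions(count_from_zero=False, week_start=7)
result = pc.day_of_week(days, options=opts)

# Equivalent keyword form.
assert result.equals(pc.day_of_week(days, count_from_zero=False, week_start=7))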
-
-class DictionaryEncodeOptions(FunctionOptions):
-    """
-    Options for dictionary encoding.
-
-    Parameters
-    ----------
-    null_encoding : str, default "mask"
-        How to encode nulls in the input.
-        Accepted values are "mask" (null inputs emit a null in the indices
-        array), "encode" (null inputs emit a non-null index pointing to
-        a null value in the dictionary array).
-    """
-    def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ...
-
-class RunEndEncodeOptions(FunctionOptions):
-    """
-    Options for run-end encoding.
-
-    Parameters
-    ----------
-    run_end_type : DataType, default pyarrow.int32()
-        The data type of the run_ends array.
-
-        Accepted values are pyarrow.{int16(), int32(), int64()}.
-    """
-    # TODO: default is DataType(int32)
-    def __init__(self, run_end_type: lib.DataType | Literal["int16", "int32", "int64"] = ...) -> None: ...
-
-class ElementWiseAggregateOptions(FunctionOptions):
-    """
-    Options for element-wise aggregate functions.
-
-    Parameters
-    ----------
-    skip_nulls : bool, default True
-        Whether to skip (ignore) nulls in the input.
-        If False, any null in the input forces the output to null.
-    """
-    def __init__(self, *, skip_nulls: bool = True) -> None: ...
-
-class ExtractRegexOptions(FunctionOptions):
-    """
-    Options for the `extract_regex` function.
-
-    Parameters
-    ----------
-    pattern : str
-        Regular expression with named capture fields.
-    """
-    def __init__(self, pattern: str) -> None: ...
-
-class ExtractRegexSpanOptions(FunctionOptions):
-    """
-    Options for the `extract_regex_span` function.
-
-    Parameters
-    ----------
-    pattern : str
-        Regular expression with named capture fields.
-    """
-    def __init__(self, pattern: str) -> None: ...
-
-class FilterOptions(FunctionOptions):
-    """
-    Options for selecting with a boolean filter.
-
-    Parameters
-    ----------
-    null_selection_behavior : str, default "drop"
-        How to handle nulls in the selection filter.
-        Accepted values are "drop", "emit_null".
-    """
-
-    def __init__(self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ...
-
-class IndexOptions(FunctionOptions):
-    """
-    Options for the `index` function.
-
-    Parameters
-    ----------
-    value : Scalar
-        The value to search for.
-    """
-    def __init__(self, value: lib.Scalar) -> None: ...
-
-class JoinOptions(FunctionOptions):
-    """
-    Options for the `binary_join_element_wise` function.
-
-    Parameters
-    ----------
-    null_handling : str, default "emit_null"
-        How to handle null values in the inputs.
-        Accepted values are "emit_null", "skip", "replace".
-    null_replacement : str, default ""
-        Replacement string to emit for null inputs if `null_handling`
-        is "replace".
-    """
-    @overload
-    def __init__(self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ...
-    @overload
-    def __init__(self, null_handling: Literal["replace"], null_replacement: str = "") -> None: ...
-
-class ListSliceOptions(FunctionOptions):
-    """
-    Options for list array slicing.
-
-    Parameters
-    ----------
-    start : int
-        Index to start slicing inner list elements (inclusive).
-    stop : Optional[int], default None
-        If given, index to stop slicing at (exclusive).
-        If not given, slicing will stop at the end. (NotImplemented)
-    step : int, default 1
-        Slice step.
-    return_fixed_size_list : Optional[bool], default None
-        Whether to return a FixedSizeListArray. If true _and_ stop is after
-        a list element's length, nulls will be appended to create the
-        requested slice size.
The default of `None` will return the same - type which was passed in. - """ - def __init__( - self, - start: int, - stop: int | None = None, - step: int = 1, - return_fixed_size_list: bool | None = None, - ) -> None: ... - -class ListFlattenOptions(FunctionOptions): - """ - Options for `list_flatten` function - - Parameters - ---------- - recursive : bool, default False - When True, the list array is flattened recursively until an array - of non-list values is formed. - """ - def __init__(self, recursive: bool = False) -> None: ... - -class MakeStructOptions(FunctionOptions): - """ - Options for the `make_struct` function. - - Parameters - ---------- - field_names : sequence of str - Names of the struct fields to create. - field_nullability : sequence of bool, optional - Nullability information for each struct field. - If omitted, all fields are nullable. - field_metadata : sequence of KeyValueMetadata, optional - Metadata for each struct field. - """ - def __init__( - self, - field_names: Sequence[str] = (), - *, - field_nullability: Sequence[bool] | None = None, - field_metadata: Sequence[lib.KeyValueMetadata] | None = None, - ) -> None: ... - -class MapLookupOptions(FunctionOptions): - """ - Options for the `map_lookup` function. - - Parameters - ---------- - query_key : Scalar or Object can be converted to Scalar - The key to search for. - occurrence : str - The occurrence(s) to return from the Map - Accepted values are "first", "last", or "all". - """ - # TODO: query_key: Scalar or Object can be converted to Scalar - def __init__( - self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] - ) -> None: ... - -class MatchSubstringOptions(FunctionOptions): - """ - Options for looking for a substring. - - Parameters - ---------- - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - """ - - def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... - -class ModeOptions(FunctionOptions): - """ - Options for the `mode` function. - - Parameters - ---------- - n : int, default 1 - Number of distinct most-common values to return. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__(self, n: int = 1, *, skip_nulls: bool = True, min_count: int = 0) -> None: ... - -class NullOptions(FunctionOptions): - """ - Options for the `is_null` function. - - Parameters - ---------- - nan_is_null : bool, default False - Whether floating-point NaN values are considered null. - """ - def __init__(self, *, nan_is_null: bool = False) -> None: ... - -class PadOptions(FunctionOptions): - """ - Options for padding strings. - - Parameters - ---------- - width : int - Desired string length. - padding : str, default " " - What to pad the string with. Should be one byte or codepoint. - lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). - """ - def __init__( - self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True - ) -> None: ... - -class PairwiseOptions(FunctionOptions): - """ - Options for `pairwise` functions. 
- - Parameters - ---------- - period : int, default 1 - Period for applying the period function. - """ - def __init__(self, period: int = 1) -> None: ... - -class PartitionNthOptions(FunctionOptions): - """ - Options for the `partition_nth_indices` function. - - Parameters - ---------- - pivot : int - Index into the equivalent sorted array of the pivot element. - null_placement : str, default "at_end" - Where nulls in the input should be partitioned. - Accepted values are "at_start", "at_end". - """ - def __init__(self, pivot: int, *, null_placement: _Placement = "at_end") -> None: ... - -class WinsorizeOptions(FunctionOptions): - """ - Options for the `winsorize` function. - - Parameters - ---------- - lower_limit : float, between 0 and 1 - The quantile below which all values are replaced with the quantile's value. - upper_limit : float, between 0 and 1 - The quantile above which all values are replaced with the quantile's value. - """ - def __init__(self, lower_limit: float, upper_limit: float) -> None: ... - -class QuantileOptions(FunctionOptions): - """ - Options for the `quantile` function. - - Parameters - ---------- - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to compute. All values must be in - [0, 1]. - interpolation : str, default "linear" - How to break ties between competing data points for a given quantile. - Accepted values are: - - - "linear": compute an interpolation - - "lower": always use the smallest of the two data points - - "higher": always use the largest of the two data points - - "nearest": select the data point that is closest to the quantile - - "midpoint": compute the (unweighted) mean of the two data points - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__( - self, - q: float | Sequence[float] = 0.5, - *, - interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", - skip_nulls: bool = True, - min_count: int = 0, - ) -> None: ... - -class RandomOptions(FunctionOptions): - """ - Options for random generation. - - Parameters - ---------- - initializer : int or str - How to initialize the underlying random generator. - If an integer is given, it is used as a seed. - If "system" is given, the random generator is initialized with - a system-specific source of (hopefully true) randomness. - Other values are invalid. - """ - def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... - -class RankOptions(FunctionOptions): - """ - Options for the `rank` function. - - Parameters - ---------- - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - tiebreaker : str, default "first" - Configure how ties between equal values are handled. - Accepted values are: - - - "min": Ties get the smallest possible rank in sorted order. 
- - "max": Ties get the largest possible rank in sorted order. - - "first": Ranks are assigned in order of when ties appear in the - input. This ensures the ranks are a stable permutation - of the input. - - "dense": The ranks span a dense [1, M] interval where M is the - number of distinct values in the input. - """ - def __init__( - self, - sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", - *, - null_placement: _Placement = "at_end", - tiebreaker: Literal["min", "max", "first", "dense"] = "first", - ) -> None: ... - -class RankQuantileOptions(FunctionOptions): - """ - Options for the `rank_quantile` function. - - Parameters - ---------- - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - """ - - def __init__( - self, - sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", - *, - null_placement: _Placement = "at_end", - ) -> None: ... - -class PivotWiderOptions(FunctionOptions): - """ - Options for the `pivot_wider` function. - - Parameters - ---------- - key_names : sequence of str - The pivot key names expected in the pivot key column. - For each entry in `key_names`, a column with the same name is emitted - in the struct output. - unexpected_key_behavior : str, default "ignore" - The behavior when pivot keys not in `key_names` are encountered. - Accepted values are "ignore", "raise". - If "ignore", unexpected keys are silently ignored. - If "raise", unexpected keys raise a KeyError. - """ - def __init__( - self, - key_names: Sequence[str], - *, - unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", - ) -> None: ... - -class ReplaceSliceOptions(FunctionOptions): - """ - Options for replacing slices. - - Parameters - ---------- - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - """ - def __init__(self, start: int, stop: int, replacement: str) -> None: ... - -class ReplaceSubstringOptions(FunctionOptions): - """ - Options for replacing matched substrings. - - Parameters - ---------- - pattern : str - Substring pattern to look for inside input values. - replacement : str - What to replace the pattern with. - max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). - """ - def __init__( - self, pattern: str, replacement: str, *, max_replacements: int | None = None - ) -> None: ... - -_RoundMode: TypeAlias = Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", -] - -class RoundBinaryOptions(FunctionOptions): - """ - Options for rounding numbers when ndigits is provided by a second array - - Parameters - ---------- - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". 
- """ - def __init__( - self, - round_mode: _RoundMode = "half_to_even", - ) -> None: ... - -class RoundOptions(FunctionOptions): - """ - Options for rounding numbers. - - Parameters - ---------- - ndigits : int, default 0 - Number of fractional digits to round to. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - """ - def __init__( - self, - ndigits: int = 0, - round_mode: _RoundMode = "half_to_even", - ) -> None: ... - -_DateTimeUint: TypeAlias = Literal[ - "year", - "quarter", - "month", - "week", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - "nanosecond", -] - -class RoundTemporalOptions(FunctionOptions): - """ - Options for rounding temporal values. - - Parameters - ---------- - multiple : int, default 1 - Number of units to round to. - unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. - calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. - """ - def __init__( - self, - multiple: int = 1, - unit: _DateTimeUint = "day", - *, - week_starts_monday: bool = True, - ceil_is_strictly_greater: bool = False, - calendar_based_origin: bool = False, - ) -> None: ... - -class RoundToMultipleOptions(FunctionOptions): - """ - Options for rounding numbers to a multiple. - - Parameters - ---------- - multiple : numeric scalar, default 1.0 - Multiple to round to. Should be a scalar of a type compatible - with the argument to be rounded. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". 
-    """
-    def __init__(self, multiple: int | float | _NumericScalarT = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ...
-
-class ScalarAggregateOptions(FunctionOptions):
-    """
-    Options for scalar aggregations.
-
-    Parameters
-    ----------
-    skip_nulls : bool, default True
-        Whether to skip (ignore) nulls in the input.
-        If False, any null in the input forces the output to null.
-    min_count : int, default 1
-        Minimum number of non-null values in the input. If the number
-        of non-null values is below `min_count`, the output is null.
-    """
-    def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ...
-
-class SelectKOptions(FunctionOptions):
-    """
-    Options for top/bottom k-selection.
-
-    Parameters
-    ----------
-    k : int
-        Number of leading values to select in sorted order
-        (i.e. the largest values if sort order is "descending",
-        the smallest otherwise).
-    sort_keys : sequence of (name, order) tuples
-        Names of field/column keys to sort the input on,
-        along with the order each field/column is sorted in.
-        Accepted values for `order` are "ascending", "descending".
-        The field name can be a string column name or expression.
-    """
-
-    def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ...
-
-class SetLookupOptions(FunctionOptions):
-    """
-    Options for the `is_in` and `index_in` functions.
-
-    Parameters
-    ----------
-    value_set : Array
-        Set of values to look for in the input.
-    skip_nulls : bool, default False
-        If False, nulls in the input are matched in the value_set just
-        like regular values.
-        If True, nulls in the input always fail matching.
-    """
-    def __init__(self, value_set: lib.Array, *, skip_nulls: bool = False) -> None: ...
-
-class SliceOptions(FunctionOptions):
-    """
-    Options for slicing.
-
-    Parameters
-    ----------
-    start : int
-        Index to start slicing at (inclusive).
-    stop : int or None, default None
-        If given, index to stop slicing at (exclusive).
-        If not given, slicing will stop at the end.
-    step : int, default 1
-        Slice step.
-    """
-
-    def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ...
-
-class SortOptions(FunctionOptions):
-    """
-    Options for the `sort_indices` function.
-
-    Parameters
-    ----------
-    sort_keys : sequence of (name, order) tuples
-        Names of field/column keys to sort the input on,
-        along with the order each field/column is sorted in.
-        Accepted values for `order` are "ascending", "descending".
-        The field name can be a string column name or expression.
-    null_placement : str, default "at_end"
-        Where nulls in input should be sorted, only applying to
-        columns/fields mentioned in `sort_keys`.
-        Accepted values are "at_start", "at_end".
-    """
-    def __init__(
-        self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end"
-    ) -> None: ...
-
-class SplitOptions(FunctionOptions):
-    """
-    Options for splitting on whitespace.
-
-    Parameters
-    ----------
-    max_splits : int or None, default None
-        Maximum number of splits for each input value (unlimited if None).
-    reverse : bool, default False
-        Whether to start splitting from the end of each input value.
-        This only has an effect if `max_splits` is not None.
-    """
-
-    def __init__(self, *, max_splits: int | None = None, reverse: bool = False) -> None: ...
-
-class SplitPatternOptions(FunctionOptions):
-    """
-    Options for splitting on a string pattern.
-
-    Parameters
-    ----------
-    pattern : str
-        String pattern to split on.
- max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - """ - def __init__( - self, pattern: str, *, max_splits: int | None = None, reverse: bool = False - ) -> None: ... - -class StrftimeOptions(FunctionOptions): - """ - Options for the `strftime` function. - - Parameters - ---------- - format : str, default "%Y-%m-%dT%H:%M:%S" - Pattern for formatting input values. - locale : str, default "C" - Locale to use for locale-specific format specifiers. - """ - def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C") -> None: ... - -class StrptimeOptions(FunctionOptions): - """ - Options for the `strptime` function. - - Parameters - ---------- - format : str - Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". - Note that the semantics of the format follow the C/C++ strptime, not the Python one. - There are differences in behavior, for example how the "%y" placeholder - handles years with less than four digits. - unit : str - Timestamp unit of the output. - Accepted values are "s", "ms", "us", "ns". - error_is_null : boolean, default False - Return null on parsing errors if true or raise if false. - """ - def __init__( - self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False - ) -> None: ... - -class StructFieldOptions(FunctionOptions): - """ - Options for the `struct_field` function. - - Parameters - ---------- - indices : List[str], List[bytes], List[int], Expression, bytes, str, or int - List of indices for chained field lookup, for example `[4, 1]` - will look up the second nested field in the fifth outer field. - """ - def __init__( - self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int - ) -> None: ... - -class TakeOptions(FunctionOptions): - """ - Options for the `take` and `array_take` functions. - - Parameters - ---------- - boundscheck : boolean, default True - Whether to check indices are within bounds. If False and an - index is out of bounds, behavior is undefined (the process - may crash). - """ - def __init__(self, boundscheck: bool = True) -> None: ... - -class TDigestOptions(FunctionOptions): - """ - Options for the `tdigest` function. - - Parameters - ---------- - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to approximate. All values must be - in [0, 1]. - delta : int, default 100 - Compression parameter for the T-digest algorithm. - buffer_size : int, default 500 - Buffer size for the T-digest algorithm. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__( - self, - q: float | Sequence[float] = 0.5, - *, - delta: int = 100, - buffer_size: int = 500, - skip_nulls: bool = True, - min_count: int = 0, - ) -> None: ... - -class TrimOptions(FunctionOptions): - """ - Options for trimming characters from strings. - - Parameters - ---------- - characters : str - Individual characters to be trimmed from the string. - """ - def __init__(self, characters: str) -> None: ... - -class Utf8NormalizeOptions(FunctionOptions): - """ - Options for the `utf8_normalize` function. 
- - Parameters - ---------- - form : str - Unicode normalization form. - Accepted values are "NFC", "NFKC", "NFD", NFKD". - """ - - def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... - -class ZeroFillOptions(FunctionOptions): - """ - Options for utf8_zero_fill. - - Parameters - ---------- - width : int - Desired string length. - padding : str, default "0" - Padding character. Should be one Unicode codepoint. - """ - def __init__(self, width: int, padding: str = '0') -> None: ... - -class VarianceOptions(FunctionOptions): - """ - Options for the `variance` and `stddev` functions. - - Parameters - ---------- - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, min_count: int = 0) -> None: ... - -class SkewOptions(FunctionOptions): - """ - Options for the `skew` and `kurtosis` functions. - - Parameters - ---------- - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - biased : bool, default True - Whether the calculated value is biased. - If False, the value computed includes a correction factor to reduce bias. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__( - self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 - ) -> None: ... - -class WeekOptions(FunctionOptions): - """ - Options for the `week` function. - - Parameters - ---------- - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - count_from_zero : bool, default False - If True, dates at the start of a year that fall into the last week - of the previous year emit 0. - If False, they emit 52 or 53 (the week number of the last week - of the previous year). - first_week_is_fully_in_year : bool, default False - If True, week number 0 is fully in January. - If False, a week that begins on December 29, 30 or 31 is considered - to be week number 0 of the following year. - """ - def __init__( - self, - *, - week_starts_monday: bool = True, - count_from_zero: bool = False, - first_week_is_fully_in_year: bool = False, - ) -> None: ... - -# ==================== _compute.pyx Functions ==================== - -def call_function( - name: str, - args: list, - options: FunctionOptions | None = None, - memory_pool: lib.MemoryPool | None = None, - length: int | None = None, -) -> Any: - """ - Call a named function. - - The function is looked up in the global registry - (as returned by `function_registry()`). - - Parameters - ---------- - name : str - The name of the function to call. - args : list - The arguments to the function. - options : optional - options provided to the function. - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. - length : int, optional - Batch size for execution, for nullary (no argument) functions. If not - passed, inferred from data. - """ - -def function_registry() -> FunctionRegistry: ... -def get_function(name: str) -> Function: - """ - Get a function by name. 
- - The function is looked up in the global registry - (as returned by `function_registry()`). - - Parameters - ---------- - name : str - The name of the function to lookup - """ - -def list_functions() -> list[str]: - """ - Return all function names in the global registry. - """ - -# ==================== _compute.pyx Udf ==================== - -def call_tabular_function( - function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None -) -> lib.RecordBatchReader: - """ - Get a record batch iterator from a tabular function. - - Parameters - ---------- - function_name : str - Name of the function. - args : iterable - The arguments to pass to the function. Accepted types depend - on the specific function. Currently, only an empty args is supported. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - """ - -class _FunctionDoc(TypedDict): - summary: str - description: str - -def register_scalar_function( - func: Callable, - function_name: str, - function_doc: _FunctionDoc, - in_types: dict[str, lib.DataType], - out_type: lib.DataType, - func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined scalar function. - - This API is EXPERIMENTAL. - - A scalar function is a function that executes elementwise - operations on arrays or scalars, i.e. a scalar function must - be computed row-by-row with no state where each output row - is computed only from its corresponding input row. - In other words, all argument arrays have the same length, - and the output array is of the same length as the arguments. - Scalar functions are the only functions allowed in query engine - expressions. - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return an Array or Scalar - matching the out_type. It must return a Scalar if - all arguments are scalar, else it must return an Array. - - To define a varargs function, pass a callable that takes - *args. The last in_type will be the type of all varargs - arguments. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. - The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "simple udf" - >>> func_doc["description"] = "add a constant to a scalar" - >>> - >>> def add_constant(ctx, array): - ... 
return pc.add(array, 1, memory_pool=ctx.memory_pool) - >>> - >>> func_name = "py_add_func" - >>> in_types = {"array": pa.int64()} - >>> out_type = pa.int64() - >>> pc.register_scalar_function(add_constant, func_name, func_doc, in_types, out_type) - >>> - >>> func = pc.get_function(func_name) - >>> func.name - 'py_add_func' - >>> answer = pc.call_function(func_name, [pa.array([20])]) - >>> answer - - [ - 21 - ] - """ - -def register_tabular_function( - func: Callable, - function_name: str, - function_doc: _FunctionDoc, - in_types: dict[str, lib.DataType], - out_type: lib.DataType, - func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined tabular function. - - This API is EXPERIMENTAL. - - A tabular function is one accepting a context argument of type - UdfContext and returning a generator of struct arrays. - The in_types argument must be empty and the out_type argument - specifies a schema. Each struct array must have field types - corresponding to the schema. - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The only argument is the context argument of type - UdfContext. It must return a callable that - returns on each invocation a StructArray matching - the out_type, where an empty array indicates end. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - Must be an empty dictionary (reserved for future use). - out_type : Union[Schema, DataType] - Schema of the function's output, or a corresponding flat struct type. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - """ - -def register_aggregate_function( - func: Callable, - function_name: str, - function_doc: _FunctionDoc, - in_types: dict[str, lib.DataType], - out_type: lib.DataType, - func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined non-decomposable aggregate function. - - This API is EXPERIMENTAL. - - A non-decomposable aggregation function is a function that executes - aggregate operations on the whole data that it is aggregating. - In other words, non-decomposable aggregate function cannot be - split into consume/merge/finalize steps. - - This is often used with ordered or segmented aggregation where groups - can be emit before accumulating all of the input data. - - Note that currently the size of any input column cannot exceed 2 GB - for a single segment (all groups combined). - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return a Scalar matching the - out_type. - To define a varargs function, pass a callable that takes - *args. The in_type needs to match in type of inputs when - the function gets called. - function_name : str - Name of the function. This name must be unique, i.e., - there should only be one function registered with - this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. 
- The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - - Examples - -------- - >>> import numpy as np - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "simple median udf" - >>> func_doc["description"] = "compute median" - >>> - >>> def compute_median(ctx, array): - ... return pa.scalar(np.median(array)) - >>> - >>> func_name = "py_compute_median" - >>> in_types = {"array": pa.int64()} - >>> out_type = pa.float64() - >>> pc.register_aggregate_function(compute_median, func_name, func_doc, in_types, out_type) - >>> - >>> func = pc.get_function(func_name) - >>> func.name - 'py_compute_median' - >>> answer = pc.call_function(func_name, [pa.array([20, 40])]) - >>> answer - - >>> table = pa.table([pa.array([1, 1, 2, 2]), pa.array([10, 20, 30, 40])], names=["k", "v"]) - >>> result = table.group_by("k").aggregate([("v", "py_compute_median")]) - >>> result - pyarrow.Table - k: int64 - v_py_compute_median: double - ---- - k: [[1,2]] - v_py_compute_median: [[15,35]] - """ - -def register_vector_function( - func: Callable, - function_name: str, - function_doc: _FunctionDoc, - in_types: dict[str, lib.DataType], - out_type: lib.DataType, - func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined vector function. - - This API is EXPERIMENTAL. - - A vector function is a function that executes vector - operations on arrays. Vector function is often used - when compute doesn't fit other more specific types of - functions (e.g., scalar and aggregate). - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return an Array or Scalar - matching the out_type. It must return a Scalar if - all arguments are scalar, else it must return an Array. - - To define a varargs function, pass a callable that takes - *args. The last in_type will be the type of all varargs - arguments. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. - The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "percent rank" - >>> func_doc["description"] = "compute percent rank" - >>> - >>> def list_flatten_udf(ctx, x): - ... 
return pc.list_flatten(x)
-    >>>
-    >>> func_name = "list_flatten_udf"
-    >>> in_types = {"array": pa.list_(pa.int64())}
-    >>> out_type = pa.int64()
-    >>> pc.register_vector_function(list_flatten_udf, func_name, func_doc, in_types, out_type)
-    >>>
-    >>> answer = pc.call_function(func_name, [pa.array([[1, 2], [3, 4]])])
-    >>> answer
-    <pyarrow.lib.Int64Array object at ...>
-    [
-      1,
-      2,
-      3,
-      4
-    ]
-    """
-
-class UdfContext:
-    """
-    Per-invocation function context/state.
-
-    This object will always be the first argument to a user-defined
-    function. It should not be used outside of a call to the function.
-    """
-
-    @property
-    def batch_length(self) -> int:
-        """
-        The common length of all input arguments (int).
-
-        In the case that all arguments are scalars, this value
-        is used to pass the "actual length" of the arguments,
-        e.g. because the scalar values are encoding a column
-        with a constant value.
-        """
-    @property
-    def memory_pool(self) -> lib.MemoryPool:
-        """
-        A memory pool for allocations (:class:`MemoryPool`).
-
-        This is the memory pool supplied by the user when they invoked
-        the function and it should be used in any calls to arrow that the
-        UDF makes if that call accepts a memory_pool.
-        """
-
-# ==================== _compute.pyx Expression ====================
-class Expression(lib._Weakrefable):
-    """
-    A logical expression to be evaluated against some input.
-
-    To create an expression:
-
-    - Use the factory function ``pyarrow.compute.scalar()`` to create a
-      scalar (not necessary when combined, see example below).
-    - Use the factory function ``pyarrow.compute.field()`` to reference
-      a field (column in table).
-    - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``.
-    - Combine expressions using python operators ``&`` (logical and),
-      ``|`` (logical or) and ``~`` (logical not).
-      Note: python keywords ``and``, ``or`` and ``not`` cannot be used
-      to combine expressions.
-    - Create expression predicates using Expression methods such as
-      ``pyarrow.compute.Expression.isin()``.
-
-    Examples
-    --------
-
-    >>> import pyarrow.compute as pc
-    >>> (pc.field("a") < pc.scalar(3)) | (pc.field("b") > 7)
-    <pyarrow.compute.Expression ((a < 3) or (b > 7))>
-    >>> pc.field("a") != 3
-    <pyarrow.compute.Expression (a != 3)>
-    >>> pc.field("a").isin([1, 2, 3])
-    <pyarrow.compute.Expression is_in(a, {value_set=int64:[
-      1,
-      2,
-      3
-    ], null_matching_behavior=MATCH})>
-    """
-
-    def equals(self, other: Expression | lib.Array | Iterable) -> bool:
-        """
-        Parameters
-        ----------
-        other : pyarrow.dataset.Expression
-
-        Returns
-        -------
-        bool
-        """
-
-    @staticmethod
-    def from_substrait(message: bytes | lib.Buffer) -> Expression:
-        """
-        Deserialize an expression from Substrait
-
-        The serialized message must be an ExtendedExpression message that has
-        only a single expression. The name of the expression and the schema
-        the expression was bound to will be ignored. Use
-        pyarrow.substrait.deserialize_expressions if this information is needed
-        or if the message might contain multiple expressions.
-
-        Parameters
-        ----------
-        message : bytes or Buffer or a protobuf Message
-            The Substrait message to deserialize
-
-        Returns
-        -------
-        Expression
-            The deserialized expression
-        """
-    def to_substrait(self, schema: lib.Schema, allow_arrow_extensions: bool = False) -> lib.Buffer:
-        """
-        Serialize the expression using Substrait
-
-        The expression will be serialized as an ExtendedExpression message that has a
-        single expression named "expression"
-
-        Parameters
-        ----------
-        schema : Schema
-            The input schema the expression will be bound to
-        allow_arrow_extensions : bool, default False
-            If False then only functions that are part of the core Substrait function
-            definitions will be allowed.
Set this to True to allow pyarrow-specific functions - but the result may not be accepted by other compute libraries. - - Returns - ------- - Buffer - A buffer containing the serialized Protobuf plan. - """ - def __invert__(self) -> Expression: ... - def __and__(self, other) -> Expression: ... - def __or__(self, other) -> Expression: ... - def __add__(self, other) -> Expression: ... - def __mul__(self, other) -> Expression: ... - def __sub__(self, other) -> Expression: ... - def __eq__(self, value: object) -> Expression: ... # type: ignore[override] - def __ne__(self, value: object) -> Expression: ... # type: ignore[override] - def __gt__(self, value: object) -> Expression: ... # type: ignore[override] - def __lt__(self, value: object) -> Expression: ... # type: ignore[override] - def __ge__(self, value: object) -> Expression: ... # type: ignore[override] - def __le__(self, value: object) -> Expression: ... # type: ignore[override] - def __truediv__(self, other) -> Expression: ... - def is_valid(self) -> bool: - """ - Check whether the expression is not-null (valid). - - This creates a new expression equivalent to calling the - `is_valid` compute function on this expression. - - Returns - ------- - is_valid : Expression - """ - def is_null(self, nan_is_null: bool = False) -> Expression: - """ - Check whether the expression is null. - - This creates a new expression equivalent to calling the - `is_null` compute function on this expression. - - Parameters - ---------- - nan_is_null : boolean, default False - Whether floating-point NaNs are considered null. - - Returns - ------- - is_null : Expression - """ - def is_nan(self) -> Expression: - """ - Check whether the expression is NaN. - - This creates a new expression equivalent to calling the - `is_nan` compute function on this expression. - - Returns - ------- - is_nan : Expression - """ - def cast( - self, type: lib.DataType | Literal["bool"], safe: bool = True, options: CastOptions | None = None - ) -> Expression: - """ - Explicitly set or change the expression's data type. - - This creates a new expression equivalent to calling the - `cast` compute function on this expression. - - Parameters - ---------- - type : DataType, default None - Type to cast array to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - cast : Expression - """ - def isin(self, values: lib.Array | Iterable) -> Expression: - """ - Check whether the expression is contained in values. - - This creates a new expression equivalent to calling the - `is_in` compute function on this expression. - - Parameters - ---------- - values : Array or iterable - The values to check for. - - Returns - ------- - isin : Expression - A new expression that, when evaluated, checks whether - this expression's value is contained in `values`. - """ - -# ==================== _compute.py ==================== diff --git a/python/pyarrow/_csv.pyi b/python/pyarrow/_csv.pyi deleted file mode 100644 index c490d6be93a..00000000000 --- a/python/pyarrow/_csv.pyi +++ /dev/null @@ -1,658 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from dataclasses import dataclass, field -from typing import IO, Any, Callable, Literal - -from _typeshed import StrPath - -from . import lib - -@dataclass(kw_only=True) -class ReadOptions(lib._Weakrefable): - """ - Options for reading CSV files. - - Parameters - ---------- - use_threads : bool, optional (default True) - Whether to use multiple threads to accelerate reading - block_size : int, optional - How much bytes to process at a time from the input stream. - This will determine multi-threading granularity as well as - the size of individual record batches or table chunks. - Minimum valid value for block size is 1 - skip_rows : int, optional (default 0) - The number of rows to skip before the column names (if any) - and the CSV data. - skip_rows_after_names : int, optional (default 0) - The number of rows to skip after the column names. - This number can be larger than the number of rows in one - block, and empty rows are counted. - The order of application is as follows: - - `skip_rows` is applied (if non-zero); - - column names are read (unless `column_names` is set); - - `skip_rows_after_names` is applied (if non-zero). - column_names : list, optional - The column names of the target table. If empty, fall back on - `autogenerate_column_names`. - autogenerate_column_names : bool, optional (default False) - Whether to autogenerate column names if `column_names` is empty. - If true, column names will be of the form "f0", "f1"... - If false, column names will be read from the first CSV row - after `skip_rows`. - encoding : str, optional (default 'utf8') - The character encoding of the CSV data. Columns that cannot - decode using this encoding can still be read as Binary. 
- - Examples - -------- - - Defining an example data: - - >>> import io - >>> s = "1,2,3\\nFlamingo,2,2022-03-01\\nHorse,4,2022-03-02\\nBrittle stars,5,2022-03-03\\nCentipede,100,2022-03-04" - >>> print(s) - 1,2,3 - Flamingo,2,2022-03-01 - Horse,4,2022-03-02 - Brittle stars,5,2022-03-03 - Centipede,100,2022-03-04 - - Ignore the first numbered row and substitute it with defined - or autogenerated column names: - - >>> from pyarrow import csv - >>> read_options = csv.ReadOptions(column_names=["animals", "n_legs", "entry"], skip_rows=1) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - - >>> read_options = csv.ReadOptions(autogenerate_column_names=True, skip_rows=1) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - f0: string - f1: int64 - f2: date32[day] - ---- - f0: [["Flamingo","Horse","Brittle stars","Centipede"]] - f1: [[2,4,5,100]] - f2: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - - Remove the first 2 rows of the data: - - >>> read_options = csv.ReadOptions(skip_rows_after_names=2) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - 1: string - 2: int64 - 3: date32[day] - ---- - 1: [["Brittle stars","Centipede"]] - 2: [[5,100]] - 3: [[2022-03-03,2022-03-04]] - """ - - use_threads: bool = field(default=True, kw_only=False) - block_size: int | None = None - skip_rows: int = 0 - skip_rows_after_names: int = 0 - column_names: list[str] | None = None - autogenerate_column_names: bool = False - encoding: str = "utf8" - - def validate(self) -> None: ... - -@dataclass(kw_only=True) -class ParseOptions(lib._Weakrefable): - """ - Options for parsing CSV files. - - Parameters - ---------- - delimiter : 1-character string, optional (default ',') - The character delimiting individual cells in the CSV data. - quote_char : 1-character string or False, optional (default '"') - The character used optionally for quoting CSV values - (False if quoting is not allowed). - double_quote : bool, optional (default True) - Whether two quotes in a quoted CSV value denote a single quote - in the data. - escape_char : 1-character string or False, optional (default False) - The character used optionally for escaping special characters - (False if escaping is not allowed). - newlines_in_values : bool, optional (default False) - Whether newline characters are allowed in CSV values. - Setting this to True reduces the performance of multi-threaded - CSV reading. - ignore_empty_lines : bool, optional (default True) - Whether empty lines are ignored in CSV input. - If False, an empty line is interpreted as containing a single empty - value (assuming a one-column CSV file). - invalid_row_handler : callable, optional (default None) - If not None, this object is called for each CSV row that fails - parsing (because of a mismatching number of columns). - It should accept a single InvalidRow argument and return either - "skip" or "error" depending on the desired outcome. - - Examples - -------- - - Defining an example file from bytes object: - - >>> import io - >>> s = ( - ... "animals;n_legs;entry\\n" - ... "Flamingo;2;2022-03-01\\n" - ... "# Comment here:\\n" - ... "Horse;4;2022-03-02\\n" - ... "Brittle stars;5;2022-03-03\\n" - ... "Centipede;100;2022-03-04" - ... 
) - >>> print(s) - animals;n_legs;entry - Flamingo;2;2022-03-01 - # Comment here: - Horse;4;2022-03-02 - Brittle stars;5;2022-03-03 - Centipede;100;2022-03-04 - >>> source = io.BytesIO(s.encode()) - - Read the data from a file skipping rows with comments - and defining the delimiter: - - >>> from pyarrow import csv - >>> def skip_comment(row): - ... if row.text.startswith("# "): - ... return "skip" - ... else: - ... return "error" - >>> parse_options = csv.ParseOptions(delimiter=";", invalid_row_handler=skip_comment) - >>> csv.read_csv(source, parse_options=parse_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - """ - - delimiter: str = field(default=",", kw_only=False) - quote_char: str | Literal[False] = '"' - double_quote: bool = True - escape_char: str | Literal[False] = False - newlines_in_values: bool = False - ignore_empty_lines: bool = True - invalid_row_handler: Callable[[InvalidRow], Literal["skip", "error"]] | None = None - - def validate(self) -> None: ... - -@dataclass(kw_only=True) -class ConvertOptions(lib._Weakrefable): - """ - Options for converting CSV data. - - Parameters - ---------- - check_utf8 : bool, optional (default True) - Whether to check UTF8 validity of string columns. - column_types : pyarrow.Schema or dict, optional - Explicitly map column names to column types. Passing this argument - disables type inference on the defined columns. - null_values : list, optional - A sequence of strings that denote nulls in the data - (defaults are appropriate in most cases). Note that by default, - string columns are not checked for null values. To enable - null checking for those, specify ``strings_can_be_null=True``. - true_values : list, optional - A sequence of strings that denote true booleans in the data - (defaults are appropriate in most cases). - false_values : list, optional - A sequence of strings that denote false booleans in the data - (defaults are appropriate in most cases). - decimal_point : 1-character string, optional (default '.') - The character used as decimal point in floating-point and decimal - data. - strings_can_be_null : bool, optional (default False) - Whether string / binary columns can have null values. - If true, then strings in null_values are considered null for - string columns. - If false, then all strings are valid string values. - quoted_strings_can_be_null : bool, optional (default True) - Whether quoted values can be null. - If true, then strings in "null_values" are also considered null - when they appear quoted in the CSV file. Otherwise, quoted values - are never considered null. - include_columns : list, optional - The names of columns to include in the Table. - If empty, the Table will include all columns from the CSV file. - If not empty, only these columns will be included, in this order. - include_missing_columns : bool, optional (default False) - If false, columns in `include_columns` but not in the CSV file will - error out. - If true, columns in `include_columns` but not in the CSV file will - produce a column of nulls (whose type is selected using - `column_types`, or null by default). - This option is ignored if `include_columns` is empty. - auto_dict_encode : bool, optional (default False) - Whether to try to automatically dict-encode string / binary data. 
- If true, then when type inference detects a string or binary column, - it it dict-encoded up to `auto_dict_max_cardinality` distinct values - (per chunk), after which it switches to regular encoding. - This setting is ignored for non-inferred columns (those in - `column_types`). - auto_dict_max_cardinality : int, optional - The maximum dictionary cardinality for `auto_dict_encode`. - This value is per chunk. - timestamp_parsers : list, optional - A sequence of strptime()-compatible format strings, tried in order - when attempting to infer or convert timestamp values (the special - value ISO8601() can also be given). By default, a fast built-in - ISO-8601 parser is used. - - Examples - -------- - - Defining an example data: - - >>> import io - >>> s = ( - ... "animals,n_legs,entry,fast\\n" - ... "Flamingo,2,01/03/2022,Yes\\n" - ... "Horse,4,02/03/2022,Yes\\n" - ... "Brittle stars,5,03/03/2022,No\\n" - ... "Centipede,100,04/03/2022,No\\n" - ... ",6,05/03/2022," - ... ) - >>> print(s) - animals,n_legs,entry,fast - Flamingo,2,01/03/2022,Yes - Horse,4,02/03/2022,Yes - Brittle stars,5,03/03/2022,No - Centipede,100,04/03/2022,No - ,6,05/03/2022, - - Change the type of a column: - - >>> import pyarrow as pa - >>> from pyarrow import csv - >>> convert_options = csv.ConvertOptions(column_types={"n_legs": pa.float64()}) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: double - entry: string - fast: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]] - fast: [["Yes","Yes","No","No",""]] - - Define a date parsing format to get a timestamp type column - (in case dates are not in ISO format and not converted by default): - - >>> convert_options = csv.ConvertOptions(timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"]) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: timestamp[s] - fast: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] - fast: [["Yes","Yes","No","No",""]] - - Specify a subset of columns to be read: - - >>> convert_options = csv.ConvertOptions(include_columns=["animals", "n_legs"]) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - - List additional column to be included as a null typed column: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals", "n_legs", "location"], include_missing_columns=True - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - location: null - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - location: [5 nulls] - - Define columns as dictionary type (by default only the - string/binary columns are dictionary encoded): - - >>> convert_options = csv.ConvertOptions( - ... timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"], auto_dict_encode=True - ... 
) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: dictionary - n_legs: int64 - entry: timestamp[s] - fast: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Horse","Brittle stars","Centipede",""] -- indices: - [0,1,2,3,4]] - n_legs: [[2,4,5,100,6]] - entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] - fast: [ -- dictionary: - ["Yes","No",""] -- indices: - [0,0,1,1,2]] - - Set upper limit for the number of categories. If the categories - is more than the limit, the conversion to dictionary will not - happen: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals"], auto_dict_encode=True, auto_dict_max_cardinality=2 - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - - Set empty strings to missing values: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals", "n_legs"], strings_can_be_null=True - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",null]] - n_legs: [[2,4,5,100,6]] - - Define values to be True and False when converting a column - into a bool type: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["fast"], false_values=["No"], true_values=["Yes"] - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - fast: bool - ---- - fast: [[true,true,false,false,null]] - """ - - check_utf8: bool = field(default=True, kw_only=False) - column_types: lib.Schema | dict | None = None - null_values: list[str] | None = None - true_values: list[str] | None = None - false_values: list[str] | None = None - decimal_point: str = "." - strings_can_be_null: bool = False - quoted_strings_can_be_null: bool = True - include_columns: list[str] | None = None - include_missing_columns: bool = False - auto_dict_encode: bool = False - auto_dict_max_cardinality: int | None = None - timestamp_parsers: list[str] | None = None - - def validate(self) -> None: ... - -@dataclass(kw_only=True) -class WriteOptions(lib._Weakrefable): - """ - Options for writing CSV files. - - Parameters - ---------- - include_header : bool, optional (default True) - Whether to write an initial header line with column names - batch_size : int, optional (default 1024) - How many rows to process together when converting and writing - CSV data - delimiter : 1-character string, optional (default ",") - The character delimiting individual cells in the CSV data. - quoting_style : str, optional (default "needed") - Whether to quote values, and if so, which quoting style to use. - The following values are accepted: - - - "needed" (default): only enclose values in quotes when needed. - - "all_valid": enclose all valid values in quotes; nulls are not quoted. - - "none": do not enclose any values in quotes; values containing - special characters (such as quotes, cell delimiters or line endings) - will raise an error. - """ - - include_header: bool = field(default=True, kw_only=False) - batch_size: int = 1024 - delimiter: str = "," - quoting_style: Literal["needed", "all_valid", "none"] = "needed" - - def validate(self) -> None: ... 
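To make the intent of these option classes a little more concrete, here is a minimal sketch of combining ConvertOptions on the read side with WriteOptions on the write side (the in-memory CSV bytes and the output file name animals_out.csv are invented for the example):

    import io
    import pyarrow.csv as csv

    # Treat "N/A" as null (also in string columns) and keep only two columns.
    convert_options = csv.ConvertOptions(
        null_values=["N/A"],
        strings_can_be_null=True,
        include_columns=["animals", "n_legs"],
    )
    table = csv.read_csv(
        io.BytesIO(b"animals,n_legs\nFlamingo,2\nN/A,4\n"),
        convert_options=convert_options,
    )

    # Write the result back without a header, quoting every non-null value.
    write_options = csv.WriteOptions(include_header=False, quoting_style="all_valid")
    csv.write_csv(table, "animals_out.csv", write_options=write_options)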
- -@dataclass -class InvalidRow(lib._Weakrefable): - """ - Description of an invalid row in a CSV file. - - Parameters - ---------- - expected_columns : int - The expected number of columns in the row. - actual_columns : int - The actual number of columns in the row. - number : int or None - The physical row number if known, otherwise None. - text : str - The contents of the row. - """ - - expected_columns: int - actual_columns: int - number: int | None - text: str - -class CSVWriter(lib._CRecordBatchWriter): - """ - Writer to create a CSV file. - - Parameters - ---------- - sink : str, path, pyarrow.OutputStream or file-like object - The location where to write the CSV data. - schema : pyarrow.Schema - The schema of the data to be written. - write_options : pyarrow.csv.WriteOptions - Options to configure writing the CSV data. - memory_pool : MemoryPool, optional - Pool for temporary allocations. - """ - - def __init__( - self, - # TODO: OutputStream - sink: StrPath | IO[Any], - schema: lib.Schema, - write_options: WriteOptions | None = None, - *, - memory_pool: lib.MemoryPool | None = None, - ) -> None: ... - -class CSVStreamingReader(lib.RecordBatchReader): ... - -ISO8601: lib._Weakrefable - -def open_csv( - input_file: StrPath | IO[Any], - read_options: ReadOptions | None = None, - parse_options: ParseOptions | None = None, - convert_options: ConvertOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> CSVStreamingReader: - """ - Open a streaming reader of CSV data. - - Reading using this function is always single-threaded. - - Parameters - ---------- - input_file : string, path or file-like object - The location of CSV data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. - read_options : pyarrow.csv.ReadOptions, optional - Options for the CSV reader (see pyarrow.csv.ReadOptions constructor - for defaults) - parse_options : pyarrow.csv.ParseOptions, optional - Options for the CSV parser - (see pyarrow.csv.ParseOptions constructor for defaults) - convert_options : pyarrow.csv.ConvertOptions, optional - Options for converting CSV data - (see pyarrow.csv.ConvertOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate RecordBatch memory from - - Returns - ------- - :class:`pyarrow.csv.CSVStreamingReader` - """ - -def read_csv( - input_file: StrPath | IO[Any], - read_options: ReadOptions | None = None, - parse_options: ParseOptions | None = None, - convert_options: ConvertOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Table: - """ - Read a Table from a stream of CSV data. - - Parameters - ---------- - input_file : string, path or file-like object - The location of CSV data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. 
- read_options : pyarrow.csv.ReadOptions, optional - Options for the CSV reader (see pyarrow.csv.ReadOptions constructor - for defaults) - parse_options : pyarrow.csv.ParseOptions, optional - Options for the CSV parser - (see pyarrow.csv.ParseOptions constructor for defaults) - convert_options : pyarrow.csv.ConvertOptions, optional - Options for converting CSV data - (see pyarrow.csv.ConvertOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate Table memory from - - Returns - ------- - :class:`pyarrow.Table` - Contents of the CSV file as a in-memory table. - - Examples - -------- - - Defining an example file from bytes object: - - >>> import io - >>> s = ( - ... "animals,n_legs,entry\\n" - ... "Flamingo,2,2022-03-01\\n" - ... "Horse,4,2022-03-02\\n" - ... "Brittle stars,5,2022-03-03\\n" - ... "Centipede,100,2022-03-04" - ... ) - >>> print(s) - animals,n_legs,entry - Flamingo,2,2022-03-01 - Horse,4,2022-03-02 - Brittle stars,5,2022-03-03 - Centipede,100,2022-03-04 - >>> source = io.BytesIO(s.encode()) - - Reading from the file - - >>> from pyarrow import csv - >>> csv.read_csv(source) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - """ - -def write_csv( - data: lib.RecordBatch | lib.Table, - output_file: StrPath | lib.NativeFile | IO[Any], - write_options: WriteOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> None: - """ - Write record batch or table to a CSV file. - - Parameters - ---------- - data : pyarrow.RecordBatch or pyarrow.Table - The data to write. - output_file : string, path, pyarrow.NativeFile, or file-like object - The location where to write the CSV data. - write_options : pyarrow.csv.WriteOptions - Options to configure writing the CSV data. - memory_pool : MemoryPool, optional - Pool for temporary allocations. - - Examples - -------- - - >>> import pyarrow as pa - >>> from pyarrow import csv - - >>> legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> entry_date = pa.array(["01/03/2022", "02/03/2022", "03/03/2022", "04/03/2022"]) - >>> table = pa.table([animals, legs, entry_date], names=["animals", "n_legs", "entry"]) - - >>> csv.write_csv(table, "animals.csv") - - >>> write_options = csv.WriteOptions(include_header=False) - >>> csv.write_csv(table, "animals.csv", write_options=write_options) - - >>> write_options = csv.WriteOptions(delimiter=";") - >>> csv.write_csv(table, "animals.csv", write_options=write_options) - """ diff --git a/python/pyarrow/_cuda.pyi b/python/pyarrow/_cuda.pyi deleted file mode 100644 index 6bcd9868d7f..00000000000 --- a/python/pyarrow/_cuda.pyi +++ /dev/null @@ -1,573 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any - -import cuda # type: ignore[import-not-found] - -from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] - -from . import lib -from ._stubs_typing import ArrayLike - -class Context(lib._Weakrefable): - """ - CUDA driver context. - """ - - def __init__(self, device_number: int = 0, handle: int | None = None) -> None: - """ - Create a CUDA driver context for a particular device. - - If a CUDA context handle is passed, it is wrapped, otherwise - a default CUDA context for the given device is requested. - - Parameters - ---------- - device_number : int (default 0) - Specify the GPU device for which the CUDA driver context is - requested. - handle : int, optional - Specify CUDA handle for a shared context that has been created - by another library. - """ - @staticmethod - def from_numba(context: _numba_driver.Context | None = None) -> Context: - """ - Create a Context instance from a Numba CUDA context. - - Parameters - ---------- - context : {numba.cuda.cudadrv.driver.Context, None} - A Numba CUDA context instance. - If None, the current Numba context is used. - - Returns - ------- - shared_context : pyarrow.cuda.Context - Context instance. - """ - def to_numba(self) -> _numba_driver.Context: - """ - Convert Context to a Numba CUDA context. - - Returns - ------- - context : numba.cuda.cudadrv.driver.Context - Numba CUDA context instance. - """ - @staticmethod - def get_num_devices() -> int: - """Return the number of GPU devices.""" - @property - def device_number(self) -> int: - """Return context device number.""" - @property - def handle(self) -> int: - """Return pointer to context handle.""" - def synchronize(self) -> None: - """Blocks until the device has completed all preceding requested - tasks. - """ - @property - def bytes_allocated(self) -> int: - """Return the number of allocated bytes.""" - def get_device_address(self, address: int) -> int: - """Return the device address that is reachable from kernels running in - the context - - Parameters - ---------- - address : int - Specify memory address value - - Returns - ------- - device_address : int - Device address accessible from device context - - Notes - ----- - The device address is defined as a memory address accessible - by device. While it is often a device memory address but it - can be also a host memory address, for instance, when the - memory is allocated as host memory (using cudaMallocHost or - cudaHostAlloc) or as managed memory (using cudaMallocManaged) - or the host memory is page-locked (using cudaHostRegister). - """ - def new_buffer(self, nbytes: int) -> CudaBuffer: - """Return new device buffer. - - Parameters - ---------- - nbytes : int - Specify the number of bytes to be allocated. - - Returns - ------- - buf : CudaBuffer - Allocated buffer. - """ - @property - def memory_manager(self) -> lib.MemoryManager: - """ - The default memory manager tied to this context's device. - - Returns - ------- - MemoryManager - """ - @property - def device(self) -> lib.Device: - """ - The device instance associated with this context. - - Returns - ------- - Device - """ - def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: - """ - Create device buffer from address and size as a view. - - The caller is responsible for allocating and freeing the - memory. When `address==size==0` then a new zero-sized buffer - is returned. 
- - Parameters - ---------- - address : int - Specify the starting address of the buffer. The address can - refer to both device or host memory but it must be - accessible from device after mapping it with - `get_device_address` method. - size : int - Specify the size of device buffer in bytes. - base : {None, object} - Specify object that owns the referenced memory. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device reachable memory. - - """ - def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: - """Open existing CUDA IPC memory handle - - Parameters - ---------- - ipc_handle : IpcMemHandle - Specify opaque pointer to CUipcMemHandle (driver API). - - Returns - ------- - buf : CudaBuffer - referencing device buffer - """ - def buffer_from_data( - self, - data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, - offset: int = 0, - size: int = -1, - ) -> CudaBuffer: - """Create device buffer and initialize with data. - - Parameters - ---------- - data : {CudaBuffer, HostBuffer, Buffer, array-like} - Specify data to be copied to device buffer. - offset : int - Specify the offset of input buffer for device data - buffering. Default: 0. - size : int - Specify the size of device buffer in bytes. Default: all - (starting from input offset) - - Returns - ------- - cbuf : CudaBuffer - Device buffer with copied data. - """ - def buffer_from_object(self, obj: Any) -> CudaBuffer: - """Create device buffer view of arbitrary object that references - device accessible memory. - - When the object contains a non-contiguous view of device - accessible memory then the returned device buffer will contain - contiguous view of the memory, that is, including the - intermediate data that is otherwise invisible to the input - object. - - Parameters - ---------- - obj : {object, Buffer, HostBuffer, CudaBuffer, ...} - Specify an object that holds (device or host) address that - can be accessed from device. This includes objects with - types defined in pyarrow.cuda as well as arbitrary objects - that implement the CUDA array interface as defined by numba. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device accessible memory. - - """ - -class IpcMemHandle(lib._Weakrefable): - """A serializable container for a CUDA IPC handle.""" - @staticmethod - def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: - """Create IpcMemHandle from opaque buffer (e.g. from another - process) - - Parameters - ---------- - opaque_handle : - a CUipcMemHandle as a const void* - - Returns - ------- - ipc_handle : IpcMemHandle - """ - def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: - """Write IpcMemHandle to a Buffer - - Parameters - ---------- - pool : {MemoryPool, None} - Specify a pool to allocate memory from - - Returns - ------- - buf : Buffer - The serialized buffer. - """ - -class CudaBuffer(lib.Buffer): - """An Arrow buffer with data located in a GPU device. - - To create a CudaBuffer instance, use Context.device_buffer(). - - The memory allocated in a CudaBuffer is freed when the buffer object - is deleted. - """ - - @staticmethod - def from_buffer(buf: lib.Buffer) -> CudaBuffer: - """Convert back generic buffer into CudaBuffer - - Parameters - ---------- - buf : Buffer - Specify buffer containing CudaBuffer - - Returns - ------- - dbuf : CudaBuffer - Resulting device buffer. - """ - @staticmethod - def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: - """Create a CudaBuffer view from numba MemoryPointer instance. 
- - Parameters - ---------- - mem : numba.cuda.cudadrv.driver.MemoryPointer - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of numba MemoryPointer. - """ - def to_numba(self) -> _numba_driver.MemoryPointer: - """Return numba memory pointer of CudaBuffer instance.""" - def copy_to_host( - self, - position: int = 0, - nbytes: int = -1, - buf: lib.Buffer | None = None, - memory_pool: lib.MemoryPool | None = None, - resizable: bool = False, - ) -> lib.Buffer: - """Copy memory from GPU device to CPU host - - Caller is responsible for ensuring that all tasks affecting - the memory are finished. Use - - `.context.synchronize()` - - when needed. - - Parameters - ---------- - position : int - Specify the starting position of the source data in GPU - device buffer. Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - the position until host buffer is full). - buf : Buffer - Specify a pre-allocated output buffer in host. Default: None - (allocate new output buffer). - memory_pool : MemoryPool - resizable : bool - Specify extra arguments to allocate_buffer. Used only when - buf is None. - - Returns - ------- - buf : Buffer - Output buffer in host. - - """ - def copy_from_host( - self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 - ) -> int: - """Copy data from host to device. - - The device buffer must be pre-allocated. - - Parameters - ---------- - data : {Buffer, array-like} - Specify data in host. It can be array-like that is valid - argument to py_buffer - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - """ - def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: - """Copy data from device to device. - - Parameters - ---------- - buf : CudaBuffer - Specify source device buffer. - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - - """ - def export_for_ipc(self) -> IpcMemHandle: - """ - Expose this device buffer as IPC memory which can be used in other - processes. - - After calling this function, this device memory will not be - freed when the CudaBuffer is destructed. - - Returns - ------- - ipc_handle : IpcMemHandle - The exported IPC handle - - """ - @property - def context(self) -> Context: - """Returns the CUDA driver context of this buffer.""" - def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: - """Return slice of device buffer - - Parameters - ---------- - offset : int, default 0 - Specify offset from the start of device buffer to slice - length : int, default None - Specify the length of slice (default is until end of device - buffer starting from offset). If the length is larger than - the data available, the returned slice will have a size of - the available data starting from the offset. - - Returns - ------- - sliced : CudaBuffer - Zero-copy slice of device buffer. - - """ - def to_pybytes(self) -> bytes: - """Return device buffer content as Python bytes.""" - -class HostBuffer(lib.Buffer): - """Device-accessible CPU memory created using cudaHostAlloc. 
- - To create a HostBuffer instance, use - - cuda.new_host_buffer() - """ - @property - def size(self) -> int: ... - -class BufferReader(lib.NativeFile): - """File interface for zero-copy read from CUDA buffers. - - Note: Read methods return pointers to device memory. This means - you must be careful using this interface with any Arrow code which - may expect to be able to do anything other than pointer arithmetic - on the returned buffers. - """ - def __init__(self, obj: CudaBuffer) -> None: ... - def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: - """Return a slice view of the underlying device buffer. - - The slice will start at the current reader position and will - have specified size in bytes. - - Parameters - ---------- - nbytes : int, default None - Specify the number of bytes to read. Default: None (read all - remaining bytes). - - Returns - ------- - cbuf : CudaBuffer - New device buffer. - - """ - -class BufferWriter(lib.NativeFile): - """File interface for writing to CUDA buffers. - - By default writes are unbuffered. Use set_buffer_size to enable - buffering. - """ - def __init__(self, obj: CudaBuffer) -> None: ... - def writeat(self, position: int, data: ArrayLike) -> None: - """Write data to buffer starting from position. - - Parameters - ---------- - position : int - Specify device buffer position where the data will be - written. - data : array-like - Specify data, the data instance must implement buffer - protocol. - """ - @property - def buffer_size(self) -> int: - """Returns size of host (CPU) buffer, 0 for unbuffered""" - @buffer_size.setter - def buffer_size(self, buffer_size: int): - """Set CPU buffer size to limit calls to cudaMemcpy - - Parameters - ---------- - buffer_size : int - Specify the size of CPU buffer to allocate in bytes. - """ - @property - def num_bytes_buffered(self) -> int: - """Returns number of bytes buffered on host""" - -def new_host_buffer(size: int, device: int = 0) -> HostBuffer: - """Return buffer with CUDA-accessible memory on CPU host - - Parameters - ---------- - size : int - Specify the number of bytes to be allocated. - device : int - Specify GPU device number. - - Returns - ------- - dbuf : HostBuffer - Allocated host buffer - """ - -def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: - """Write record batch message to GPU device memory - - Parameters - ---------- - batch : RecordBatch - Record batch to write - ctx : Context - CUDA Context to allocate device memory from - - Returns - ------- - dbuf : CudaBuffer - device buffer which contains the record batch message - """ - -def read_message( - source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None -) -> lib.Message: - """Read Arrow IPC message located on GPU device - - Parameters - ---------- - source : {CudaBuffer, cuda.BufferReader} - Device buffer or reader of device buffer. - pool : MemoryPool (optional) - Pool to allocate CPU memory for the metadata - - Returns - ------- - message : Message - The deserialized message, body still on device - """ - -def read_record_batch( - buffer: lib.Buffer, - object: lib.Schema, - *, - dictionary_memo: lib.DictionaryMemo | None = None, - pool: lib.MemoryPool | None = None, -) -> lib.RecordBatch: - """Construct RecordBatch referencing IPC message located on CUDA device. - - While the metadata is copied to host memory for deserialization, - the record batch data remains on the device. 
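As a rough sketch of how serialize_record_batch, read_message and read_record_batch fit together (this assumes a CUDA-enabled Arrow build and at least one GPU; it is illustrative only):

    import pyarrow as pa
    from pyarrow import cuda

    ctx = cuda.Context(0)  # default CUDA context on device 0
    batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])

    # Write the batch as an IPC message into device memory ...
    dbuf = cuda.serialize_record_batch(batch, ctx)

    # ... inspect the IPC message (its body stays on the device) ...
    msg = cuda.read_message(cuda.BufferReader(dbuf))

    # ... and reconstruct a RecordBatch whose buffers still point to device memory.
    roundtripped = cuda.read_record_batch(dbuf, batch.schema)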
- - Parameters - ---------- - buffer : - Device buffer containing the complete IPC message - schema : Schema - The schema for the record batch - dictionary_memo : DictionaryMemo, optional - If message contains dictionaries, must pass a populated - DictionaryMemo - pool : MemoryPool (optional) - Pool to allocate metadata from - - Returns - ------- - batch : RecordBatch - Reconstructed record batch, with device pointers - - """ diff --git a/python/pyarrow/_dataset.pyi b/python/pyarrow/_dataset.pyi deleted file mode 100644 index 4980cb0420f..00000000000 --- a/python/pyarrow/_dataset.pyi +++ /dev/null @@ -1,2318 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import ( - IO, - Any, - Callable, - Generic, - Iterator, - Literal, - NamedTuple, - TypeVar, - overload, -) - -from _typeshed import StrPath - -from . import _csv, _json, _parquet, lib -from ._fs import FileSelector, FileSystem, SupportedFileSystem -from ._stubs_typing import Indices, JoinType, Order -from .acero import ExecNodeOptions -from .compute import Expression -from .ipc import IpcWriteOptions, RecordBatchReader - -class Dataset(lib._Weakrefable): - """ - Collection of data fragments and potentially child datasets. - - Arrow Datasets allow you to query against data that has been split across - multiple files. This sharding of data may indicate partitioning, which - can accelerate queries that only touch some partitions (files). - """ - - @property - def partition_expression(self) -> Expression: - """ - An Expression which evaluates to true for all data viewed by this - Dataset. - """ - def replace_schema(self, schema: lib.Schema) -> None: - """ - Return a copy of this Dataset with a different schema. - - The copy will view the same Fragments. If the new schema is not - compatible with the original dataset's schema then an error will - be raised. - - Parameters - ---------- - schema : Schema - The new dataset schema. - """ - def get_fragments(self, filter: Expression | None = None): - """Returns an iterator over the fragments in this dataset. - - Parameters - ---------- - filter : Expression, default None - Return fragments matching the optional filter, either using the - partition_expression or internal information like Parquet's - statistics. 
- - Returns - ------- - fragments : iterator of Fragment - """ - def scanner( - self, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Build a scan operation against the dataset. - - Data is not loaded immediately. Instead, this produces a Scanner, - which exposes further operations (e.g. loading all data as a - table, counting rows). - - See the :meth:`Scanner.from_dataset` method for further information. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - scanner : Scanner - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "dataset_scanner.parquet") - - >>> import pyarrow.dataset as ds - >>> dataset = ds.dataset("dataset_scanner.parquet") - - Selecting a subset of the columns: - - >>> dataset.scanner(columns=["year", "n_legs"]).to_table() - pyarrow.Table - year: int64 - n_legs: int64 - ---- - year: [[2020,2022,2021,2022,2019,2021]] - n_legs: [[2,2,4,4,5,100]] - - Projecting selected columns using an expression: - - >>> dataset.scanner( - ... columns={ - ... "n_legs_uint": ds.field("n_legs").cast("uint8"), - ... } - ... ).to_table() - pyarrow.Table - n_legs_uint: uint8 - ---- - n_legs_uint: [[2,2,4,4,5,100]] - - Filtering rows while scanning: - - >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() - pyarrow.Table - year: int64 - n_legs: int64 - animal: string - ---- - year: [[2022,2021,2022,2021]] - n_legs: [[2,4,4,100]] - animal: [["Parrot","Dog","Horse","Centipede"]] - """ - def to_batches( - self, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Iterator[lib.RecordBatch]: - """ - Read the dataset as materialized record batches. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. 
- use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - record_batches : iterator of RecordBatch - """ - def to_table( - self, - columns: list[str] | dict[str, Expression] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Read the dataset to an Arrow table. - - Note that this method reads all the selected data from the dataset - into memory. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. 
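For contrast with to_table(), which materializes everything at once, a small sketch of the streaming path via to_batches() (the in-memory table is invented for the example):

    import pyarrow as pa
    import pyarrow.dataset as ds

    dataset = ds.dataset(pa.table({"n_legs": [2, 4, 5, 100]}))

    # to_batches() yields RecordBatches lazily instead of building one big Table.
    total_rows = 0
    for batch in dataset.to_batches(columns=["n_legs"], batch_size=2):
        total_rows += batch.num_rows
    print(total_rows)  # 4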
- - Returns - ------- - table : Table - """ - def take( - self, - indices: Indices, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Select rows of data by index. - - Parameters - ---------- - indices : Array or array-like - indices of rows to select in the dataset. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ - def head( - self, - num_rows: int, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Load the first N rows of the dataset. - - Parameters - ---------- - num_rows : int - The number of rows to load. 
- columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ - def count_rows( - self, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> int: - """ - Count rows matching the scanner filter. - - Parameters - ---------- - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. 
Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - count : int - """ - @property - def schema(self) -> lib.Schema: - """The common schema of the full Dataset""" - def filter(self, expression: Expression) -> Self: - """ - Apply a row filter to the dataset. - - Parameters - ---------- - expression : Expression - The filter that should be applied to the dataset. - - Returns - ------- - Dataset - """ - def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: - """ - Sort the Dataset by one or multiple columns. - - Parameters - ---------- - sorting : str or list[tuple(name, order)] - Name of the column to use to sort (ascending), or - a list of multiple sorting conditions where - each entry is a tuple with column name - and sorting order ("ascending" or "descending") - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - InMemoryDataset - A new dataset sorted according to the sort keys. - """ - def join( - self, - right_dataset: Dataset, - keys: str | list[str], - right_keys: str | list[str] | None = None, - join_type: JoinType = "left outer", - left_suffix: str | None = None, - right_suffix: str | None = None, - coalesce_keys: bool = True, - use_threads: bool = True, - ) -> InMemoryDataset: - """ - Perform a join between this dataset and another one. - - Result of the join will be a new dataset, where further - operations can be applied. - - Parameters - ---------- - right_dataset : dataset - The dataset to join to the current one, acting as the right dataset - in the join operation. - keys : str or list[str] - The columns from current dataset that should be used as keys - of the join operation left side. - right_keys : str or list[str], default None - The columns from the right_dataset that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left dataset. - join_type : str, default "left outer" - The kind of join that should be performed, one of - ("left semi", "right semi", "left anti", "right anti", - "inner", "left outer", "right outer", "full outer") - left_suffix : str, default None - Which suffix to add to right column names. This prevents confusion - when the columns in left and right datasets have colliding names. - right_suffix : str, default None - Which suffix to add to the left column names. This prevents confusion - when the columns in left and right datasets have colliding names. - coalesce_keys : bool, default True - If the duplicated keys should be omitted from one of the sides - in the join result. - use_threads : bool, default True - Whenever to use multithreading or not. 
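A minimal sketch of Dataset.join (the column names and values are invented; the joined result is an InMemoryDataset that can be scanned or converted like any other dataset):

    import pyarrow as pa
    import pyarrow.dataset as ds

    left = ds.dataset(pa.table({"id": [1, 2, 3], "year": [2019, 2020, 2021]}))
    right = ds.dataset(pa.table({"id": [2, 3, 4], "n_legs": [4, 5, 100]}))

    # Inner join on "id"; rows without a match on either side are dropped.
    joined = left.join(right, keys="id", join_type="inner")
    print(joined.to_table().sort_by("id"))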
- - Returns - ------- - InMemoryDataset - """ - def join_asof( - self, - right_dataset: Dataset, - on: str, - by: str | list[str], - tolerance: int, - right_on: str | list[str] | None = None, - right_by: str | list[str] | None = None, - ) -> InMemoryDataset: - """ - Perform an asof join between this dataset and another one. - - This is similar to a left-join except that we match on nearest key rather - than equal keys. Both datasets must be sorted by the key. This type of join - is most useful for time series data that are not perfectly aligned. - - Optionally match on equivalent keys with "by" before searching with "on". - - Result of the join will be a new Dataset, where further - operations can be applied. - - Parameters - ---------- - right_dataset : dataset - The dataset to join to the current one, acting as the right dataset - in the join operation. - on : str - The column from current dataset that should be used as the "on" key - of the join operation left side. - - An inexact match is used on the "on" key, i.e. a row is considered a - match if and only if left_on - tolerance <= right_on <= left_on. - - The input table must be sorted by the "on" key. Must be a single - field of a common type. - - Currently, the "on" key must be an integer, date, or timestamp type. - by : str or list[str] - The columns from current dataset that should be used as the keys - of the join operation left side. The join operation is then done - only for the matches in these columns. - tolerance : int - The tolerance for inexact "on" key matching. A right row is considered - a match with the left row `right.on - left.on <= tolerance`. The - `tolerance` may be: - - - negative, in which case a past-as-of-join occurs; - - or positive, in which case a future-as-of-join occurs; - - or zero, in which case an exact-as-of-join occurs. - - The tolerance is interpreted in the same units as the "on" key. - right_on : str or list[str], default None - The columns from the right_dataset that should be used as the on key - on the join operation right side. - When ``None`` use the same key name as the left dataset. - right_by : str or list[str], default None - The columns from the right_dataset that should be used as by keys - on the join operation right side. - When ``None`` use the same key names as the left dataset. - - Returns - ------- - InMemoryDataset - """ - -class InMemoryDataset(Dataset): - """ - A Dataset wrapping in-memory data. - - Parameters - ---------- - source : RecordBatch, Table, list, tuple - The data for this dataset. Can be a RecordBatch, Table, list of - RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader - If an iterable is provided, the schema must also be provided. - schema : Schema, optional - Only required if passing an iterable as the source - """ - -class UnionDataset(Dataset): - """ - A Dataset wrapping child datasets. - - Children's schemas must agree with the provided schema. - - Parameters - ---------- - schema : Schema - A known schema to conform to. - children : list of Dataset - One or more input children - """ - - @property - def children(self) -> list[Dataset]: ... - -class FileSystemDataset(Dataset): - """ - A Dataset of file fragments. - - A FileSystemDataset is composed of one or more FileFragment. - - Parameters - ---------- - fragments : list[Fragments] - List of fragments to consume. - schema : Schema - The top-level schema of the Dataset. 
- format : FileFormat - File format of the fragments, currently only ParquetFileFormat, - IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. - filesystem : FileSystem - FileSystem of the fragments. - root_partition : Expression, optional - The top-level partition of the DataDataset. - """ - - def __init__( - self, - fragments: list[Fragment], - schema: lib.Schema, - format: FileFormat, - filesystem: SupportedFileSystem | None = None, - root_partition: Expression | None = None, - ) -> None: ... - @classmethod - def from_paths( - cls, - paths: list[str], - schema: lib.Schema | None = None, - format: FileFormat | None = None, - filesystem: SupportedFileSystem | None = None, - partitions: list[Expression] | None = None, - root_partition: Expression | None = None, - ) -> FileSystemDataset: - """ - A Dataset created from a list of paths on a particular filesystem. - - Parameters - ---------- - paths : list of str - List of file paths to create the fragments from. - schema : Schema - The top-level schema of the DataDataset. - format : FileFormat - File format to create fragments from, currently only - ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. - filesystem : FileSystem - The filesystem which files are from. - partitions : list[Expression], optional - Attach additional partition information for the file paths. - root_partition : Expression, optional - The top-level partition of the DataDataset. - """ - @property - def filesystem(self) -> FileSystem: ... - @property - def partitioning(self) -> Partitioning | None: - """ - The partitioning of the Dataset source, if discovered. - - If the FileSystemDataset is created using the ``dataset()`` factory - function with a partitioning specified, this will return the - finalized Partitioning object from the dataset discovery. In all - other cases, this returns None. - """ - @property - def files(self) -> list[str]: - """List of the files""" - @property - def format(self) -> FileFormat: - """The FileFormat of this source.""" - -class FileWriteOptions(lib._Weakrefable): - @property - def format(self) -> FileFormat: ... - -class FileFormat(lib._Weakrefable): - def inspect( - self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None - ) -> lib.Schema: - """ - Infer the schema of a file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to infer a schema from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - - Returns - ------- - schema : Schema - The schema inferred from the file - """ - def make_fragment( - self, - file: StrPath | IO, - filesystem: SupportedFileSystem | None = None, - partition_expression: Expression | None = None, - *, - file_size: int | None = None, - ) -> Fragment: - """ - Make a FileFragment from a given file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to make a fragment from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - partition_expression : Expression, optional - An expression that is guaranteed true for all rows in the fragment. Allows - fragment to be potentially skipped while scanning with a filter. - file_size : int, optional - The size of the file in bytes. 
Can improve performance with high-latency filesystems - when file size needs to be known before reading. - - Returns - ------- - fragment : Fragment - The file fragment - """ - def make_write_options(self) -> FileWriteOptions: ... - @property - def default_extname(self) -> str: ... - @property - def default_fragment_scan_options(self) -> FragmentScanOptions: ... - @default_fragment_scan_options.setter - def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... - -class Fragment(lib._Weakrefable): - """Fragment of data from a Dataset.""" - @property - def physical_schema(self) -> lib.Schema: - """Return the physical schema of this Fragment. This schema can be - different from the dataset read schema.""" - @property - def partition_expression(self) -> Expression: - """An Expression which evaluates to true for all data viewed by this - Fragment. - """ - def scanner( - self, - schema: lib.Schema | None = None, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Build a scan operation against the fragment. - - Data is not loaded immediately. Instead, this produces a Scanner, - which exposes further operations (e.g. loading all data as a - table, counting rows). - - Parameters - ---------- - schema : Schema - Schema to use for scanning. This is used to unify a Fragment to - its Dataset's schema. If not specified this will use the - Fragment's physical schema which might differ for each Fragment. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. 
Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - scanner : Scanner - """ - def to_batches( - self, - schema: lib.Schema | None = None, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Iterator[lib.RecordBatch]: - """ - Read the fragment as materialized record batches. - - Parameters - ---------- - schema : Schema, optional - Concrete schema to use for scanning. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. 
- cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - record_batches : iterator of RecordBatch - """ - def to_table( - self, - schema: lib.Schema | None = None, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Convert this Fragment into a Table. - - Use this convenience utility with care. This will serially materialize - the Scan result in memory before creating the Table. - - Parameters - ---------- - schema : Schema, optional - Concrete schema to use for scanning. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. 
- - Returns - ------- - table : Table - """ - def take( - self, - indices: Indices, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Select rows of data by index. - - Parameters - ---------- - indices : Array or array-like - The indices of row to select in the dataset. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - Table - """ - def head( - self, - num_rows: int, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Load the first N rows of the fragment. - - Parameters - ---------- - num_rows : int - The number of rows to load. 
- columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - Table - """ - def count_rows( - self, - columns: list[str] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> int: - """ - Count rows matching the scanner filter. - - Parameters - ---------- - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. 
Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - count : int - """ - -class FileFragment(Fragment): - """A Fragment representing a data file.""" - - def open(self) -> lib.NativeFile: - """ - Open a NativeFile of the buffer or file viewed by this fragment. - """ - @property - def path(self) -> str: - """ - The path of the data file viewed by this fragment, if it views a - file. If instead it views a buffer, this will be "". - """ - @property - def filesystem(self) -> FileSystem: - """ - The FileSystem containing the data file viewed by this fragment, if - it views a file. If instead it views a buffer, this will be None. - """ - @property - def buffer(self) -> lib.Buffer: - """ - The buffer viewed by this fragment, if it views a buffer. If - instead it views a file, this will be None. - """ - @property - def format(self) -> FileFormat: - """ - The format of the data file viewed by this fragment. - """ - -class FragmentScanOptions(lib._Weakrefable): - """Scan options specific to a particular fragment and scan operation.""" - - @property - def type_name(self) -> str: ... - -class IpcFileWriteOptions(FileWriteOptions): - @property - def write_options(self) -> IpcWriteOptions: ... - @write_options.setter - def write_options(self, write_options: IpcWriteOptions) -> None: ... - -class IpcFileFormat(FileFormat): - def equals(self, other: IpcFileFormat) -> bool: ... - def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... - @property - def default_extname(self) -> str: ... - -class FeatherFileFormat(IpcFileFormat): ... - -class CsvFileFormat(FileFormat): - """ - FileFormat for CSV files. - - Parameters - ---------- - parse_options : pyarrow.csv.ParseOptions - Options regarding CSV parsing. - default_fragment_scan_options : CsvFragmentScanOptions - Default options for fragments scan. - convert_options : pyarrow.csv.ConvertOptions - Options regarding value conversion. - read_options : pyarrow.csv.ReadOptions - General read options. - """ - def __init__( - self, - parse_options: _csv.ParseOptions | None = None, - default_fragment_scan_options: CsvFragmentScanOptions | None = None, - convert_options: _csv.ConvertOptions | None = None, - read_options: _csv.ReadOptions | None = None, - ) -> None: ... - def make_write_options(self) -> _csv.WriteOptions: ... # type: ignore[override] - @property - def parse_options(self) -> _csv.ParseOptions: ... - @parse_options.setter - def parse_options(self, parse_options: _csv.ParseOptions) -> None: ... - def equals(self, other: CsvFileFormat) -> bool: ... - -class CsvFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for CSV fragments. - - Parameters - ---------- - convert_options : pyarrow.csv.ConvertOptions - Options regarding value conversion. 
- read_options : pyarrow.csv.ReadOptions - General read options. - """ - - convert_options: _csv.ConvertOptions - read_options: _csv.ReadOptions - - def __init__( - self, convert_options: _csv.ConvertOptions, read_options: _csv.ReadOptions - ) -> None: ... - def equals(self, other: CsvFragmentScanOptions) -> bool: ... - -class CsvFileWriteOptions(FileWriteOptions): - write_options: _csv.WriteOptions - -class JsonFileFormat(FileFormat): - """ - FileFormat for JSON files. - - Parameters - ---------- - default_fragment_scan_options : JsonFragmentScanOptions - Default options for fragments scan. - parse_options : pyarrow.json.ParseOptions - Options regarding json parsing. - read_options : pyarrow.json.ReadOptions - General read options. - """ - def __init__( - self, - default_fragment_scan_options: JsonFragmentScanOptions | None = None, - parse_options: _json.ParseOptions | None = None, - read_options: _json.ReadOptions | None = None, - ) -> None: ... - def equals(self, other: JsonFileFormat) -> bool: ... - -class JsonFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for JSON fragments. - - Parameters - ---------- - parse_options : pyarrow.json.ParseOptions - Options regarding JSON parsing. - read_options : pyarrow.json.ReadOptions - General read options. - """ - - parse_options: _json.ParseOptions - read_options: _json.ReadOptions - def __init__( - self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions - ) -> None: ... - def equals(self, other: JsonFragmentScanOptions) -> bool: ... - -class Partitioning(lib._Weakrefable): - def parse(self, path: str) -> Expression: - """ - Parse a path into a partition expression. - - Parameters - ---------- - path : str - - Returns - ------- - pyarrow.dataset.Expression - """ - def format(self, expr: Expression) -> tuple[str, str]: - """ - Convert a filter expression into a tuple of (directory, filename) using - the current partitioning scheme - - Parameters - ---------- - expr : pyarrow.dataset.Expression - - Returns - ------- - tuple[str, str] - - Examples - -------- - - Specify the Schema for paths like "/2009/June": - - >>> import pyarrow as pa - >>> import pyarrow.dataset as ds - >>> import pyarrow.compute as pc - >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) - >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) - ('1862/Jan', '') - """ - @property - def schema(self) -> lib.Schema: - """The arrow Schema attached to the partitioning.""" - -class PartitioningFactory(lib._Weakrefable): - @property - def type_name(self) -> str: ... - -class KeyValuePartitioning(Partitioning): - @property - def dictionaries(self) -> list[lib.Array | None]: - """ - The unique values for each partition field, if available. - - Those values are only available if the Partitioning object was - created through dataset discovery from a PartitioningFactory, or - if the dictionaries were manually specified in the constructor. - If no dictionary field is available, this returns an empty list. - """ - -class DirectoryPartitioning(KeyValuePartitioning): - """ - A Partitioning based on a specified Schema. - - The DirectoryPartitioning expects one segment in the file path for each - field in the schema (all fields are required to be present). - For example given schema the path "/2009/11" would - be parsed to ("year"_ == 2009 and "month"_ == 11). - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. 
- dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - DirectoryPartitioning - - Examples - -------- - >>> from pyarrow.dataset import DirectoryPartitioning - >>> partitioning = DirectoryPartitioning( - ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) - ... ) - >>> print(partitioning.parse("/2009/11/")) - ((year == 2009) and (month == 11)) - """ - - @staticmethod - def discover( - field_names: list[str] | None = None, - infer_dictionary: bool = False, - max_partition_dictionary_size: int = 0, - schema: lib.Schema | None = None, - segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a DirectoryPartitioning. - - Parameters - ---------- - field_names : list of str - The names to associate with the values from the subdirectory names. - If schema is given, will be populated from the schema. - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain types. This can be more efficient - when materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - max_partition_dictionary_size : int, default 0 - Synonymous with infer_dictionary for backwards compatibility with - 1.0: setting this to -1 or None is equivalent to passing - infer_dictionary=True. - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ - def __init__( - self, - schema: lib.Schema, - dictionaries: dict[str, lib.Array] | None = None, - segment_encoding: Literal["uri", "none"] = "uri", - ) -> None: ... - -class HivePartitioning(KeyValuePartitioning): - """ - A Partitioning for "/$key=$value/" nested directories as found in - Apache Hive. - - Multi-level, directory based partitioning scheme originating from - Apache Hive with all data files stored in the leaf directories. Data is - partitioned by static values of a particular column in the schema. - Partition keys are represented in the form $key=$value in directory names. - Field order is ignored, as are missing or unrecognized field names. - - For example, given schema, a possible - path would be "/year=2009/month=11/day=15". - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. - dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. 
- null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" - If any field is None then this fallback will be used as a label - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - HivePartitioning - - Examples - -------- - >>> from pyarrow.dataset import HivePartitioning - >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) - >>> print(partitioning.parse("/year=2009/month=11/")) - ((year == 2009) and (month == 11)) - - """ - def __init__( - self, - schema: lib.Schema, - dictionaries: dict[str, lib.Array] | None = None, - null_fallback: str = "__HIVE_DEFAULT_PARTITION__", - segment_encoding: Literal["uri", "none"] = "uri", - ) -> None: ... - @staticmethod - def discover( - infer_dictionary: bool = False, - max_partition_dictionary_size: int = 0, - null_fallback="__HIVE_DEFAULT_PARTITION__", - schema: lib.Schema | None = None, - segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a HivePartitioning. - - Parameters - ---------- - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain. This can be more efficient when - materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - max_partition_dictionary_size : int, default 0 - Synonymous with infer_dictionary for backwards compatibility with - 1.0: setting this to -1 or None is equivalent to passing - infer_dictionary=True. - null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" - When inferring a schema for partition fields this value will be - replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ - for compatibility with Spark - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ - -class FilenamePartitioning(KeyValuePartitioning): - """ - A Partitioning based on a specified Schema. - - The FilenamePartitioning expects one segment in the file name for each - field in the schema (all fields are required to be present) separated - by '_'. For example given schema the name - ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. - dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - FilenamePartitioning - - Examples - -------- - >>> from pyarrow.dataset import FilenamePartitioning - >>> partitioning = FilenamePartitioning( - ... 
pa.schema([("year", pa.int16()), ("month", pa.int8())]) - ... ) - >>> print(partitioning.parse("2009_11_data.parquet")) - ((year == 2009) and (month == 11)) - """ - - def __init__( - self, - schema: lib.Schema, - dictionaries: dict[str, lib.Array] | None = None, - segment_encoding: Literal["uri", "none"] = "uri", - ) -> None: ... - @staticmethod - def discover( - field_names: list[str] | None = None, - infer_dictionary: bool = False, - schema: lib.Schema | None = None, - segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a FilenamePartitioning. - - Parameters - ---------- - field_names : list of str - The names to associate with the values from the subdirectory names. - If schema is given, will be populated from the schema. - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain types. This can be more efficient - when materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ - -class DatasetFactory(lib._Weakrefable): - """ - DatasetFactory is used to create a Dataset, inspect the Schema - of the fragments contained in it, and declare a partitioning. - """ - - root_partition: Expression - def finish(self, schema: lib.Schema | None = None) -> Dataset: - """ - Create a Dataset using the inspected schema or an explicit schema - (if given). - - Parameters - ---------- - schema : Schema, default None - The schema to conform the source to. If None, the inspected - schema is used. - - Returns - ------- - Dataset - """ - def inspect(self) -> lib.Schema: - """ - Inspect all data fragments and return a common Schema. - - Returns - ------- - Schema - """ - def inspect_schemas(self) -> list[lib.Schema]: ... - -class FileSystemFactoryOptions(lib._Weakrefable): - """ - Influences the discovery of filesystem paths. - - Parameters - ---------- - partition_base_dir : str, optional - For the purposes of applying the partitioning, paths will be - stripped of the partition_base_dir. Files not matching the - partition_base_dir prefix will be skipped for partitioning discovery. - The ignored files will still be part of the Dataset, but will not - have partition information. - partitioning : Partitioning/PartitioningFactory, optional - Apply the Partitioning to every discovered Fragment. See Partitioning or - PartitioningFactory documentation. - exclude_invalid_files : bool, optional (default True) - If True, invalid files will be excluded (file format specific check). - This will incur IO for each files in a serial and single threaded - fashion. Disabling this feature will skip the IO, but unsupported - files may be present in the Dataset (resulting in an error at scan - time). - selector_ignore_prefixes : list, optional - When discovering from a Selector (and not from an explicit file list), - ignore files and directories matching any of these prefixes. - By default this is ['.', '_']. 
- """ - - partitioning: Partitioning - partitioning_factory: PartitioningFactory - partition_base_dir: str - exclude_invalid_files: bool - selector_ignore_prefixes: list[str] - - def __init__( - self, - artition_base_dir: str | None = None, - partitioning: Partitioning | PartitioningFactory | None = None, - exclude_invalid_files: bool = True, - selector_ignore_prefixes: list[str] | None = None, - ) -> None: ... - -class FileSystemDatasetFactory(DatasetFactory): - """ - Create a DatasetFactory from a list of paths with schema inspection. - - Parameters - ---------- - filesystem : pyarrow.fs.FileSystem - Filesystem to discover. - paths_or_selector : pyarrow.fs.FileSelector or list of path-likes - Either a Selector object or a list of path-like objects. - format : FileFormat - Currently only ParquetFileFormat and IpcFileFormat are supported. - options : FileSystemFactoryOptions, optional - Various flags influencing the discovery of filesystem paths. - """ - - def __init__( - self, - filesystem: SupportedFileSystem, - paths_or_selector: FileSelector, - format: FileFormat, - options: FileSystemFactoryOptions | None = None, - ) -> None: ... - -class UnionDatasetFactory(DatasetFactory): - """ - Provides a way to inspect/discover a Dataset's expected schema before - materialization. - - Parameters - ---------- - factories : list of DatasetFactory - """ - def __init__(self, factories: list[DatasetFactory]) -> None: ... - -_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) - -class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): - """An iterator over a sequence of record batches.""" - def __iter__(self) -> Self: ... - def __next__(self) -> _RecordBatchT: ... - -class TaggedRecordBatch(NamedTuple): - """ - A combination of a record batch and the fragment it came from. - - Parameters - ---------- - record_batch : RecordBatch - The record batch. - fragment : Fragment - Fragment of the record batch. - """ - - record_batch: lib.RecordBatch - fragment: Fragment - -class TaggedRecordBatchIterator(lib._Weakrefable): - """An iterator over a sequence of record batches with fragments.""" - def __iter__(self) -> Self: ... - def __next__(self) -> TaggedRecordBatch: ... - -class Scanner(lib._Weakrefable): - """A materialized scan operation with context and options bound. - - A scanner is the class that glues the scan tasks, data fragments and data - sources together. - """ - @staticmethod - def from_dataset( - dataset: Dataset, - *, - columns: list[str] | dict[str, Expression] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Create Scanner from Dataset, - - Parameters - ---------- - dataset : Dataset - Dataset to scan. - columns : list[str] or dict[str, Expression], default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. 
- - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ - @staticmethod - def from_fragment( - fragment: Fragment, - *, - schema: lib.Schema | None = None, - columns: list[str] | dict[str, Expression] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Create Scanner from Fragment, - - Parameters - ---------- - fragment : Fragment - fragment to scan. - schema : Schema, optional - The schema of the fragment. - columns : list[str] or dict[str, Expression], default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). 
- - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ - @overload - @staticmethod - def from_batches( - source: Iterator[lib.RecordBatch], - *, - schema: lib.Schema, - columns: list[str] | dict[str, Expression] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: ... - @overload - @staticmethod - def from_batches( - source: RecordBatchReader, - *, - columns: list[str] | dict[str, Expression] | None = None, - filter: Expression | None = None, - batch_size: int = ..., - batch_readahead: int = 16, - fragment_readahead: int = 4, - fragment_scan_options: FragmentScanOptions | None = None, - use_threads: bool = True, - cache_metadata: bool = True, - memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: ... - @staticmethod - def from_batches(*args, **kwargs): - """ - Create a Scanner from an iterator of batches. - - This creates a scanner which can be used only once. It is - intended to support writing a dataset (which takes a scanner) - from a source which can be read only once (e.g. a - RecordBatchReader or generator). - - Parameters - ---------- - source : Iterator or Arrow-compatible stream object - The iterator of Batches. This can be a pyarrow RecordBatchReader, - any object that implements the Arrow PyCapsule Protocol for - streams, or an actual Python iterator of RecordBatches. - schema : Schema - The schema of the batches (required when passing a Python - iterator). - columns : list[str] or dict[str, Expression], default None - The columns to project. 
This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ - @property - def dataset_schema(self) -> lib.Schema: - """The schema with which batches will be read from fragments.""" - @property - def projected_schema(self) -> lib.Schema: - """ - The materialized schema of the data, accounting for projections. - - This is the schema of any data returned from the scanner. - """ - def to_batches(self) -> Iterator[lib.RecordBatch]: - """ - Consume a Scanner in record batches. - - Returns - ------- - record_batches : iterator of RecordBatch - """ - def scan_batches(self) -> TaggedRecordBatchIterator: - """ - Consume a Scanner in record batches with corresponding fragments. - - Returns - ------- - record_batches : iterator of TaggedRecordBatch - """ - def to_table(self) -> lib.Table: - """ - Convert a Scanner into a Table. - - Use this convenience utility with care. This will serially materialize - the Scan result in memory before creating the Table. - - Returns - ------- - Table - """ - def take(self, indices: Indices) -> lib.Table: - """ - Select rows of data by index. - - Will only consume as many batches of the underlying dataset as - needed. Otherwise, this is equivalent to - ``to_table().take(indices)``. 
- - Parameters - ---------- - indices : Array or array-like - indices of rows to select in the dataset. - - Returns - ------- - Table - """ - def head(self, num_rows: int) -> lib.Table: - """ - Load the first N rows of the dataset. - - Parameters - ---------- - num_rows : int - The number of rows to load. - - Returns - ------- - Table - """ - def count_rows(self) -> int: - """ - Count rows matching the scanner filter. - - Returns - ------- - count : int - """ - def to_reader(self) -> RecordBatchReader: - """Consume this scanner as a RecordBatchReader. - - Returns - ------- - RecordBatchReader - """ - -def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: - """ - Extract partition keys (equality constraints between a field and a scalar) - from an expression as a dict mapping the field's name to its value. - - NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning - will be conjunctions of equality conditions and are accessible through this - function. Other subexpressions will be ignored. - - Parameters - ---------- - partition_expression : pyarrow.dataset.Expression - - Returns - ------- - dict - - Examples - -------- - - For example, an expression of - - is converted to {'part': 'A', 'year': 2016} - """ - -class WrittenFile(lib._Weakrefable): - """ - Metadata information about files written as - part of a dataset write operation - - Parameters - ---------- - path : str - Path to the file. - metadata : pyarrow.parquet.FileMetaData, optional - For Parquet files, the Parquet file metadata. - size : int - The size of the file in bytes. - """ - def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ... - -def _filesystemdataset_write( - data: Scanner, - base_dir: StrPath, - basename_template: str, - filesystem: SupportedFileSystem, - partitioning: Partitioning, - file_options: FileWriteOptions, - max_partitions: int, - file_visitor: Callable[[str], None], - existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"], - max_open_files: int, - max_rows_per_file: int, - min_rows_per_group: int, - max_rows_per_group: int, - create_dir: bool, -): ... - -class _ScanNodeOptions(ExecNodeOptions): - def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ... - -class ScanNodeOptions(_ScanNodeOptions): - """ - A Source node which yields batches from a Dataset scan. - - This is the option class for the "scan" node factory. - - This node is capable of applying pushdown projections or filters - to the file readers which reduce the amount of data that needs to - be read (if supported by the file format). But note that this does not - construct associated filter or project nodes to perform the final - filtering or projection. Rather, you may supply the same filter - expression or projection to the scan node that you also supply - to the filter or project node. - - Yielded batches will be augmented with fragment/batch indices when - implicit_ordering=True to enable stable ordering for simple ExecPlans. - - Parameters - ---------- - dataset : pyarrow.dataset.Dataset - The table which acts as the data source. - **kwargs : dict, optional - Scan options. See `Scanner.from_dataset` for possible arguments. - require_sequenced_output : bool, default False - Batches are yielded sequentially, like single-threaded - implicit_ordering : bool, default False - Preserve implicit ordering of data. - """ - - def __init__( - self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs - ) -> None: ... 
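The _dataset stubs above cover pyarrow.dataset's scanning API (Dataset, Fragment, Partitioning, Scanner, get_partition_keys). As a rough orientation for readers of the stubs, a minimal usage sketch follows; the "data" directory and the column names are illustrative assumptions, not anything defined by this patch.

import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.dataset as ds

# Write a tiny hive-partitioned dataset so the sketch is self-contained
# ("data", "year", "month" and "value" are made-up names).
table = pa.table({"year": [2009, 2009, 2010], "month": [11, 12, 1], "value": [1.0, 2.0, 3.0]})
ds.write_dataset(table, "data", format="parquet",
                 partitioning=["year"], partitioning_flavor="hive")

# Rediscover the dataset; the HivePartitioning is reconstructed from the paths.
dataset = ds.dataset("data", format="parquet", partitioning="hive")

# Build a Scanner with a projection and a filter that can be pushed down
# to the partition expressions / Parquet statistics of each fragment.
scanner = dataset.scanner(columns=["month", "value"],
                          filter=pc.field("year") == 2009)
print(scanner.to_table())

# Per-fragment access, matching the Fragment and partitioning stubs above.
for fragment in dataset.get_fragments():
    print(fragment.path, ds.get_partition_keys(fragment.partition_expression))

# A one-shot Scanner from a stream source (Scanner.from_batches); it can be
# consumed only once, e.g. to feed a dataset write.
reader = pa.RecordBatchReader.from_batches(table.schema, table.to_batches())
one_shot = ds.Scanner.from_batches(reader)

Here ds.write_dataset only exists to make the sketch runnable end to end; in practice the dataset would already live on a local or remote filesystem.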
diff --git a/python/pyarrow/_dataset_orc.pyi b/python/pyarrow/_dataset_orc.pyi deleted file mode 100644 index d4e5784750f..00000000000 --- a/python/pyarrow/_dataset_orc.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from ._dataset import FileFormat - -class OrcFileFormat(FileFormat): - def equals(self, other: OrcFileFormat) -> bool: ... - @property - def default_extname(self): ... diff --git a/python/pyarrow/_dataset_parquet.pyi b/python/pyarrow/_dataset_parquet.pyi deleted file mode 100644 index 007d3404a18..00000000000 --- a/python/pyarrow/_dataset_parquet.pyi +++ /dev/null @@ -1,331 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from dataclasses import dataclass -from typing import IO, Any, Iterable, TypedDict - -from _typeshed import StrPath - -from ._compute import Expression -from ._dataset import ( - DatasetFactory, - FileFormat, - FileFragment, - FileWriteOptions, - Fragment, - FragmentScanOptions, - Partitioning, - PartitioningFactory, -) -from ._dataset_parquet_encryption import ParquetDecryptionConfig -from ._fs import SupportedFileSystem -from ._parquet import FileDecryptionProperties, FileMetaData -from .lib import CacheOptions, Schema, _Weakrefable - -parquet_encryption_enabled: bool - -class ParquetFileFormat(FileFormat): - """ - FileFormat for Parquet - - Parameters - ---------- - read_options : ParquetReadOptions - Read options for the file. - default_fragment_scan_options : ParquetFragmentScanOptions - Scan Options for the file. - **kwargs : dict - Additional options for read option or scan option - """ - def __init__( - self, - read_options: ParquetReadOptions | None = None, - default_fragment_scan_options: ParquetFragmentScanOptions | None = None, - **kwargs, - ) -> None: ... - @property - def read_options(self) -> ParquetReadOptions: ... - def make_write_options(self) -> ParquetFileWriteOptions: ... # type: ignore[override] - def equals(self, other: ParquetFileFormat) -> bool: ... - @property - def default_extname(self) -> str: ... 
- def make_fragment( - self, - file: StrPath | IO, - filesystem: SupportedFileSystem | None = None, - partition_expression: Expression | None = None, - row_groups: Iterable[int] | None = None, - *, - file_size: int | None = None, - ) -> Fragment: - """ - Make a FileFragment from a given file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to make a fragment from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - partition_expression : Expression, optional - An expression that is guaranteed true for all rows in the fragment. Allows - fragment to be potentially skipped while scanning with a filter. - row_groups : Iterable, optional - The indices of the row groups to include - file_size : int, optional - The size of the file in bytes. Can improve performance with high-latency filesystems - when file size needs to be known before reading. - - Returns - ------- - fragment : Fragment - The file fragment - """ - -class _NameStats(TypedDict): - min: Any - max: Any - -class RowGroupInfo: - """ - A wrapper class for RowGroup information - - Parameters - ---------- - id : integer - The group ID. - metadata : FileMetaData - The rowgroup metadata. - schema : Schema - Schema of the rows. - """ - - id: int - metadata: FileMetaData - schema: Schema - - def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ... - @property - def num_rows(self) -> int: ... - @property - def total_byte_size(self) -> int: ... - @property - def statistics(self) -> dict[str, _NameStats]: ... - -class ParquetFileFragment(FileFragment): - """A Fragment representing a parquet file.""" - - def ensure_complete_metadata(self) -> None: ... - @property - def row_groups(self) -> list[RowGroupInfo]: ... - @property - def metadata(self) -> FileMetaData: ... - @property - def num_row_groups(self) -> int: - """ - Return the number of row groups viewed by this fragment (not the - number of row groups in the origin file). - """ - def split_by_row_group( - self, filter: Expression | None = None, schema: Schema | None = None - ) -> list[Fragment]: - """ - Split the fragment into multiple fragments. - - Yield a Fragment wrapping each row group in this ParquetFileFragment. - Row groups will be excluded whose metadata contradicts the optional - filter. - - Parameters - ---------- - filter : Expression, default None - Only include the row groups which satisfy this predicate (using - the Parquet RowGroup statistics). - schema : Schema, default None - Schema to use when filtering row groups. Defaults to the - Fragment's physical schema - - Returns - ------- - A list of Fragments - """ - def subset( - self, - filter: Expression | None = None, - schema: Schema | None = None, - row_group_ids: list[int] | None = None, - ) -> ParquetFileFormat: - """ - Create a subset of the fragment (viewing a subset of the row groups). - - Subset can be specified by either a filter predicate (with optional - schema) or by a list of row group IDs. Note that when using a filter, - the resulting fragment can be empty (viewing no row groups). - - Parameters - ---------- - filter : Expression, default None - Only include the row groups which satisfy this predicate (using - the Parquet RowGroup statistics). - schema : Schema, default None - Schema to use when filtering row groups. Defaults to the - Fragment's physical schema - row_group_ids : list of ints - The row group IDs to include in the subset. 
Can only be specified - if `filter` is None. - - Returns - ------- - ParquetFileFragment - """ - -class ParquetReadOptions(_Weakrefable): - """ - Parquet format specific options for reading. - - Parameters - ---------- - dictionary_columns : list of string, default None - Names of columns which should be dictionary encoded as - they are read - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds - """ - def __init__( - self, dictionary_columns: list[str] | None, coerce_int96_timestamp_unit: str | None = None - ) -> None: ... - @property - def coerce_int96_timestamp_unit(self) -> str: ... - @coerce_int96_timestamp_unit.setter - def coerce_int96_timestamp_unit(self, unit: str) -> None: ... - def equals(self, other: ParquetReadOptions) -> bool: ... - -class ParquetFileWriteOptions(FileWriteOptions): - def update(self, **kwargs) -> None: ... - def _set_properties(self) -> None: ... - def _set_arrow_properties(self) -> None: ... - def _set_encryption_config(self) -> None: ... - -@dataclass(kw_only=True) -class ParquetFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for Parquet fragments. - - Parameters - ---------- - use_buffered_stream : bool, default False - Read files through buffered input streams rather than loading entire - row groups at once. This may be enabled to reduce memory overhead. - Disabled by default. - buffer_size : int, default 8192 - Size of buffered stream, if enabled. Default is 8KB. - pre_buffer : bool, default True - If enabled, pre-buffer the raw Parquet data instead of issuing one - read per column chunk. This can improve performance on high-latency - filesystems (e.g. S3, GCS) by coalescing and issuing file reads in - parallel using a background I/O thread pool. - Set to False if you want to prioritize minimal memory usage - over maximum speed. - cache_options : pyarrow.CacheOptions, default None - Cache options used when pre_buffer is enabled. The default values should - be good for most use cases. You may want to adjust these for example if - you have exceptionally high latency to the file system. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None - If not None, use the provided ParquetDecryptionConfig to decrypt the - Parquet file. - decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None - If not None, use the provided FileDecryptionProperties to decrypt encrypted - Parquet file. - page_checksum_verification : bool, default False - If True, verify the page checksum for each page read from the file. 
- """ - - use_buffered_stream: bool = False - buffer_size: int = 8192 - pre_buffer: bool = True - cache_options: CacheOptions | None = None - thrift_string_size_limit: int | None = None - thrift_container_size_limit: int | None = None - decryption_config: ParquetDecryptionConfig | None = None - decryption_properties: FileDecryptionProperties | None = None - page_checksum_verification: bool = False - - def equals(self, other: ParquetFragmentScanOptions) -> bool: ... - -@dataclass -class ParquetFactoryOptions(_Weakrefable): - """ - Influences the discovery of parquet dataset. - - Parameters - ---------- - partition_base_dir : str, optional - For the purposes of applying the partitioning, paths will be - stripped of the partition_base_dir. Files not matching the - partition_base_dir prefix will be skipped for partitioning discovery. - The ignored files will still be part of the Dataset, but will not - have partition information. - partitioning : Partitioning, PartitioningFactory, optional - The partitioning scheme applied to fragments, see ``Partitioning``. - validate_column_chunk_paths : bool, default False - Assert that all ColumnChunk paths are consistent. The parquet spec - allows for ColumnChunk data to be stored in multiple files, but - ParquetDatasetFactory supports only a single file with all ColumnChunk - data. If this flag is set construction of a ParquetDatasetFactory will - raise an error if ColumnChunk data is not resident in a single file. - """ - - partition_base_dir: str | None = None - partitioning: Partitioning | PartitioningFactory | None = None - validate_column_chunk_paths: bool = False - -class ParquetDatasetFactory(DatasetFactory): - """ - Create a ParquetDatasetFactory from a Parquet `_metadata` file. - - Parameters - ---------- - metadata_path : str - Path to the `_metadata` parquet metadata-only file generated with - `pyarrow.parquet.write_metadata`. - filesystem : pyarrow.fs.FileSystem - Filesystem to read the metadata_path from, and subsequent parquet - files. - format : ParquetFileFormat - Parquet format options. - options : ParquetFactoryOptions, optional - Various flags influencing the discovery of filesystem paths. - """ - def __init__( - self, - metadata_path: str, - filesystem: SupportedFileSystem, - format: FileFormat, - options: ParquetFactoryOptions | None = None, - ) -> None: ... diff --git a/python/pyarrow/_flight.pyi b/python/pyarrow/_flight.pyi deleted file mode 100644 index a79475a8796..00000000000 --- a/python/pyarrow/_flight.pyi +++ /dev/null @@ -1,1397 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import asyncio -import enum -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import Generator, Generic, Iterable, Iterator, NamedTuple, TypeVar - -from typing_extensions import deprecated - -from .ipc import _ReadPandasMixin -from .lib import ( - ArrowCancelled, - ArrowException, - ArrowInvalid, - Buffer, - IpcReadOptions, - IpcWriteOptions, - RecordBatch, - RecordBatchReader, - Schema, - Table, - TimestampScalar, - _CRecordBatchWriter, - _Weakrefable, -) - -_T = TypeVar("_T") - -class FlightCallOptions(_Weakrefable): - """RPC-layer options for a Flight call.""" - - def __init__( - self, - timeout: float | None = None, - write_options: IpcWriteOptions | None = None, - headers: list[tuple[str, str]] | None = None, - read_options: IpcReadOptions | None = None, - ) -> None: - """Create call options. - - Parameters - ---------- - timeout : float, None - A timeout for the call, in seconds. None means that the - timeout defaults to an implementation-specific value. - write_options : pyarrow.ipc.IpcWriteOptions, optional - IPC write options. The default options can be controlled - by environment variables (see pyarrow.ipc). - headers : List[Tuple[str, str]], optional - A list of arbitrary headers as key, value tuples - read_options : pyarrow.ipc.IpcReadOptions, optional - Serialization options for reading IPC format. - """ - -class CertKeyPair(NamedTuple): - """A TLS certificate and key for use in Flight.""" - - cert: str - key: str - -class FlightError(Exception): - """ - The base class for Flight-specific errors. - - A server may raise this class or one of its subclasses to provide - a more detailed error to clients. - - Parameters - ---------- - message : str, optional - The error message. - extra_info : bytes, optional - Extra binary error details that were provided by the - server/will be sent to the client. - - Attributes - ---------- - extra_info : bytes - Extra binary error details that were provided by the - server/will be sent to the client. - """ - - extra_info: bytes - -class FlightInternalError(FlightError, ArrowException): - """An error internal to the Flight server occurred.""" - -class FlightTimedOutError(FlightError, ArrowException): - """The Flight RPC call timed out.""" - -class FlightCancelledError(FlightError, ArrowCancelled): - """The operation was cancelled.""" - -class FlightServerError(FlightError, ArrowException): - """A server error occurred.""" - -class FlightUnauthenticatedError(FlightError, ArrowException): - """The client is not authenticated.""" - -class FlightUnauthorizedError(FlightError, ArrowException): - """The client is not authorized to perform the given operation.""" - -class FlightUnavailableError(FlightError, ArrowException): - """The server is not reachable or available.""" - -class FlightWriteSizeExceededError(ArrowInvalid): - """A write operation exceeded the client-configured limit.""" - - limit: int - actual: int - -class Action(_Weakrefable): - """An action executable on a Flight service.""" - - def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: - """Create an action from a type and a buffer. - - Parameters - ---------- - action_type : bytes or str - buf : Buffer or bytes-like object - """ - @property - def type(self) -> str: - """The action type.""" - @property - def body(self) -> Buffer: - """The action body (arguments for the action).""" - def serialize(self) -> bytes: - """Get the wire-format representation of this type. 
- - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. - - """ - @classmethod - def deserialize(cls, serialized: bytes) -> Self: - """Parse the wire-format representation of this type. - - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. - - """ - -class ActionType(NamedTuple): - """A type of action that is executable on a Flight service.""" - - type: str - description: str - - def make_action(self, buf: Buffer | bytes) -> Action: - """Create an Action with this type. - - Parameters - ---------- - buf : obj - An Arrow buffer or Python bytes or bytes-like object. - """ - -class Result(_Weakrefable): - """A result from executing an Action.""" - def __init__(self, buf: Buffer | bytes) -> None: - """Create a new result. - - Parameters - ---------- - buf : Buffer or bytes-like object - """ - @property - def body(self) -> Buffer: - """Get the Buffer containing the result.""" - def serialize(self) -> bytes: - """Get the wire-format representation of this type. - - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. - - """ - @classmethod - def deserialize(cls, serialized: bytes) -> Self: - """Parse the wire-format representation of this type. - - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. - - """ - -class BasicAuth(_Weakrefable): - """A container for basic auth.""" - def __init__( - self, username: str | bytes | None = None, password: str | bytes | None = None - ) -> None: - """Create a new basic auth object. - - Parameters - ---------- - username : string - password : string - """ - @property - def username(self) -> bytes: ... - @property - def password(self) -> bytes: ... - def serialize(self) -> str: ... - @staticmethod - def deserialize(serialized: str | bytes) -> BasicAuth: ... - -class DescriptorType(enum.Enum): - """ - The type of a FlightDescriptor. - - Attributes - ---------- - - UNKNOWN - An unknown descriptor type. - - PATH - A Flight stream represented by a path. - - CMD - A Flight stream represented by an application-defined command. - - """ - - UNKNOWN = 0 - PATH = 1 - CMD = 2 - -class FlightMethod(enum.Enum): - """The implemented methods in Flight.""" - - INVALID = 0 - HANDSHAKE = 1 - LIST_FLIGHTS = 2 - GET_FLIGHT_INFO = 3 - GET_SCHEMA = 4 - DO_GET = 5 - DO_PUT = 6 - DO_ACTION = 7 - LIST_ACTIONS = 8 - DO_EXCHANGE = 9 - -class FlightDescriptor(_Weakrefable): - """A description of a data stream available from a Flight service.""" - @staticmethod - def for_path(*path: str | bytes) -> FlightDescriptor: - """Create a FlightDescriptor for a resource path.""" - - @staticmethod - def for_command(command: str | bytes) -> FlightDescriptor: - """Create a FlightDescriptor for an opaque command.""" - @property - def descriptor_type(self) -> DescriptorType: - """Get the type of this descriptor.""" - @property - def path(self) -> list[bytes] | None: - """Get the path for this descriptor.""" - @property - def command(self) -> bytes | None: - """Get the command for this descriptor.""" - def serialize(self) -> bytes: ... - @classmethod - def deserialize(cls, serialized: bytes) -> Self: ... - -class Ticket(_Weakrefable): - """A ticket for requesting a Flight stream.""" - def __init__(self, ticket: str | bytes) -> None: ... - @property - def ticket(self) -> bytes: ... - def serialize(self) -> bytes: ... 
- @classmethod - def deserialize(cls, serialized: bytes) -> Self: ... - -class Location(_Weakrefable): - """The location of a Flight service.""" - def __init__(self, uri: str | bytes) -> None: ... - @property - def uri(self) -> bytes: ... - def equals(self, other: Location) -> bool: ... - @staticmethod - def for_grpc_tcp(host: str | bytes, port: int) -> Location: - """Create a Location for a TCP-based gRPC service.""" - @staticmethod - def for_grpc_tls(host: str | bytes, port: int) -> Location: - """Create a Location for a TLS-based gRPC service.""" - @staticmethod - def for_grpc_unix(path: str | bytes) -> Location: - """Create a Location for a domain socket-based gRPC service.""" - -class FlightEndpoint(_Weakrefable): - """A Flight stream, along with the ticket and locations to access it.""" - def __init__( - self, - ticket: Ticket | str | bytes, - locations: list[str | Location], - expiration_time: TimestampScalar | None = ..., - app_metadata: bytes | str = ..., - ): - """Create a FlightEndpoint from a ticket and list of locations. - - Parameters - ---------- - ticket : Ticket or bytes - the ticket needed to access this flight - locations : list of string URIs - locations where this flight is available - expiration_time : TimestampScalar, default None - Expiration time of this stream. If present, clients may assume - they can retry DoGet requests. Otherwise, clients should avoid - retrying DoGet requests. - app_metadata : bytes or str, default "" - Application-defined opaque metadata. - - Raises - ------ - ArrowException - If one of the location URIs is not a valid URI. - """ - @property - def ticket(self) -> Ticket: - """Get the ticket in this endpoint.""" - @property - def locations(self) -> list[Location]: - """Get locations where this flight is available.""" - def serialize(self) -> bytes: ... - @property - def expiration_time(self) -> TimestampScalar | None: - """Get the expiration time of this stream. - - If present, clients may assume they can retry DoGet requests. - Otherwise, clients should avoid retrying DoGet requests. - - """ - @property - def app_metadata(self) -> bytes | str: - """Get application-defined opaque metadata.""" - @classmethod - def deserialize(cls, serialized: bytes) -> Self: ... - -class SchemaResult(_Weakrefable): - """The serialized schema returned from a GetSchema request.""" - def __init__(self, schema: Schema) -> None: - """Create a SchemaResult from a schema. - - Parameters - ---------- - schema: Schema - the schema of the data in this flight. - """ - @property - def schema(self) -> Schema: - """The schema of the data in this flight.""" - def serialize(self) -> bytes: ... - @classmethod - def deserialize(cls, serialized: bytes) -> Self: ... - -class FlightInfo(_Weakrefable): - """A description of a Flight stream.""" - def __init__( - self, - schema: Schema, - descriptor: FlightDescriptor, - endpoints: list[FlightEndpoint], - total_records: int = ..., - total_bytes: int = ..., - ordered: bool = ..., - app_metadata: bytes | str = ..., - ) -> None: - """Create a FlightInfo object from a schema, descriptor, and endpoints. - - Parameters - ---------- - schema : Schema - the schema of the data in this flight. - descriptor : FlightDescriptor - the descriptor for this flight. - endpoints : list of FlightEndpoint - a list of endpoints where this flight is available. - total_records : int, default None - the total records in this flight, -1 or None if unknown. - total_bytes : int, default None - the total bytes in this flight, -1 or None if unknown. 
- ordered : boolean, default False - Whether endpoints are in the same order as the data. - app_metadata : bytes or str, default "" - Application-defined opaque metadata. - """ - @property - def schema(self) -> Schema: - """The schema of the data in this flight.""" - @property - def descriptor(self) -> FlightDescriptor: - """The descriptor of the data in this flight.""" - @property - def endpoints(self) -> list[FlightEndpoint]: - """The endpoints where this flight is available.""" - @property - def total_records(self) -> int: - """The total record count of this flight, or -1 if unknown.""" - @property - def total_bytes(self) -> int: - """The size in bytes of the data in this flight, or -1 if unknown.""" - @property - def ordered(self) -> bool: - """Whether endpoints are in the same order as the data.""" - @property - def app_metadata(self) -> bytes | str: - """ - Application-defined opaque metadata. - - There is no inherent or required relationship between this and the - app_metadata fields in the FlightEndpoints or resulting FlightData - messages. Since this metadata is application-defined, a given - application could define there to be a relationship, but there is - none required by the spec. - - """ - def serialize(self) -> bytes: ... - @classmethod - def deserialize(cls, serialized: bytes) -> Self: ... - -class FlightStreamChunk(_Weakrefable): - """A RecordBatch with application metadata on the side.""" - @property - def data(self) -> RecordBatch | None: ... - @property - def app_metadata(self) -> Buffer | None: ... - def __iter__(self): ... - -class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): - """A reader for Flight streams.""" - - # Needs to be separate class so the "real" class can subclass the - # pure-Python mixin class - - def __iter__(self) -> Self: ... - def __next__(self) -> FlightStreamChunk: ... - @property - def schema(self) -> Schema: - """Get the schema for this reader.""" - def read_all(self) -> Table: - """Read the entire contents of the stream as a Table.""" - def read_chunk(self) -> FlightStreamChunk: - """Read the next FlightStreamChunk along with any metadata. - - Returns - ------- - chunk : FlightStreamChunk - The next FlightStreamChunk in the stream. - - Raises - ------ - StopIteration - when the stream is finished - """ - def to_reader(self) -> RecordBatchReader: - """Convert this reader into a regular RecordBatchReader. - - This may fail if the schema cannot be read from the remote end. - - Returns - ------- - RecordBatchReader - """ - -class MetadataRecordBatchReader(_MetadataRecordBatchReader): - """The base class for readers for Flight streams. - - See Also - -------- - FlightStreamReader - """ - -class FlightStreamReader(MetadataRecordBatchReader): - """A reader that can also be canceled.""" - def cancel(self) -> None: - """Cancel the read operation.""" - def read_all(self) -> Table: - """Read the entire contents of the stream as a Table.""" - -class MetadataRecordBatchWriter(_CRecordBatchWriter): - """A RecordBatchWriter that also allows writing application metadata. - - This class is a context manager; on exit, close() will be called. - """ - - def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: - """Prepare to write data to this stream with the given schema.""" - def write_metadata(self, buf: Buffer) -> None: - """Write Flight metadata by itself.""" - def write_batch(self, batch: RecordBatch) -> None: # type: ignore[override] - """ - Write RecordBatch to stream. 
- - Parameters - ---------- - batch : RecordBatch - """ - def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: - """ - Write Table to stream in (contiguous) RecordBatch objects. - - Parameters - ---------- - table : Table - max_chunksize : int, default None - Maximum number of rows for RecordBatch chunks. Individual chunks may - be smaller depending on the chunk layout of individual columns. - """ - def close(self) -> None: - """ - Close stream and write end-of-stream 0 marker. - """ - def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: - """Write a RecordBatch along with Flight metadata. - - Parameters - ---------- - batch : RecordBatch - The next RecordBatch in the stream. - buf : Buffer - Application-specific metadata for the batch as defined by - Flight. - """ - -class FlightStreamWriter(MetadataRecordBatchWriter): - """A writer that also allows closing the write side of a stream.""" - def done_writing(self) -> None: - """Indicate that the client is done writing, but not done reading.""" - -class FlightMetadataReader(_Weakrefable): - """A reader for Flight metadata messages sent during a DoPut.""" - def read(self) -> Buffer | None: - """Read the next metadata message.""" - -class FlightMetadataWriter(_Weakrefable): - """A sender for Flight metadata messages during a DoPut.""" - def write(self, message: Buffer) -> None: - """Write the next metadata message. - - Parameters - ---------- - message : Buffer - """ - -class AsyncioCall(Generic[_T]): - """State for an async RPC using asyncio.""" - - _future: asyncio.Future[_T] - - def as_awaitable(self) -> asyncio.Future[_T]: ... - def wakeup(self, result_or_exception: BaseException | _T) -> None: ... - -class AsyncioFlightClient: - """ - A FlightClient with an asyncio-based async interface. - - This interface is EXPERIMENTAL. - """ - - def __init__(self, client: FlightClient) -> None: ... - async def get_flight_info( - self, - descriptor: FlightDescriptor, - *, - options: FlightCallOptions | None = None, - ): ... - -class FlightClient(_Weakrefable): - """A client to a Flight service. - - Connect to a Flight service on the given host and port. - - Parameters - ---------- - location : str, tuple or Location - Location to connect to. Either a gRPC URI like `grpc://localhost:port`, - a tuple of (host, port) pair, or a Location instance. - tls_root_certs : bytes or None - PEM-encoded - cert_chain: bytes or None - Client certificate if using mutual TLS - private_key: bytes or None - Client private key for cert_chain is using mutual TLS - override_hostname : str or None - Override the hostname checked by TLS. Insecure, use with caution. - middleware : list optional, default None - A list of ClientMiddlewareFactory instances. - write_size_limit_bytes : int optional, default None - A soft limit on the size of a data payload sent to the - server. Enabled if positive. If enabled, writing a record - batch that (when serialized) exceeds this limit will raise an - exception; the client can retry the write with a smaller - batch. - disable_server_verification : boolean optional, default False - A flag that indicates that, if the client is connecting - with TLS, that it skips server verification. If this is - enabled, all other TLS settings are overridden. - generic_options : list optional, default None - A list of generic (string, int or string) option tuples passed - to the underlying transport. Effect is implementation - dependent. 
- """ - def __init__( - self, - location: str | tuple[str, int] | Location, - *, - tls_root_certs: str | None = None, - cert_chain: str | None = None, - private_key: str | None = None, - override_hostname: str | None = None, - middleware: list[ClientMiddlewareFactory] | None = None, - write_size_limit_bytes: int | None = None, - disable_server_verification: bool = False, - generic_options: list[tuple[str, int | str]] | None = None, - ): ... - @property - def supports_async(self) -> bool: ... - def as_async(self) -> AsyncioFlightClient: ... - def wait_for_available(self, timeout: int = 5) -> None: - """Block until the server can be contacted. - - Parameters - ---------- - timeout : int, default 5 - The maximum seconds to wait. - """ - @deprecated( - "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." - ) - @classmethod - def connect( - cls, - location: str | tuple[str, int] | Location, - tls_root_certs: str | None = None, - cert_chain: str | None = None, - private_key: str | None = None, - override_hostname: str | None = None, - disable_server_verification: bool = False, - ) -> FlightClient: - """Connect to a Flight server. - - .. deprecated:: 0.15.0 - Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead. - """ - def authenticate( - self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None - ) -> None: - """Authenticate to the server. - - Parameters - ---------- - auth_handler : ClientAuthHandler - The authentication mechanism to use. - options : FlightCallOptions - Options for this call. - """ - def authenticate_basic_token( - self, username: str, password: str, options: FlightCallOptions | None = None - ) -> tuple[str, str]: - """Authenticate to the server with HTTP basic authentication. - - Parameters - ---------- - username : string - Username to authenticate with - password : string - Password to authenticate with - options : FlightCallOptions - Options for this call - - Returns - ------- - tuple : Tuple[str, str] - A tuple representing the FlightCallOptions authorization - header entry of a bearer token. - """ - def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: - """List the actions available on a service.""" - def do_action( - self, action: Action, options: FlightCallOptions | None = None - ) -> Iterator[Result]: - """ - Execute an action on a service. - - Parameters - ---------- - action : str, tuple, or Action - Can be action type name (no body), type and body, or any Action - object - options : FlightCallOptions - RPC options - - Returns - ------- - results : iterator of Result values - """ - def list_flights( - self, criteria: str | None = None, options: FlightCallOptions | None = None - ) -> Generator[FlightInfo, None, None]: - """List the flights available on a service.""" - def get_flight_info( - self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> FlightInfo: - """Request information about an available flight.""" - def get_schema( - self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> Schema: - """Request schema for an available flight.""" - def do_get( - self, ticket: Ticket, options: FlightCallOptions | None = None - ) -> FlightStreamReader: - """Request the data for a flight. 
- - Returns - ------- - reader : FlightStreamReader - """ - def do_put( - self, - descriptor: FlightDescriptor, - schema: Schema, - options: FlightCallOptions | None = None, - ) -> tuple[FlightStreamWriter, FlightStreamReader]: - """Upload data to a flight. - - Returns - ------- - writer : FlightStreamWriter - reader : FlightMetadataReader - """ - def do_exchange( - self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> tuple[FlightStreamWriter, FlightStreamReader]: - """Start a bidirectional data exchange with a server. - - Parameters - ---------- - descriptor : FlightDescriptor - A descriptor for the flight. - options : FlightCallOptions - RPC options. - - Returns - ------- - writer : FlightStreamWriter - reader : FlightStreamReader - """ - def close(self) -> None: - """Close the client and disconnect.""" - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_value, traceback) -> None: ... - -class FlightDataStream(_Weakrefable): - """ - Abstract base class for Flight data streams. - - See Also - -------- - RecordBatchStream - GeneratorStream - """ - -class RecordBatchStream(FlightDataStream): - """A Flight data stream backed by RecordBatches. - - The remainder of this DoGet request will be handled in C++, - without having to acquire the GIL. - - """ - def __init__( - self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None - ) -> None: - """Create a RecordBatchStream from a data source. - - Parameters - ---------- - data_source : RecordBatchReader or Table - The data to stream to the client. - options : pyarrow.ipc.IpcWriteOptions, optional - Optional IPC options to control how to write the data. - """ - -class GeneratorStream(FlightDataStream): - """A Flight data stream backed by a Python generator.""" - def __init__( - self, - schema: Schema, - generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], - options: IpcWriteOptions | None = None, - ) -> None: - """Create a GeneratorStream from a Python generator. - - Parameters - ---------- - schema : Schema - The schema for the data to be returned. - - generator : iterator or iterable - The generator should yield other FlightDataStream objects, - Tables, RecordBatches, or RecordBatchReaders. - - options : pyarrow.ipc.IpcWriteOptions, optional - """ - -class ServerCallContext(_Weakrefable): - """Per-call state/context.""" - def peer_identity(self) -> bytes: - """Get the identity of the authenticated peer. - - May be the empty string. - """ - def peer(self) -> str: - """Get the address of the peer.""" - # Set safe=True as gRPC on Windows sometimes gives garbage bytes - def is_cancelled(self) -> bool: - """Check if the current RPC call has been canceled by the client.""" - def add_header(self, key: str, value: str) -> None: - """Add a response header.""" - def add_trailer(self, key: str, value: str) -> None: - """Add a response trailer.""" - def get_middleware(self, key: str) -> ServerMiddleware | None: - """ - Get a middleware instance by key. - - Returns None if the middleware was not found. - """ - -class ServerAuthReader(_Weakrefable): - """A reader for messages from the client during an auth handshake.""" - def read(self) -> str: ... - -class ServerAuthSender(_Weakrefable): - """A writer for messages to the client during an auth handshake.""" - def write(self, message: str) -> None: ... - -class ClientAuthReader(_Weakrefable): - """A reader for messages from the server during an auth handshake.""" - def read(self) -> str: ... 
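The client methods documented above (get_flight_info, do_get, do_put) are typically combined as in the following sketch. The gRPC address, descriptors, and table contents are placeholder assumptions, not part of this patch.

import pyarrow as pa
import pyarrow.flight as flight

# Assumes a Flight server is already listening at this placeholder address.
client = flight.connect("grpc://localhost:8815")

# Discover a flight and download every endpoint.
info = client.get_flight_info(flight.FlightDescriptor.for_command(b"example"))
for endpoint in info.endpoints:
    reader = client.do_get(endpoint.ticket)
    table = reader.read_all()

# Upload a table; per the do_put docstring above, the second element of the
# returned pair is the metadata reader.
upload = pa.table({"x": [1, 2, 3]})
writer, metadata_reader = client.do_put(
    flight.FlightDescriptor.for_path("uploaded.arrow"), upload.schema
)
writer.write_table(upload)
writer.close()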
- -class ClientAuthSender(_Weakrefable): - """A writer for messages to the server during an auth handshake.""" - def write(self, message: str) -> None: ... - -class ServerAuthHandler(_Weakrefable): - """Authentication middleware for a server. - - To implement an authentication mechanism, subclass this class and - override its methods. - - """ - def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): - """Conduct the handshake with the client. - - May raise an error if the client cannot authenticate. - - Parameters - ---------- - outgoing : ServerAuthSender - A channel to send messages to the client. - incoming : ServerAuthReader - A channel to read messages from the client. - """ - def is_valid(self, token: str) -> bool: - """Validate a client token, returning their identity. - - May return an empty string (if the auth mechanism does not - name the peer) or raise an exception (if the token is - invalid). - - Parameters - ---------- - token : bytes - The authentication token from the client. - - """ - -class ClientAuthHandler(_Weakrefable): - """Authentication plugin for a client.""" - def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): - """Conduct the handshake with the server. - - Parameters - ---------- - outgoing : ClientAuthSender - A channel to send messages to the server. - incoming : ClientAuthReader - A channel to read messages from the server. - """ - def get_token(self) -> str: - """Get the auth token for a call.""" - -class CallInfo(NamedTuple): - """Information about a particular RPC for Flight middleware.""" - - method: FlightMethod - -class ClientMiddlewareFactory(_Weakrefable): - """A factory for new middleware instances. - - All middleware methods will be called from the same thread as the - RPC method implementation. That is, thread-locals set in the - client are accessible from the middleware itself. - - """ - def start_call(self, info: CallInfo) -> ClientMiddleware | None: - """Called at the start of an RPC. - - This must be thread-safe and must not raise exceptions. - - Parameters - ---------- - info : CallInfo - Information about the call. - - Returns - ------- - instance : ClientMiddleware - An instance of ClientMiddleware (the instance to use for - the call), or None if this call is not intercepted. - - """ - -class ClientMiddleware(_Weakrefable): - """Client-side middleware for a call, instantiated per RPC. - - Methods here should be fast and must be infallible: they should - not raise exceptions or stall indefinitely. - - """ - - def sending_headers(self) -> dict[str, list[str] | list[bytes]]: - """A callback before headers are sent. - - Returns - ------- - headers : dict - A dictionary of header values to add to the request, or - None if no headers are to be added. The dictionary should - have string keys and string or list-of-string values. - - Bytes values are allowed, but the underlying transport may - not support them or may restrict them. For gRPC, binary - values are only allowed on headers ending in "-bin". - - Header names must be lowercase ASCII. - - """ - - def received_headers(self, headers: dict[str, list[str] | list[bytes]]): - """A callback when headers are received. - - The default implementation does nothing. - - Parameters - ---------- - headers : dict - A dictionary of headers from the server. Keys are strings - and values are lists of strings (for text headers) or - bytes (for binary headers). 
- - """ - - def call_completed(self, exception: ArrowException): - """A callback when the call finishes. - - The default implementation does nothing. - - Parameters - ---------- - exception : ArrowException - If the call errored, this is the equivalent - exception. Will be None if the call succeeded. - - """ - -class ServerMiddlewareFactory(_Weakrefable): - """A factory for new middleware instances. - - All middleware methods will be called from the same thread as the - RPC method implementation. That is, thread-locals set in the - middleware are accessible from the method itself. - - """ - - def start_call( - self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] - ) -> ServerMiddleware | None: - """Called at the start of an RPC. - - This must be thread-safe. - - Parameters - ---------- - info : CallInfo - Information about the call. - headers : dict - A dictionary of headers from the client. Keys are strings - and values are lists of strings (for text headers) or - bytes (for binary headers). - - Returns - ------- - instance : ServerMiddleware - An instance of ServerMiddleware (the instance to use for - the call), or None if this call is not intercepted. - - Raises - ------ - exception : pyarrow.ArrowException - If an exception is raised, the call will be rejected with - the given error. - - """ - -class TracingServerMiddlewareFactory(ServerMiddlewareFactory): - """A factory for tracing middleware instances. - - This enables OpenTelemetry support in Arrow (if Arrow was compiled - with OpenTelemetry support enabled). A new span will be started on - each RPC call. The TracingServerMiddleware instance can then be - retrieved within an RPC handler to get the propagated context, - which can be used to start a new span on the Python side. - - Because the Python/C++ OpenTelemetry libraries do not - interoperate, spans on the C++ side are not directly visible to - the Python side and vice versa. - - """ - -class ServerMiddleware(_Weakrefable): - """Server-side middleware for a call, instantiated per RPC. - - Methods here should be fast and must be infallible: they should - not raise exceptions or stall indefinitely. - - """ - - def sending_headers(self) -> dict[str, list[str] | list[bytes]]: - """A callback before headers are sent. - - Returns - ------- - headers : dict - A dictionary of header values to add to the response, or - None if no headers are to be added. The dictionary should - have string keys and string or list-of-string values. - - Bytes values are allowed, but the underlying transport may - not support them or may restrict them. For gRPC, binary - values are only allowed on headers ending in "-bin". - - Header names must be lowercase ASCII. - - """ - def call_completed(self, exception: ArrowException): - """A callback when the call finishes. - - Parameters - ---------- - exception : pyarrow.ArrowException - If the call errored, this is the equivalent - exception. Will be None if the call succeeded. - - """ - -class TracingServerMiddleware(ServerMiddleware): - trace_context: dict - def __init__(self, trace_context: dict) -> None: ... - -class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory): - """Wrapper to bundle server middleware into a single C++ one.""" - - def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ... - def start_call( # type: ignore[override] - self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] - ) -> _ServerMiddlewareFactoryWrapper | None: ... 
- -class _ServerMiddlewareWrapper(ServerMiddleware): - def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ... - def send_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ... - def call_completed(self, exception: ArrowException) -> None: ... - -class _FlightServerFinalizer(_Weakrefable): - """ - A finalizer that shuts down the server on destruction. - - See ARROW-16597. If the server is still active at interpreter - exit, the process may segfault. - """ - - def finalize(self) -> None: ... - -class FlightServerBase(_Weakrefable): - """A Flight service definition. - - To start the server, create an instance of this class with an - appropriate location. The server will be running as soon as the - instance is created; it is not required to call :meth:`serve`. - - Override methods to define your Flight service. - - Parameters - ---------- - location : str, tuple or Location optional, default None - Location to serve on. Either a gRPC URI like `grpc://localhost:port`, - a tuple of (host, port) pair, or a Location instance. - If None is passed then the server will be started on localhost with a - system provided random port. - auth_handler : ServerAuthHandler optional, default None - An authentication mechanism to use. May be None. - tls_certificates : list optional, default None - A list of (certificate, key) pairs. - verify_client : boolean optional, default False - If True, then enable mutual TLS: require the client to present - a client certificate, and validate the certificate. - root_certificates : bytes optional, default None - If enabling mutual TLS, this specifies the PEM-encoded root - certificate used to validate client certificates. - middleware : dict optional, default None - A dictionary of :class:`ServerMiddlewareFactory` instances. The - string keys can be used to retrieve the middleware instance within - RPC handlers (see :meth:`ServerCallContext.get_middleware`). - - """ - def __init__( - self, - location: str | tuple[str, int] | Location | None = None, - auth_handler: ServerAuthHandler | None = None, - tls_certificates: list[tuple[str, str]] | None = None, - verify_client: bool = False, - root_certificates: str | None = None, - middleware: dict[str, ServerMiddlewareFactory] | None = None, - ): ... - @property - def port(self) -> int: - """ - Get the port that this server is listening on. - - Returns a non-positive value if the operation is invalid - (e.g. init() was not called or server is listening on a domain - socket). - """ - def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: - """List flights available on this service. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - criteria : bytes - Filter criteria provided by the client. - - Returns - ------- - iterator of FlightInfo - - """ - def get_flight_info( - self, context: ServerCallContext, descriptor: FlightDescriptor - ) -> FlightInfo: - """Get information about a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. 
- - Returns - ------- - FlightInfo - - """ - def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: - """Get the schema of a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - - Returns - ------- - Schema - - """ - def do_put( - self, - context: ServerCallContext, - descriptor: FlightDescriptor, - reader: MetadataRecordBatchReader, - writer: FlightMetadataWriter, - ) -> None: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - reader : MetadataRecordBatchReader - A reader for data uploaded by the client. - writer : FlightMetadataWriter - A writer to send responses to the client. - - """ - def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - ticket : Ticket - The ticket for the flight. - - Returns - ------- - FlightDataStream - A stream of data to send back to the client. - - """ - def do_exchange( - self, - context: ServerCallContext, - descriptor: FlightDescriptor, - reader: MetadataRecordBatchReader, - writer: MetadataRecordBatchWriter, - ) -> None: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - reader : MetadataRecordBatchReader - A reader for data uploaded by the client. - writer : MetadataRecordBatchWriter - A writer to send responses to the client. - - """ - def list_actions(self, context: ServerCallContext) -> Iterable[Action]: - """List custom actions available on this server. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - - Returns - ------- - iterator of ActionType or tuple - - """ - def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: - """Execute a custom action. - - This method should return an iterator, or it should be a - generator. Applications should override this method to - implement their own behavior. The default method raises a - NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - action : Action - The action to execute. - - Returns - ------- - iterator of bytes - - """ - def serve(self) -> None: - """Block until the server shuts down. - - This method only returns if shutdown() is called or a signal is - received. - """ - def run(self) -> None: - """Block until the server shuts down. - - .. 
deprecated:: 0.15.0 - Use the ``FlightServer.serve`` method instead - """ - def shutdown(self) -> None: - """Shut down the server, blocking until current requests finish. - - Do not call this directly from the implementation of a Flight - method, as then the server will block forever waiting for that - request to finish. Instead, call this method from a background - thread. - - This method should only be called once. - """ - def wait(self) -> None: - """Block until server is terminated with shutdown.""" - def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_value, traceback): ... - -def connect( - location: str | tuple[str, int] | Location, - *, - tls_root_certs: str | None = None, - cert_chain: str | None = None, - private_key: str | None = None, - override_hostname: str | None = None, - middleware: list[ClientMiddlewareFactory] | None = None, - write_size_limit_bytes: int | None = None, - disable_server_verification: bool = False, - generic_options: list[tuple[str, int | str]] | None = None, -) -> FlightClient: - """ - Connect to a Flight server. - - Parameters - ---------- - location : str, tuple, or Location - Location to connect to. Either a URI like "grpc://localhost:port", - a tuple of (host, port), or a Location instance. - tls_root_certs : bytes or None - PEM-encoded. - cert_chain: str or None - If provided, enables TLS mutual authentication. - private_key: str or None - If provided, enables TLS mutual authentication. - override_hostname : str or None - Override the hostname checked by TLS. Insecure, use with caution. - middleware : list or None - A list of ClientMiddlewareFactory instances to apply. - write_size_limit_bytes : int or None - A soft limit on the size of a data payload sent to the - server. Enabled if positive. If enabled, writing a record - batch that (when serialized) exceeds this limit will raise an - exception; the client can retry the write with a smaller - batch. - disable_server_verification : boolean or None - Disable verifying the server when using TLS. - Insecure, use with caution. - generic_options : list or None - A list of generic (string, int or string) options to pass to - the underlying transport. - - Returns - ------- - client : FlightClient - """ diff --git a/python/pyarrow/_fs.pyi b/python/pyarrow/_fs.pyi deleted file mode 100644 index 45d4d922ac2..00000000000 --- a/python/pyarrow/_fs.pyi +++ /dev/null @@ -1,1022 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
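Before the filesystem stubs that follow, here is a minimal server-side sketch of the FlightServerBase API documented above: a subclass that answers do_get with a RecordBatchStream and blocks in serve(). The bind address and the served table are placeholders.

import pyarrow as pa
import pyarrow.flight as flight

class EchoServer(flight.FlightServerBase):
    def __init__(self, location="grpc://0.0.0.0:8815"):
        super().__init__(location)
        self._table = pa.table({"x": [1, 2, 3]})

    def do_get(self, context, ticket):
        # Stream the whole table back, regardless of the ticket contents.
        return flight.RecordBatchStream(self._table)

if __name__ == "__main__":
    server = EchoServer()
    server.serve()  # blocks until shutdown() is called from another thread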
- -import datetime as dt -import enum -import sys - -from abc import ABC, abstractmethod - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -if sys.version_info >= (3, 10): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias - -from typing import Union, overload - -from fsspec import AbstractFileSystem # type: ignore[import-untyped] - -from .lib import NativeFile, _Weakrefable - -SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] - -class FileType(enum.IntFlag): - NotFound = enum.auto() - Unknown = enum.auto() - File = enum.auto() - Directory = enum.auto() - -class FileInfo(_Weakrefable): - """ - FileSystem entry info. - - Parameters - ---------- - path : str - The full path to the filesystem entry. - type : FileType - The type of the filesystem entry. - mtime : datetime or float, default None - If given, the modification time of the filesystem entry. - If a float is given, it is the number of seconds since the - Unix epoch. - mtime_ns : int, default None - If given, the modification time of the filesystem entry, - in nanoseconds since the Unix epoch. - `mtime` and `mtime_ns` are mutually exclusive. - size : int, default None - If given, the filesystem entry size in bytes. This should only - be given if `type` is `FileType.File`. - - Examples - -------- - Generate a file: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> path_fs = local_path + "/pyarrow-fs-example.dat" - >>> with local.open_output_stream(path_fs) as stream: - ... stream.write(b"data") - 4 - - Get FileInfo object using ``get_file_info()``: - - >>> file_info = local.get_file_info(path_fs) - >>> file_info - - - Inspect FileInfo attributes: - - >>> file_info.type - - - >>> file_info.is_file - True - - >>> file_info.path - '/.../pyarrow-fs-example.dat' - - >>> file_info.base_name - 'pyarrow-fs-example.dat' - - >>> file_info.size - 4 - - >>> file_info.extension - 'dat' - - >>> file_info.mtime # doctest: +SKIP - datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) - - >>> file_info.mtime_ns # doctest: +SKIP - 1656489370873922073 - """ - - def __init__( - self, - path: str, - type: FileType = FileType.Unknown, - *, - mtime: dt.datetime | float | None = None, - mtime_ns: int | None = None, - size: int | None = None, - ): ... - @property - def type(self) -> FileType: - """ - Type of the file. - - The returned enum values can be the following: - - - FileType.NotFound: target does not exist - - FileType.Unknown: target exists but its type is unknown (could be a - special file such as a Unix socket or character device, or - Windows NUL / CON / ...) - - FileType.File: target is a regular file - - FileType.Directory: target is a regular directory - - Returns - ------- - type : FileType - """ - @property - def is_file(self) -> bool: ... - @property - def path(self) -> str: - """ - The full file path in the filesystem. - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.path - '/.../pyarrow-fs-example.dat' - """ - @property - def base_name(self) -> str: - """ - The file base name. - - Component after the last directory separator. - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.base_name - 'pyarrow-fs-example.dat' - """ - @property - def size(self) -> int: - """ - The size in bytes, if available. - - Only regular files are guaranteed to have a size. 
- - Returns - ------- - size : int or None - """ - @property - def extension(self) -> str: - """ - The file extension. - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.extension - 'dat' - """ - @property - def mtime(self) -> dt.datetime | None: - """ - The time of last modification, if available. - - Returns - ------- - mtime : datetime.datetime or None - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.mtime # doctest: +SKIP - datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) - """ - @property - def mtime_ns(self) -> int | None: - """ - The time of last modification, if available, expressed in nanoseconds - since the Unix epoch. - - Returns - ------- - mtime_ns : int or None - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.mtime_ns # doctest: +SKIP - 1656489370873922073 - """ - -class FileSelector(_Weakrefable): - """ - File and directory selector. - - It contains a set of options that describes how to search for files and - directories. - - Parameters - ---------- - base_dir : str - The directory in which to select files. Relative paths also work, use - '.' for the current directory and '..' for the parent. - allow_not_found : bool, default False - The behavior if `base_dir` doesn't exist in the filesystem. - If false, an error is returned. - If true, an empty selection is returned. - recursive : bool, default False - Whether to recurse into subdirectories. - - Examples - -------- - List the contents of a directory and subdirectories: - - >>> selector_1 = fs.FileSelector(local_path, recursive=True) - >>> local.get_file_info(selector_1) # doctest: +SKIP - [, - , - ] - - List only the contents of the base directory: - - >>> selector_2 = fs.FileSelector(local_path) - >>> local.get_file_info(selector_2) # doctest: +SKIP - [, - ] - - Return empty selection if the directory doesn't exist: - - >>> selector_not_found = fs.FileSelector( - ... local_path + "/missing", recursive=True, allow_not_found=True - ... ) - >>> local.get_file_info(selector_not_found) - [] - """ - - base_dir: str - allow_not_found: bool - recursive: bool - def __init__(self, base_dir: str, allow_not_found: bool = False, recursive: bool = False): ... - -class FileSystem(_Weakrefable): - """ - Abstract file system API. - """ - - @classmethod - def from_uri(cls, uri: str) -> tuple[Self, str]: - """ - Create a new FileSystem from URI or Path. - - Recognized URI schemes are "file", "mock", "s3fs", "gs", "gcs", "hdfs" and "viewfs". - In addition, the argument can be a pathlib.Path object, or a string - describing an absolute local path. - - Parameters - ---------- - uri : string - URI-based path, for example: file:///some/local/path. - - Returns - ------- - tuple of (FileSystem, str path) - With (filesystem, path) tuple where path is the abstract path - inside the FileSystem instance. - - Examples - -------- - Create a new FileSystem subclass from a URI: - - >>> uri = "file:///{}/pyarrow-fs-example.dat".format(local_path) - >>> local_new, path_new = fs.FileSystem.from_uri(uri) - >>> local_new - >> path_new - '/.../pyarrow-fs-example.dat' - - Or from a s3 bucket: - - >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/") - (, 'usgs-landsat/collection02') - """ - def equals(self, other: FileSystem) -> bool: - """ - Parameters - ---------- - other : pyarrow.fs.FileSystem - - Returns - ------- - bool - """ - @property - def type_name(self) -> str: - """ - The filesystem's type name. 
- """ - @overload - def get_file_info(self, paths_or_selector: str) -> FileInfo: ... - @overload - def get_file_info(self, paths_or_selector: FileSelector | list[str]) -> list[FileInfo]: ... - def get_file_info(self, paths_or_selector): - """ - Get info for the given files. - - Any symlink is automatically dereferenced, recursively. A non-existing - or unreachable file returns a FileStat object and has a FileType of - value NotFound. An exception indicates a truly exceptional condition - (low-level I/O error, etc.). - - Parameters - ---------- - paths_or_selector : FileSelector, path-like or list of path-likes - Either a selector object, a path-like object or a list of - path-like objects. The selector's base directory will not be - part of the results, even if it exists. If it doesn't exist, - use `allow_not_found`. - - Returns - ------- - FileInfo or list of FileInfo - Single FileInfo object is returned for a single path, otherwise - a list of FileInfo objects is returned. - - Examples - -------- - >>> local - - >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) - - """ - def create_dir(self, path: str, *, recursive: bool = True) -> None: - """ - Create a directory and subdirectories. - - This function succeeds if the directory already exists. - - Parameters - ---------- - path : str - The path of the new directory. - recursive : bool, default True - Create nested directories as well. - """ - def delete_dir(self, path: str) -> None: - """ - Delete a directory and its contents, recursively. - - Parameters - ---------- - path : str - The path of the directory to be deleted. - """ - def delete_dir_contents( - self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False - ) -> None: - """ - Delete a directory's contents, recursively. - - Like delete_dir, but doesn't delete the directory itself. - - Parameters - ---------- - path : str - The path of the directory to be deleted. - accept_root_dir : boolean, default False - Allow deleting the root directory's contents - (if path is empty or "/") - missing_dir_ok : boolean, default False - If False then an error is raised if path does - not exist - """ - def move(self, src: str, dest: str) -> None: - """ - Move / rename a file or directory. - - If the destination exists: - - if it is a non-empty directory, an error is returned - - otherwise, if it has the same type as the source, it is replaced - - otherwise, behavior is unspecified (implementation-dependent). - - Parameters - ---------- - src : str - The path of the file or the directory to be moved. - dest : str - The destination path where the file or directory is moved to. - - Examples - -------- - Create a new folder with a file: - - >>> local.create_dir("/tmp/other_dir") - >>> local.copy_file(path, "/tmp/move_example.dat") - - Move the file: - - >>> local.move("/tmp/move_example.dat", "/tmp/other_dir/move_example_2.dat") - - Inspect the file info: - - >>> local.get_file_info("/tmp/other_dir/move_example_2.dat") - - >>> local.get_file_info("/tmp/move_example.dat") - - - Delete the folder: - >>> local.delete_dir("/tmp/other_dir") - """ - def copy_file(self, src: str, dest: str) -> None: - """ - Copy a file. - - If the destination exists and is a directory, an error is returned. - Otherwise, it is replaced. - - Parameters - ---------- - src : str - The path of the file to be copied from. - dest : str - The destination path where the file is copied to. 
- - Examples - -------- - >>> local.copy_file(path, local_path + "/pyarrow-fs-example_copy.dat") - - Inspect the file info: - - >>> local.get_file_info(local_path + "/pyarrow-fs-example_copy.dat") - - >>> local.get_file_info(path) - - """ - def delete_file(self, path: str) -> None: - """ - Delete a file. - - Parameters - ---------- - path : str - The path of the file to be deleted. - """ - def open_input_file(self, path: str) -> NativeFile: - """ - Open an input file for random access reading. - - Parameters - ---------- - path : str - The source to open for reading. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Print the data from the file with `open_input_file()`: - - >>> with local.open_input_file(path) as f: - ... print(f.readall()) - b'data' - """ - def open_input_stream( - self, path: str, compression: str | None = "detect", buffer_size: int | None = None - ) -> NativeFile: - """ - Open an input stream for sequential reading. - - Parameters - ---------- - path : str - The source to open for reading. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly decompression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). - buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary read buffer. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Print the data from the file with `open_input_stream()`: - - >>> with local.open_input_stream(path) as f: - ... print(f.readall()) - b'data' - """ - def open_output_stream( - self, - path: str, - compression: str | None = "detect", - buffer_size: int | None = None, - metadata: dict[str, str] | None = None, - ) -> NativeFile: - """ - Open an output stream for sequential writing. - - If the target already exists, existing data is truncated. - - Parameters - ---------- - path : str - The source to open for writing. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly compression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). - buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary write buffer. - metadata : dict optional, default None - If not None, a mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - Unsupported metadata keys will be ignored. - - Returns - ------- - stream : NativeFile - - Examples - -------- - >>> local = fs.LocalFileSystem() - >>> with local.open_output_stream(path) as stream: - ... stream.write(b"data") - 4 - """ - def open_append_stream( - self, - path: str, - compression: str | None = "detect", - buffer_size: int | None = None, - metadata: dict[str, str] | None = None, - ): - """ - Open an output stream for appending. - - If the target doesn't exist, a new empty file is created. - - .. note:: - Some filesystem implementations do not support efficient - appending to an existing file, in which case this method will - raise NotImplementedError. - Consider writing to multiple files (using e.g. the dataset layer) - instead. 
- - Parameters - ---------- - path : str - The source to open for writing. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly compression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). - buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary write buffer. - metadata : dict optional, default None - If not None, a mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - Unsupported metadata keys will be ignored. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Append new data to a FileSystem subclass with nonempty file: - - >>> with local.open_append_stream(path) as f: - ... f.write(b"+newly added") - 12 - - Print out the content to the file: - - >>> with local.open_input_file(path) as f: - ... print(f.readall()) - b'data+newly added' - """ - def normalize_path(self, path: str) -> str: - """ - Normalize filesystem path. - - Parameters - ---------- - path : str - The path to normalize - - Returns - ------- - normalized_path : str - The normalized path - """ - -class LocalFileSystem(FileSystem): - """ - A FileSystem implementation accessing files on the local machine. - - Details such as symlinks are abstracted away (symlinks are always followed, - except when deleting an entry). - - Parameters - ---------- - use_mmap : bool, default False - Whether open_input_stream and open_input_file should return - a mmap'ed file or a regular file. - - Examples - -------- - Create a FileSystem object with LocalFileSystem constructor: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> local - - - and write data on to the file: - - >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: - ... stream.write(b"data") - 4 - >>> with local.open_input_stream("/tmp/local_fs.dat") as stream: - ... print(stream.readall()) - b'data' - - Create a FileSystem object inferred from a URI of the saved file: - - >>> local_new, path = fs.LocalFileSystem().from_uri("/tmp/local_fs.dat") - >>> local_new - >> path - '/tmp/local_fs.dat' - - Check if FileSystems `local` and `local_new` are equal: - - >>> local.equals(local_new) - True - - Compare two different FileSystems: - - >>> local2 = fs.LocalFileSystem(use_mmap=True) - >>> local.equals(local2) - False - - Copy a file and print out the data: - - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/local_fs-copy.dat") - >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as stream: - ... print(stream.readall()) - b'data' - - Open an output stream for appending, add text and print the new data: - - >>> with local.open_append_stream("/tmp/local_fs-copy.dat") as f: - ... f.write(b"+newly added") - 12 - - >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as f: - ... 
print(f.readall()) - b'data+newly added' - - Create a directory, copy a file into it and then delete the whole directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder") - - >>> local.delete_dir("/tmp/new_folder") - >>> local.get_file_info("/tmp/new_folder") - - - Create a directory, copy a file into it and then delete - the content of the directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - >>> local.delete_dir_contents("/tmp/new_folder") - >>> local.get_file_info("/tmp/new_folder") - - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - - Create a directory, copy a file into it and then delete - the file from the directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.delete_file("/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - >>> local.get_file_info("/tmp/new_folder") - - - Move the file: - - >>> local.move("/tmp/local_fs-copy.dat", "/tmp/new_folder/local_fs-copy.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs-copy.dat") - - >>> local.get_file_info("/tmp/local_fs-copy.dat") - - - To finish delete the file left: - >>> local.delete_file("/tmp/local_fs.dat") - """ - - def __init__(self, *, use_mmap: bool = False) -> None: ... - -class SubTreeFileSystem(FileSystem): - """ - Delegates to another implementation after prepending a fixed base path. - - This is useful to expose a logical view of a subtree of a filesystem, - for example a directory in a LocalFileSystem. - - Note, that this makes no security guarantee. For example, symlinks may - allow to "escape" the subtree and access other parts of the underlying - filesystem. - - Parameters - ---------- - base_path : str - The root of the subtree. - base_fs : FileSystem - FileSystem object the operations delegated to. - - Examples - -------- - Create a LocalFileSystem instance: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: - ... stream.write(b"data") - 4 - - Create a directory and a SubTreeFileSystem instance: - - >>> local.create_dir("/tmp/sub_tree") - >>> subtree = fs.SubTreeFileSystem("/tmp/sub_tree", local) - - Write data into the existing file: - - >>> with subtree.open_append_stream("sub_tree_fs.dat") as f: - ... f.write(b"+newly added") - 12 - - Print out the attributes: - - >>> subtree.base_fs - - >>> subtree.base_path - '/tmp/sub_tree/' - - Get info for the given directory or given file: - - >>> subtree.get_file_info("") - - >>> subtree.get_file_info("sub_tree_fs.dat") - - - Delete the file and directory: - - >>> subtree.delete_file("sub_tree_fs.dat") - >>> local.delete_dir("/tmp/sub_tree") - >>> local.delete_file("/tmp/local_fs.dat") - - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. - """ - def __init__(self, base_path: str, base_fs: FileSystem): ... - @property - def base_path(self) -> str: ... - @property - def base_fs(self) -> FileSystem: ... - -class _MockFileSystem(FileSystem): - def __init__(self, current_time: dt.datetime | None = None) -> None: ... - -class PyFileSystem(FileSystem): - """ - A FileSystem with behavior implemented in Python. 
- - Parameters - ---------- - handler : FileSystemHandler - The handler object implementing custom filesystem behavior. - - Examples - -------- - Create an fsspec-based filesystem object for GitHub: - - >>> from fsspec.implementations import github - >>> gfs = github.GithubFileSystem("apache", "arrow") # doctest: +SKIP - - Get a PyArrow FileSystem object: - - >>> from pyarrow.fs import PyFileSystem, FSSpecHandler - >>> pa_fs = PyFileSystem(FSSpecHandler(gfs)) # doctest: +SKIP - - Use :func:`~pyarrow.fs.FileSystem` functionality ``get_file_info()``: - - >>> pa_fs.get_file_info("README.md") # doctest: +SKIP - - """ - def __init__(self, handler: FileSystemHandler) -> None: ... - @property - def handler(self) -> FileSystemHandler: - """ - The filesystem's underlying handler. - - Returns - ------- - handler : FileSystemHandler - """ - -class FileSystemHandler(ABC): - """ - An abstract class exposing methods to implement PyFileSystem's behavior. - """ - @abstractmethod - def get_type_name(self) -> str: - """ - Implement PyFileSystem.type_name. - """ - @abstractmethod - def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: - """ - Implement PyFileSystem.get_file_info(paths). - - Parameters - ---------- - paths : list of str - paths for which we want to retrieve the info. - """ - @abstractmethod - def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: - """ - Implement PyFileSystem.get_file_info(selector). - - Parameters - ---------- - selector : FileSelector - selector for which we want to retrieve the info. - """ - - @abstractmethod - def create_dir(self, path: str, recursive: bool) -> None: - """ - Implement PyFileSystem.create_dir(...). - - Parameters - ---------- - path : str - path of the directory. - recursive : bool - if the parent directories should be created too. - """ - @abstractmethod - def delete_dir(self, path: str) -> None: - """ - Implement PyFileSystem.delete_dir(...). - - Parameters - ---------- - path : str - path of the directory. - """ - @abstractmethod - def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: - """ - Implement PyFileSystem.delete_dir_contents(...). - - Parameters - ---------- - path : str - path of the directory. - missing_dir_ok : bool - if False an error should be raised if path does not exist - """ - @abstractmethod - def delete_root_dir_contents(self) -> None: - """ - Implement PyFileSystem.delete_dir_contents("/", accept_root_dir=True). - """ - @abstractmethod - def delete_file(self, path: str) -> None: - """ - Implement PyFileSystem.delete_file(...). - - Parameters - ---------- - path : str - path of the file. - """ - @abstractmethod - def move(self, src: str, dest: str) -> None: - """ - Implement PyFileSystem.move(...). - - Parameters - ---------- - src : str - path of what should be moved. - dest : str - path of where it should be moved to. - """ - - @abstractmethod - def copy_file(self, src: str, dest: str) -> None: - """ - Implement PyFileSystem.copy_file(...). - - Parameters - ---------- - src : str - path of what should be copied. - dest : str - path of where it should be copied to. - """ - @abstractmethod - def open_input_stream(self, path: str) -> NativeFile: - """ - Implement PyFileSystem.open_input_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - """ - @abstractmethod - def open_input_file(self, path: str) -> NativeFile: - """ - Implement PyFileSystem.open_input_file(...). 
- - Parameters - ---------- - path : str - path of what should be opened. - """ - @abstractmethod - def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: - """ - Implement PyFileSystem.open_output_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - metadata : mapping - Mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - """ - - @abstractmethod - def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: - """ - Implement PyFileSystem.open_append_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - metadata : mapping - Mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - """ - @abstractmethod - def normalize_path(self, path: str) -> str: - """ - Implement PyFileSystem.normalize_path(...). - - Parameters - ---------- - path : str - path of what should be normalized. - """ diff --git a/python/pyarrow/_hdfs.pyi b/python/pyarrow/_hdfs.pyi deleted file mode 100644 index ed367379171..00000000000 --- a/python/pyarrow/_hdfs.pyi +++ /dev/null @@ -1,92 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from _typeshed import StrPath - -from ._fs import FileSystem - -class HadoopFileSystem(FileSystem): - """ - HDFS backed FileSystem implementation - - Parameters - ---------- - host : str - HDFS host to connect to. Set to "default" for fs.defaultFS from - core-site.xml. - port : int, default 8020 - HDFS port to connect to. Set to 0 for default or logical (HA) nodes. - user : str, default None - Username when connecting to HDFS; None implies login user. - replication : int, default 3 - Number of copies each block will have. - buffer_size : int, default 0 - If 0, no buffering will happen otherwise the size of the temporary read - and write buffer. - default_block_size : int, default None - None means the default configuration for HDFS, a typical block size is - 128 MB. - kerb_ticket : string or path, default None - If not None, the path to the Kerberos ticket cache. - extra_conf : dict, default None - Extra key/value pairs for configuration; will override any - hdfs-site.xml properties. - - Examples - -------- - >>> from pyarrow import fs - >>> hdfs = fs.HadoopFileSystem( - ... host, port, user=user, kerb_ticket=ticket_cache_path - ... ) # doctest: +SKIP - - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. 
- """ - def __init__( - self, - host: str, - port: int = 8020, - *, - user: str | None = None, - replication: int = 3, - buffer_size: int = 0, - default_block_size: int | None = None, - kerb_ticket: StrPath | None = None, - extra_conf: dict | None = None, - ): ... - @staticmethod - def from_uri(uri: str) -> HadoopFileSystem: # type: ignore[override] - """ - Instantiate HadoopFileSystem object from an URI string. - - The following two calls are equivalent - - * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\ -&replication=1')`` - * ``HadoopFileSystem('localhost', port=8020, user='test', \ -replication=1)`` - - Parameters - ---------- - uri : str - A string URI describing the connection to HDFS. - In order to change the user, replication, buffer_size or - default_block_size pass the values as query parts. - - Returns - ------- - HadoopFileSystem - """ diff --git a/python/pyarrow/_json.pyi b/python/pyarrow/_json.pyi deleted file mode 100644 index f416b4b29c6..00000000000 --- a/python/pyarrow/_json.pyi +++ /dev/null @@ -1,186 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import IO, Any, Literal - -from _typeshed import StrPath - -from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable - -class ReadOptions(_Weakrefable): - """ - Options for reading JSON files. - - Parameters - ---------- - use_threads : bool, optional (default True) - Whether to use multiple threads to accelerate reading - block_size : int, optional - How much bytes to process at a time from the input stream. - This will determine multi-threading granularity as well as - the size of individual chunks in the Table. - """ - - use_threads: bool - """ - Whether to use multiple threads to accelerate reading. - """ - block_size: int - """ - How much bytes to process at a time from the input stream. - - This will determine multi-threading granularity as well as the size of - individual chunks in the Table. - """ - def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... - def equals(self, other: ReadOptions) -> bool: - """ - Parameters - ---------- - other : pyarrow.json.ReadOptions - - Returns - ------- - bool - """ - -class ParseOptions(_Weakrefable): - """ - Options for parsing JSON files. - - Parameters - ---------- - explicit_schema : Schema, optional (default None) - Optional explicit schema (no type inference, ignores other fields). - newlines_in_values : bool, optional (default False) - Whether objects may be printed across multiple lines (for example - pretty printed). If false, input must end with an empty line. - unexpected_field_behavior : str, default "infer" - How JSON fields outside of explicit_schema (if given) are treated. 
- - Possible behaviors: - - - "ignore": unexpected JSON fields are ignored - - "error": error out on unexpected JSON fields - - "infer": unexpected JSON fields are type-inferred and included in - the output - """ - - explicit_schema: Schema - """ - Optional explicit schema (no type inference, ignores other fields) - """ - newlines_in_values: bool - """ - Whether newline characters are allowed in JSON values. - Setting this to True reduces the performance of multi-threaded - JSON reading. - """ - unexpected_field_behavior: Literal["ignore", "error", "infer"] - """ - How JSON fields outside of explicit_schema (if given) are treated. - - Possible behaviors: - - - "ignore": unexpected JSON fields are ignored - - "error": error out on unexpected JSON fields - - "infer": unexpected JSON fields are type-inferred and included in - the output - - Set to "infer" by default. - """ - def __init__( - self, - explicit_schema: Schema | None = None, - newlines_in_values: bool | None = None, - unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", - ): ... - def equals(self, other: ParseOptions) -> bool: - """ - Parameters - ---------- - other : pyarrow.json.ParseOptions - - Returns - ------- - bool - """ - -class JSONStreamingReader(RecordBatchReader): - """An object that reads record batches incrementally from a JSON file. - - Should not be instantiated directly by user code. - """ - -def read_json( - input_file: StrPath | IO[Any], - read_options: ReadOptions | None = None, - parse_options: ParseOptions | None = None, - memory_pool: MemoryPool | None = None, -) -> Table: - """ - Read a Table from a stream of JSON data. - - Parameters - ---------- - input_file : str, path or file-like object - The location of JSON data. Currently only the line-delimited JSON - format is supported. - read_options : pyarrow.json.ReadOptions, optional - Options for the JSON reader (see ReadOptions constructor for defaults). - parse_options : pyarrow.json.ParseOptions, optional - Options for the JSON parser - (see ParseOptions constructor for defaults). - memory_pool : MemoryPool, optional - Pool to allocate Table memory from. - - Returns - ------- - :class:`pyarrow.Table` - Contents of the JSON file as a in-memory table. - """ - -def open_json( - input_file: StrPath | IO[Any], - read_options: ReadOptions | None = None, - parse_options: ParseOptions | None = None, - memory_pool: MemoryPool | None = None, -) -> JSONStreamingReader: - """ - Open a streaming reader of JSON data. - - Reading using this function is always single-threaded. - - Parameters - ---------- - input_file : string, path or file-like object - The location of JSON data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. - read_options : pyarrow.json.ReadOptions, optional - Options for the JSON reader (see pyarrow.json.ReadOptions constructor - for defaults) - parse_options : pyarrow.json.ParseOptions, optional - Options for the JSON parser - (see pyarrow.json.ParseOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate RecordBatch memory from - - Returns - ------- - :class:`pyarrow.json.JSONStreamingReader` - """ diff --git a/python/pyarrow/_orc.pyi b/python/pyarrow/_orc.pyi deleted file mode 100644 index 7587cc121c3..00000000000 --- a/python/pyarrow/_orc.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import IO, Literal - -from .lib import ( - Buffer, - KeyValueMetadata, - MemoryPool, - NativeFile, - RecordBatch, - Schema, - Table, - _Weakrefable, -) - -class ORCReader(_Weakrefable): - def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ... - def metadata(self) -> KeyValueMetadata: ... - def schema(self) -> Schema: ... - def nrows(self) -> int: ... - def nstripes(self) -> int: ... - def file_version(self) -> str: ... - def software_version(self) -> str: ... - def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... - def compression_size(self) -> int: ... - def row_index_stride(self) -> int: ... - def writer(self) -> str: ... - def writer_version(self) -> str: ... - def nstripe_statistics(self) -> int: ... - def content_length(self) -> int: ... - def stripe_statistics_length(self) -> int: ... - def file_footer_length(self) -> int: ... - def file_postscript_length(self) -> int: ... - def file_length(self) -> int: ... - def serialized_file_tail(self) -> int: ... - def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... - def read(self, columns: list[str] | None = None) -> Table: ... - -class ORCWriter(_Weakrefable): - def open( - self, - where: str | NativeFile | IO, - *, - file_version: str | None = None, - batch_size: int | None = None, - stripe_size: int | None = None, - compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] | None = None, - compression_block_size: int | None = None, - compression_strategy: Literal["COMPRESSION", "SPEED"] | None = None, - row_index_stride: int | None = None, - padding_tolerance: float | None = None, - dictionary_key_size_threshold: float | None = None, - bloom_filter_columns: list[int] | None = None, - bloom_filter_fpp: float | None = None, - ) -> None: ... - def write(self, table: Table) -> None: ... - def close(self) -> None: ... diff --git a/python/pyarrow/_parquet.pyi b/python/pyarrow/_parquet.pyi deleted file mode 100644 index c75337cbf3b..00000000000 --- a/python/pyarrow/_parquet.pyi +++ /dev/null @@ -1,462 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict - -from _typeshed import StrPath - -from ._stubs_typing import Order -from .lib import ( - Buffer, - ChunkedArray, - KeyValueMetadata, - MemoryPool, - NativeFile, - RecordBatch, - Schema, - Table, - _Weakrefable, -) - -_PhysicalType: TypeAlias = Literal[ - "BOOLEAN", - "INT32", - "INT64", - "INT96", - "FLOAT", - "DOUBLE", - "BYTE_ARRAY", - "FIXED_LEN_BYTE_ARRAY", - "UNKNOWN", -] -_LogicTypeName: TypeAlias = Literal[ - "UNDEFINED", - "STRING", - "MAP", - "LIST", - "ENUM", - "DECIMAL", - "DATE", - "TIME", - "TIMESTAMP", - "INT", - "FLOAT16", - "JSON", - "BSON", - "UUID", - "NONE", - "UNKNOWN", -] -_ConvertedType: TypeAlias = Literal[ - "NONE", - "UTF8", - "MAP", - "MAP_KEY_VALUE", - "LIST", - "ENUM", - "DECIMAL", - "DATE", - "TIME_MILLIS", - "TIME_MICROS", - "TIMESTAMP_MILLIS", - "TIMESTAMP_MICROS", - "UINT_8", - "UINT_16", - "UINT_32", - "UINT_64", - "INT_8", - "INT_16", - "INT_32", - "INT_64", - "JSON", - "BSON", - "INTERVAL", - "UNKNOWN", -] -_Encoding: TypeAlias = Literal[ - "PLAIN", - "PLAIN_DICTIONARY", - "RLE", - "BIT_PACKED", - "DELTA_BINARY_PACKED", - "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY", - "RLE_DICTIONARY", - "BYTE_STREAM_SPLIT", - "UNKNOWN", -] -_Compression: TypeAlias = Literal[ - "UNCOMPRESSED", - "SNAPPY", - "GZIP", - "LZO", - "BROTLI", - "LZ4", - "ZSTD", - "UNKNOWN", -] - -class _Statistics(TypedDict): - has_min_max: bool - min: Any | None - max: Any | None - null_count: int | None - distinct_count: int | None - num_values: int - physical_type: _PhysicalType - -class Statistics(_Weakrefable): - def to_dict(self) -> _Statistics: ... - def equals(self, other: Statistics) -> bool: ... - @property - def has_min_max(self) -> bool: ... - @property - def hash_null_count(self) -> bool: ... - @property - def has_distinct_count(self) -> bool: ... - @property - def min_raw(self) -> Any | None: ... - @property - def max_raw(self) -> Any | None: ... - @property - def min(self) -> Any | None: ... - @property - def max(self) -> Any | None: ... - @property - def null_count(self) -> int | None: ... - @property - def distinct_count(self) -> int | None: ... - @property - def num_values(self) -> int: ... - @property - def physical_type(self) -> _PhysicalType: ... - @property - def logical_type(self) -> ParquetLogicalType: ... - @property - def converted_type(self) -> _ConvertedType | None: ... - -class ParquetLogicalType(_Weakrefable): - def to_json(self) -> str: ... - @property - def type(self) -> _LogicTypeName: ... - -class _ColumnChunkMetaData(TypedDict): - file_offset: int - file_path: str | None - physical_type: _PhysicalType - num_values: int - path_in_schema: str - is_stats_set: bool - statistics: Statistics | None - compression: _Compression - encodings: tuple[_Encoding, ...] - has_dictionary_page: bool - dictionary_page_offset: int | None - data_page_offset: int - total_compressed_size: int - total_uncompressed_size: int - -class ColumnChunkMetaData(_Weakrefable): - def to_dict(self) -> _ColumnChunkMetaData: ... - def equals(self, other: ColumnChunkMetaData) -> bool: ... 
- @property - def file_offset(self) -> int: ... - @property - def file_path(self) -> str | None: ... - @property - def physical_type(self) -> _PhysicalType: ... - @property - def num_values(self) -> int: ... - @property - def path_in_schema(self) -> str: ... - @property - def is_stats_set(self) -> bool: ... - @property - def statistics(self) -> Statistics | None: ... - @property - def compression(self) -> _Compression: ... - @property - def encodings(self) -> tuple[_Encoding, ...]: ... - @property - def has_dictionary_page(self) -> bool: ... - @property - def dictionary_page_offset(self) -> int | None: ... - @property - def data_page_offset(self) -> int: ... - @property - def has_index_page(self) -> bool: ... - @property - def index_page_offset(self) -> int: ... - @property - def total_compressed_size(self) -> int: ... - @property - def total_uncompressed_size(self) -> int: ... - @property - def has_offset_index(self) -> bool: ... - @property - def has_column_index(self) -> bool: ... - @property - def metadata(self) -> dict[bytes, bytes] | None: ... - -class _SortingColumn(TypedDict): - column_index: int - descending: bool - nulls_first: bool - -class SortingColumn: - def __init__( - self, column_index: int, descending: bool = False, nulls_first: bool = False - ) -> None: ... - @classmethod - def from_ordering( - cls, - schema: Schema, - sort_keys: Sequence[tuple[str, Order]], - null_placement: Literal["at_start", "at_end"] = "at_end", - ) -> tuple[SortingColumn, ...]: ... - @staticmethod - def to_ordering( - schema: Schema, sorting_columns: tuple[SortingColumn, ...] - ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ... - def __hash__(self) -> int: ... - @property - def column_index(self) -> int: ... - @property - def descending(self) -> bool: ... - @property - def nulls_first(self) -> bool: ... - def to_dict(self) -> _SortingColumn: ... - -class _RowGroupMetaData(TypedDict): - num_columns: int - num_rows: int - total_byte_size: int - columns: list[ColumnChunkMetaData] - sorting_columns: list[SortingColumn] - -class RowGroupMetaData(_Weakrefable): - def __init__(self, parent: FileMetaData, index: int) -> None: ... - def equals(self, other: RowGroupMetaData) -> bool: ... - def column(self, i: int) -> ColumnChunkMetaData: ... - def to_dict(self) -> _RowGroupMetaData: ... - @property - def num_columns(self) -> int: ... - @property - def num_rows(self) -> int: ... - @property - def total_byte_size(self) -> int: ... - @property - def sorting_columns(self) -> list[SortingColumn]: ... - -class _FileMetaData(TypedDict): - created_by: str - num_columns: int - num_rows: int - num_row_groups: int - format_version: str - serialized_size: int - -class FileMetaData(_Weakrefable): - def __hash__(self) -> int: ... - def to_dict(self) -> _FileMetaData: ... - def equals(self, other: FileMetaData) -> bool: ... - @property - def schema(self) -> ParquetSchema: ... - @property - def serialized_size(self) -> int: ... - @property - def num_columns(self) -> int: ... - @property - def num_rows(self) -> int: ... - @property - def num_row_groups(self) -> int: ... - @property - def format_version(self) -> str: ... - @property - def created_by(self) -> str: ... - @property - def metadata(self) -> dict[bytes, bytes] | None: ... - def row_group(self, i: int) -> RowGroupMetaData: ... - def set_file_path(self, path: str) -> None: ... - def append_row_groups(self, other: FileMetaData) -> None: ... - def write_metadata_file(self, where: StrPath | Buffer | NativeFile | IO) -> None: ... 
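As a quick cross-check of the FileMetaData / RowGroupMetaData / ColumnChunkMetaData / Statistics signatures above, a minimal sketch of the usual call chain; the temporary path and sample column values are illustrative only, not part of the patch:

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"x": [1, 2, 3, None], "y": ["a", "b", "c", "d"]})
pq.write_table(table, "/tmp/stats_example.parquet")      # illustrative path

meta = pq.read_metadata("/tmp/stats_example.parquet")    # -> FileMetaData
rg = meta.row_group(0)                                   # -> RowGroupMetaData
col = rg.column(0)                                       # -> ColumnChunkMetaData
stats = col.statistics                                   # -> Statistics | None
if stats is not None and stats.has_min_max:
    print(stats.min, stats.max, stats.null_count, stats.physical_type)
print(col.compression, col.encodings)                    # -> _Compression, tuple[_Encoding, ...]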
- -class ParquetSchema(_Weakrefable): - def __init__(self, container: FileMetaData) -> None: ... - def __getitem__(self, i: int) -> ColumnChunkMetaData: ... - def __hash__(self) -> int: ... - def __len__(self) -> int: ... - @property - def names(self) -> list[str]: ... - def to_arrow_schema(self) -> Schema: ... - def equals(self, other: ParquetSchema) -> bool: ... - def column(self, i: int) -> ColumnSchema: ... - -class ColumnSchema(_Weakrefable): - def __init__(self, schema: ParquetSchema, index: int) -> None: ... - def equals(self, other: ColumnSchema) -> bool: ... - @property - def name(self) -> str: ... - @property - def path(self) -> str: ... - @property - def max_definition_level(self) -> int: ... - @property - def max_repetition_level(self) -> int: ... - @property - def physical_type(self) -> _PhysicalType: ... - @property - def logical_type(self) -> ParquetLogicalType: ... - @property - def converted_type(self) -> _ConvertedType | None: ... - @property - def length(self) -> int | None: ... - @property - def precision(self) -> int | None: ... - @property - def scale(self) -> int | None: ... - -class ParquetReader(_Weakrefable): - def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def open( - self, - source: StrPath | NativeFile | IO, - *, - use_memory_map: bool = False, - read_dictionary: Iterable[int] | Iterable[str] | None = None, - metadata: FileMetaData | None = None, - buffer_size: int = 0, - pre_buffer: bool = False, - coerce_int96_timestamp_unit: str | None = None, - decryption_properties: FileDecryptionProperties | None = None, - thrift_string_size_limit: int | None = None, - thrift_container_size_limit: int | None = None, - page_checksum_verification: bool = False, - ): ... - @property - def column_paths(self) -> list[str]: ... - @property - def metadata(self) -> FileMetaData: ... - @property - def schema_arrow(self) -> Schema: ... - @property - def num_row_groups(self) -> int: ... - def set_use_threads(self, use_threads: bool) -> None: ... - def set_batch_size(self, batch_size: int) -> None: ... - def iter_batches( - self, - batch_size: int, - row_groups: list[int], - column_indices: list[int] | None = None, - use_threads: bool = True, - ) -> Iterator[RecordBatch]: ... - def read_row_group( - self, i: int, column_indices: list[int] | None = None, use_threads: bool = True - ) -> Table: ... - def read_row_groups( - self, - row_groups: list[int], - column_indices: list[int] | None = None, - use_threads: bool = True, - ) -> Table: ... - def read_all( - self, column_indices: list[int] | None = None, use_threads: bool = True - ) -> Table: ... - def scan_contents(self, column_indices: list[int] | None = None, batch_size: int = 65536): ... - def column_name_idx(self, column_name: str) -> int: ... - def read_column(self, column_index: int) -> ChunkedArray: ... - def close(self) -> None: ... - @property - def closed(self) -> bool: ... 
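The low-level ParquetReader typed above is normally reached through the public pyarrow.parquet.ParquetFile wrapper; a minimal sketch (path and batch size are illustrative, not part of the patch):

import pyarrow as pa
import pyarrow.parquet as pq

pq.write_table(pa.table({"x": list(range(10))}), "/tmp/reader_example.parquet")

pf = pq.ParquetFile("/tmp/reader_example.parquet")
print(pf.schema_arrow)                        # ParquetReader.schema_arrow -> pyarrow.Schema
print(pf.metadata.num_row_groups)             # FileMetaData exposed by the reader
for batch in pf.iter_batches(batch_size=4):   # ParquetReader.iter_batches -> RecordBatch iterator
    print(batch.num_rows)
first_group = pf.read_row_group(0)            # ParquetReader.read_row_group -> pyarrow.Table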
- -class ParquetWriter(_Weakrefable): - def __init__( - self, - where: StrPath | NativeFile | IO, - schema: Schema, - use_dictionary: bool | list[str] | None = None, - compression: _Compression | dict[str, _Compression] | None = None, - version: str | None = None, - write_statistics: bool | list[str] | None = None, - memory_pool: MemoryPool | None = None, - use_deprecated_int96_timestamps: bool = False, - coerce_timestamps: Literal["ms", "us"] | None = None, - data_page_size: int | None = None, - allow_truncated_timestamps: bool = False, - compression_level: int | dict[str, int] | None = None, - use_byte_stream_split: bool | list[str] = False, - column_encoding: _Encoding | dict[str, _Encoding] | None = None, - writer_engine_version: str | None = None, - data_page_version: str | None = None, - use_compliant_nested_type: bool = True, - encryption_properties: FileDecryptionProperties | None = None, - write_batch_size: int | None = None, - dictionary_pagesize_limit: int | None = None, - store_schema: bool = True, - write_page_index: bool = False, - write_page_checksum: bool = False, - sorting_columns: tuple[SortingColumn, ...] | None = None, - store_decimal_as_integer: bool = False, - ): ... - def close(self) -> None: ... - def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... - def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ... - @property - def metadata(self) -> FileMetaData: ... - @property - def use_dictionary(self) -> bool | list[str] | None: ... - @property - def use_deprecated_int96_timestamps(self) -> bool: ... - @property - def use_byte_stream_split(self) -> bool | list[str]: ... - @property - def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ... - @property - def coerce_timestamps(self) -> Literal["ms", "us"] | None: ... - @property - def allow_truncated_timestamps(self) -> bool: ... - @property - def compression(self) -> _Compression | dict[str, _Compression] | None: ... - @property - def compression_level(self) -> int | dict[str, int] | None: ... - @property - def data_page_version(self) -> str | None: ... - @property - def use_compliant_nested_type(self) -> bool: ... - @property - def version(self) -> str | None: ... - @property - def write_statistics(self) -> bool | list[str] | None: ... - @property - def writer_engine_version(self) -> str: ... - @property - def row_group_size(self) -> int: ... - @property - def data_page_size(self) -> int: ... - @property - def encryption_properties(self) -> FileDecryptionProperties: ... - @property - def write_batch_size(self) -> int: ... - @property - def dictionary_pagesize_limit(self) -> int: ... - @property - def store_schema(self) -> bool: ... - @property - def store_decimal_as_integer(self) -> bool: ... - -class FileEncryptionProperties: ... -class FileDecryptionProperties: ... diff --git a/python/pyarrow/_s3fs.pyi b/python/pyarrow/_s3fs.pyi deleted file mode 100644 index e2f5f147096..00000000000 --- a/python/pyarrow/_s3fs.pyi +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import enum - -from typing import Literal, NotRequired, Required, TypedDict - -from ._fs import FileSystem -from .lib import KeyValueMetadata - -class _ProxyOptions(TypedDict): - schema: Required[Literal["http", "https"]] - host: Required[str] - port: Required[int] - username: NotRequired[str] - password: NotRequired[str] - -class S3LogLevel(enum.IntEnum): - Off = enum.auto() - Fatal = enum.auto() - Error = enum.auto() - Warn = enum.auto() - Info = enum.auto() - Debug = enum.auto() - Trace = enum.auto() - -Off = S3LogLevel.Off -Fatal = S3LogLevel.Fatal -Error = S3LogLevel.Error -Warn = S3LogLevel.Warn -Info = S3LogLevel.Info -Debug = S3LogLevel.Debug -Trace = S3LogLevel.Trace - -def initialize_s3( - log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1 -) -> None: ... -def ensure_s3_initialized() -> None: ... -def finalize_s3() -> None: ... -def ensure_s3_finalized() -> None: ... -def resolve_s3_region(bucket: str) -> str: ... - -class S3RetryStrategy: - max_attempts: int - def __init__(self, max_attempts=3) -> None: ... - -class AwsStandardS3RetryStrategy(S3RetryStrategy): ... -class AwsDefaultS3RetryStrategy(S3RetryStrategy): ... - -class S3FileSystem(FileSystem): - def __init__( - self, - *, - access_key: str | None = None, - secret_key: str | None = None, - session_token: str | None = None, - anonymous: bool = False, - region: str | None = None, - request_timeout: float | None = None, - connect_timeout: float | None = None, - scheme: Literal["http", "https"] = "https", - endpoint_override: str | None = None, - background_writes: bool = True, - default_metadata: dict | KeyValueMetadata | None = None, - role_arn: str | None = None, - session_name: str | None = None, - external_id: str | None = None, - load_frequency: int = 900, - proxy_options: _ProxyOptions | str | None = None, - allow_bucket_creation: bool = False, - allow_bucket_deletion: bool = False, - check_directory_existence_before_creation: bool = False, - retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3), - force_virtual_addressing: bool = False, - ): ... - @property - def region(self) -> str: ... diff --git a/python/pyarrow/_substrait.pyi b/python/pyarrow/_substrait.pyi deleted file mode 100644 index ee78e9720fe..00000000000 --- a/python/pyarrow/_substrait.pyi +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
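Stepping back to the S3FileSystem signature above, a minimal sketch of how its keyword-only options are typically passed; the bucket, prefix and region names are placeholders, not part of the patch:

from pyarrow import fs

s3 = fs.S3FileSystem(region="us-east-1", anonymous=True,
                     request_timeout=10, connect_timeout=5)
selector = fs.FileSelector("some-bucket/some-prefix", recursive=True)
# Listing needs network access to a real bucket:
# infos = s3.get_file_info(selector)
# resolve_s3_region (also typed in this file) looks up a bucket's region over the network:
# region = fs.resolve_s3_region("some-bucket")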
- -from typing import Any, Callable - -from ._compute import Expression -from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable - -def run_query( - plan: Buffer | int, - *, - table_provider: Callable[[list[str], Schema], Table] | None = None, - use_threads: bool = True, -) -> RecordBatchReader: ... -def _parse_json_plan(plan: bytes) -> Buffer: ... - -class SubstraitSchema: - schema: Schema - expression: Expression - def __init__(self, schema: Schema, expression: Expression) -> None: ... - def to_pysubstrait(self) -> Any: ... - -def serialize_schema(schema: Schema) -> SubstraitSchema: ... -def deserialize_schema(buf: Buffer | bytes) -> Schema: ... -def serialize_expressions( - exprs: list[Expression], - names: list[str], - schema: Schema, - *, - allow_arrow_extensions: bool = False, -) -> Buffer: ... - -class BoundExpressions(_Weakrefable): - @property - def schema(self) -> Schema: ... - @property - def expressions(self) -> dict[str, Expression]: ... - @classmethod - def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ... - -def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ... -def get_supported_functions() -> list[str]: ... diff --git a/python/pyarrow/acero.pyi b/python/pyarrow/acero.pyi deleted file mode 100644 index 2abb608b32c..00000000000 --- a/python/pyarrow/acero.pyi +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -if sys.version_info >= (3, 10): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias -from typing import Literal - -from . import lib -from .compute import Expression, FunctionOptions - -_StrOrExpr: TypeAlias = str | Expression - -class Declaration(lib._Weakrefable): - def __init__( - self, - factory_name: str, - options: ExecNodeOptions, - inputs: list[Declaration] | None = None, - ) -> None: ... - @classmethod - def from_sequence(cls, decls: list[Declaration]) -> Self: ... - def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... - def to_table(self, use_threads: bool = True) -> lib.Table: ... - -class ExecNodeOptions(lib._Weakrefable): ... - -class TableSourceNodeOptions(ExecNodeOptions): - def __init__(self, table: lib.Table) -> None: ... - -class FilterNodeOptions(ExecNodeOptions): - def __init__(self, filter_expression: Expression) -> None: ... - -class ProjectNodeOptions(ExecNodeOptions): - def __init__(self, expressions: list[Expression], names: list[str] | None = None) -> None: ... - -class AggregateNodeOptions(ExecNodeOptions): - def __init__( - self, - aggregates: list[tuple[list[str], str, FunctionOptions, str]], - keys: list[_StrOrExpr] | None = None, - ) -> None: ... 
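For the Declaration and node-options classes above, a minimal sketch of a linear Acero plan; the sample table and the hash_sum aggregation are illustrative, not part of the patch:

import pyarrow as pa
import pyarrow.compute as pc
from pyarrow import acero

table = pa.table({"k": ["a", "a", "b"], "v": [1, 2, 3]})
plan = acero.Declaration.from_sequence([
    acero.Declaration("table_source", acero.TableSourceNodeOptions(table)),
    acero.Declaration("filter", acero.FilterNodeOptions(pc.field("v") > 0)),
    acero.Declaration("aggregate",
                      acero.AggregateNodeOptions([("v", "hash_sum", None, "v_sum")],
                                                 keys=["k"])),
])
result = plan.to_table()   # Declaration.to_table(use_threads=True) -> pyarrow.Table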
- -class OrderByNodeOptions(ExecNodeOptions): - def __init__( - self, - sort_keys: tuple[tuple[str, Literal["ascending", "descending"]], ...] = (), - *, - null_placement: Literal["at_start", "at_end"] = "at_end", - ) -> None: ... - -class HashJoinNodeOptions(ExecNodeOptions): - def __init__( - self, - join_type: Literal[ - "left semi", - "right semi", - "left anti", - "right anti", - "inner", - "left outer", - "right outer", - "full outer", - ], - left_keys: _StrOrExpr | list[_StrOrExpr], - right_keys: _StrOrExpr | list[_StrOrExpr], - left_output: list[_StrOrExpr] | None = None, - right_output: list[_StrOrExpr] | None = None, - output_suffix_for_left: str = "", - output_suffix_for_right: str = "", - ) -> None: ... - -class AsofJoinNodeOptions(ExecNodeOptions): - def __init__( - self, - left_on: _StrOrExpr, - left_by: _StrOrExpr | list[_StrOrExpr], - right_on: _StrOrExpr, - right_by: _StrOrExpr | list[_StrOrExpr], - tolerance: int, - ) -> None: ... diff --git a/python/pyarrow/benchmark.pyi b/python/pyarrow/benchmark.pyi deleted file mode 100644 index 3ea8f70bc34..00000000000 --- a/python/pyarrow/benchmark.pyi +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyarrow.lib import benchmark_PandasObjectIsNull - -__all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/pyarrow/cffi.pyi b/python/pyarrow/cffi.pyi deleted file mode 100644 index e4f077d7155..00000000000 --- a/python/pyarrow/cffi.pyi +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import cffi - -c_source: str -ffi: cffi.FFI diff --git a/python/pyarrow/compute.pyi b/python/pyarrow/compute.pyi deleted file mode 100644 index cbbb9b0efcc..00000000000 --- a/python/pyarrow/compute.pyi +++ /dev/null @@ -1,8332 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence, Hashable -from collections.abc import Callable -from numpy.typing import NDArray - -# Option classes -from pyarrow._compute import ArraySortOptions as ArraySortOptions -from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions -from pyarrow._compute import CastOptions as CastOptions -from pyarrow._compute import CountOptions as CountOptions -from pyarrow._compute import CumulativeOptions as CumulativeOptions -from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions -from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions -from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions -from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions - -# Expressions -from pyarrow._compute import Expression as Expression -from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions -from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions -from pyarrow._compute import FilterOptions as FilterOptions -from pyarrow._compute import Function as Function -from pyarrow._compute import FunctionOptions as FunctionOptions -from pyarrow._compute import FunctionRegistry as FunctionRegistry -from pyarrow._compute import HashAggregateFunction as HashAggregateFunction -from pyarrow._compute import HashAggregateKernel as HashAggregateKernel -from pyarrow._compute import IndexOptions as IndexOptions -from pyarrow._compute import JoinOptions as JoinOptions -from pyarrow._compute import Kernel as Kernel -from pyarrow._compute import ListFlattenOptions as ListFlattenOptions -from pyarrow._compute import ListSliceOptions as ListSliceOptions -from pyarrow._compute import MakeStructOptions as MakeStructOptions -from pyarrow._compute import MapLookupOptions as MapLookupOptions -from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions -from pyarrow._compute import ModeOptions as ModeOptions -from pyarrow._compute import NullOptions as NullOptions -from pyarrow._compute import PadOptions as PadOptions -from pyarrow._compute import PairwiseOptions as PairwiseOptions -from pyarrow._compute import PartitionNthOptions as PartitionNthOptions -from pyarrow._compute import PivotWiderOptions as PivotWiderOptions -from pyarrow._compute import QuantileOptions as QuantileOptions -from pyarrow._compute import RandomOptions as RandomOptions -from pyarrow._compute import RankOptions as RankOptions -from pyarrow._compute import RankQuantileOptions as RankQuantileOptions -from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions -from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions -from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions -from pyarrow._compute import RoundOptions as RoundOptions -from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions -from 
pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions -from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions -from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction -from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel -from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions -from pyarrow._compute import ScalarFunction as ScalarFunction -from pyarrow._compute import ScalarKernel as ScalarKernel -from pyarrow._compute import SelectKOptions as SelectKOptions -from pyarrow._compute import SetLookupOptions as SetLookupOptions -from pyarrow._compute import SkewOptions as SkewOptions -from pyarrow._compute import SliceOptions as SliceOptions -from pyarrow._compute import SortOptions as SortOptions -from pyarrow._compute import SplitOptions as SplitOptions -from pyarrow._compute import SplitPatternOptions as SplitPatternOptions -from pyarrow._compute import StrftimeOptions as StrftimeOptions -from pyarrow._compute import StrptimeOptions as StrptimeOptions -from pyarrow._compute import StructFieldOptions as StructFieldOptions -from pyarrow._compute import TakeOptions as TakeOptions -from pyarrow._compute import TDigestOptions as TDigestOptions -from pyarrow._compute import TrimOptions as TrimOptions -from pyarrow._compute import UdfContext as UdfContext -from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions -from pyarrow._compute import ZeroFillOptions as ZeroFillOptions -from pyarrow._compute import VarianceOptions as VarianceOptions -from pyarrow._compute import VectorFunction as VectorFunction -from pyarrow._compute import VectorKernel as VectorKernel -from pyarrow._compute import WeekOptions as WeekOptions -from pyarrow._compute import WinsorizeOptions as WinsorizeOptions - -# Functions -from pyarrow._compute import call_function as call_function - -# Udf -from pyarrow._compute import call_tabular_function as call_tabular_function -from pyarrow._compute import function_registry as function_registry -from pyarrow._compute import get_function as get_function -from pyarrow._compute import list_functions as list_functions -from pyarrow._compute import register_aggregate_function as register_aggregate_function -from pyarrow._compute import register_scalar_function as register_scalar_function -from pyarrow._compute import register_tabular_function as register_tabular_function -from pyarrow._compute import register_vector_function as register_vector_function - -from pyarrow._compute import _Order, _Placement -from pyarrow._stubs_typing import ArrayLike, ScalarLike -from . import lib -from _stubs_typing import Indices - -_P = ParamSpec("_P") -_R = TypeVar("_R") - -def field(*name_or_index: str | bytes | tuple[str | int, ...] | int) -> Expression: - """Reference a column of the dataset. - - Stores only the field's name. Type and other information is known only when - the expression is bound to a dataset having an explicit scheme. - - Nested references are allowed by passing multiple names or a tuple of - names. For example ``('foo', 'bar')`` references the field named "bar" - inside the field named "foo". - - Parameters - ---------- - *name_or_index : string, multiple strings, tuple or int - The name or index of the (possibly nested) field the expression - references to. 
- - Returns - ------- - field_expr : Expression - Reference to the given field - - Examples - -------- - >>> import pyarrow.compute as pc - >>> pc.field("a") - - >>> pc.field(1) - - >>> pc.field(("a", "b")) - >> pc.field("a", "b") - Expression: - """Expression representing a scalar value. - - Creates an Expression object representing a scalar value that can be used - in compute expressions and predicates. - - Parameters - ---------- - value : bool, int, float or string - Python value of the scalar. This function accepts any value that can be - converted to a ``pyarrow.Scalar`` using ``pa.scalar()``. - - Notes - ----- - This function differs from ``pyarrow.scalar()`` in the following way: - - * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents - a single value in Arrow's memory model. - * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing - a scalar value that can be used in compute expressions, predicates, and - dataset filtering operations. - - Returns - ------- - scalar_expr : Expression - An Expression representing the scalar value - """ - -def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ... - -# ============= compute functions ============= -_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType) -_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True) -_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar) -_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray) -_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray) -ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT] -ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT -_ZonedTimestampArrayT: TypeAlias = ArrayOrChunkedArray[lib.Scalar[lib.TimestampType[Any, Any]]] -_ZonelessTimestampArrayT: TypeAlias = ArrayOrChunkedArray[lib.Scalar[lib.TimestampType[Any, None]]] -_ZonedTimestampScalarT: TypeAlias = lib.Scalar[lib.TimestampType[Any, Any]] -_ZonelessTimestampScalarT: TypeAlias = lib.Scalar[lib.TimestampType[Any, None]] - -SignedIntegerScalar: TypeAlias = ( - lib.Scalar[lib.Int8Type] - | lib.Scalar[lib.Int16Type] - | lib.Scalar[lib.Int32Type] - | lib.Scalar[lib.Int64Type] -) -UnsignedIntegerScalar: TypeAlias = ( - lib.Scalar[lib.UInt8Type] - | lib.Scalar[lib.UInt16Type] - | lib.Scalar[lib.Uint32Type] - | lib.Scalar[lib.UInt64Type] -) -IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar -FloatScalar: TypeAlias = ( - lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type] -) -DecimalScalar: TypeAlias = ( - lib.Scalar[lib.Decimal32Type] - | lib.Scalar[lib.Decimal64Type] - | lib.Scalar[lib.Decimal128Type] - | lib.Scalar[lib.Decimal256Type] -) -NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar -NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar -BinaryScalar: TypeAlias = ( - lib.Scalar[lib.BinaryType] - | lib.Scalar[lib.LargeBinaryType] - | lib.Scalar[lib.FixedSizeBinaryType] -) -StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType] -StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar -_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any] -_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT] -ListScalar: TypeAlias = ( - lib.ListScalar[_DataTypeT] | _ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] -) -TemporalScalar: TypeAlias = ( - 
lib.Date32Scalar - | lib.Date64Scalar - | lib.Time32Scalar[Any] - | lib.Time64Scalar[Any] - | lib.TimestampScalar[Any] - | lib.TimestampScalar[Any, None] - | lib.DurationScalar[Any] - | lib.MonthDayNanoIntervalScalar -) -NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar -NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar - -_NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) -_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) -NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] -_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) -_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) -NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] -_NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) -NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT] -_NumericOrTemporalArrayT = TypeVar("_NumericOrTemporalArrayT", bound=NumericOrTemporalArray) -BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar] -_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray) -IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar] -_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar) -FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar] -_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray) -_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar) -StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar] -_StringArrayT = TypeVar("_StringArrayT", bound=StringArray) -_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar) -BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar] -_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray) -_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar) -StringOrBinaryArray: TypeAlias = StringArray | BinaryArray -_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray) -_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar) -TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar] -_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray) -_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]] -_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]] -ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]] -_DecimalScalarT = TypeVar("_DecimalScalarT", bound=DecimalScalar) -DecimalArray: TypeAlias = lib.Array[_DecimalScalarT] | lib.ChunkedArray[_DecimalScalarT] -_DecimalArrayT = TypeVar("_DecimalArrayT", bound=DecimalArray) -# =============================== 1. Aggregation =============================== - -# ========================= 1.1 functions ========================= - -def all( - array: lib.BooleanScalar | BooleanArray, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: - """ - Test whether all elements in a boolean array evaluate to true. - - Null values are ignored by default. - If the `skip_nulls` option is set to false, then Kleene logic is used. - See "kleene_and" for more details on Kleene logic. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. 
- min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -any = _clone_signature(all) -""" -Test whether any element in a boolean array evaluates to true. - -Null values are ignored by default. -If the `skip_nulls` option is set to false, then Kleene logic is used. -See "kleene_or" for more details on Kleene logic. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -def approximate_median( - array: NumericScalar | NumericArray, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Approximate median of a numeric array with T-Digest algorithm. - - Nulls and NaNs are ignored. - A null scalar is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def count( - array: lib.Array | lib.ChunkedArray, - /, - mode: Literal["only_valid", "only_null", "all"] = "only_valid", - *, - options: CountOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Count the number of null / non-null values. - - By default, only non-null values are counted. - This can be changed through CountOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - options : pyarrow.compute.CountOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def count_distinct( - array: lib.Array | lib.ChunkedArray, - /, - mode: Literal["only_valid", "only_null", "all"] = "only_valid", - *, - options: CountOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Count the number of unique values. - - By default, only non-null values are counted. - This can be changed through CountOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. 
- mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - options : pyarrow.compute.CountOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def first( - array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT], - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ScalarT: - """ - Compute the first value in each group. - - Null values are ignored by default. - If skip_nulls = false, then this will return the first and last values - regardless if it is null - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def first_last( - array: lib.Array[Any] | lib.ChunkedArray[Any] | Sequence[Any], - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | dict[str, Any] | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar: - """ - Compute the first and last values of an array. - - Null values are ignored by default. - If skip_nulls = false, then this will return the first and last values - regardless if it is null - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def index( - data: lib.Array[Any] | lib.ChunkedArray[Any], - value, - start: int | None = None, - end: int | None = None, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Find the index of the first occurrence of a given value. - - Parameters - ---------- - data : Array-like - value : Scalar-like object - The value to search for. - start : int, optional - end : int, optional - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - index : int - the index, or -1 if not found - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"]) - >>> pc.index(arr, "ipsum") - - >>> pc.index(arr, "ipsum", start=2) - - >>> pc.index(arr, "amet") - - """ - -last = _clone_signature(first) -""" -Compute the first and last values of an array. - -Null values are ignored by default. 
-If skip_nulls = false, then this will return the first and last values -regardless if it is null - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -max = _clone_signature(first) -""" -Compute the minimum or maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -min = _clone_signature(first) -""" -Compute the minimum or maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -min_max = _clone_signature(first_last) -""" -Compute the minimum and maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool.
-""" - -@overload -def mean( - array: FloatScalar | FloatArray, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: ... -@overload -def mean( - array: lib.NumericArray[lib.Decimal128Scalar] - | lib.ChunkedArray[lib.Decimal128Scalar] - | lib.Decimal128Scalar, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Decimal128Scalar: ... -@overload -def mean( - array: lib.NumericArray[lib.Decimal256Scalar] - | lib.ChunkedArray[lib.Decimal256Scalar] - | lib.Decimal256Scalar, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Decimal256Scalar: ... -def mean(*args, **kwargs): - """ - Compute the mean of a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - The result is a double for integer and floating point arguments, - and a decimal with the same bit-width/precision/scale for decimal arguments. - For integers and floats, NaN is returned if min_count = 0 and - there are no values. For decimals, null is returned instead. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def mode( - array: NumericScalar | NumericArray, - /, - n: int = 1, - *, - skip_nulls: bool = True, - min_count: int = 0, - options: ModeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: - """ - Compute the modal (most common) values of a numeric array. - - Compute the n most common values and their respective occurrence counts. - The output has type `struct`, where T is the - input type. - The results are ordered by descending `count` first, and ascending `mode` - when breaking ties. - Nulls are ignored. If there are no non-null values in the array, - an empty array is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - n : int, default 1 - Number of distinct most-common values to return. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ModeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) - >>> modes = pc.mode(arr, 2) - >>> modes[0] - - >>> modes[1] - - """ - -def product( - array: _ScalarT | lib.NumericArray[_ScalarT], - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ScalarT: - """ - Compute the product of values in a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def quantile( - array: NumericScalar | NumericArray, - /, - q: float | list[float] = 0.5, - *, - interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", - skip_nulls: bool = True, - min_count: int = 0, - options: QuantileOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Compute an array of quantiles of a numeric array or chunked array. - - By default, 0.5 quantile (median) is returned. - If quantile lies between two data points, an interpolated value is - returned based on selected interpolation method. - Nulls and NaNs are ignored. - An array of nulls is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to compute. All values must be in - [0, 1]. - interpolation : str, default "linear" - How to break ties between competing data points for a given quantile. - Accepted values are: - - - "linear": compute an interpolation - - "lower": always use the smallest of the two data points - - "higher": always use the largest of the two data points - - "nearest": select the data point that is closest to the quantile - - "midpoint": compute the (unweighted) mean of the two data points - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.QuantileOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def stddev( - array: NumericScalar | NumericArray, - /, - *, - ddof: float = 0, - skip_nulls: bool = True, - min_count: int = 0, - options: VarianceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Calculate the standard deviation of a numeric array. - - The number of degrees of freedom can be controlled using VarianceOptions. 
- By default (`ddof` = 0), the population standard deviation is calculated. - Nulls are ignored. If there are not enough non-null values in the array - to satisfy `ddof`, null is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.VarianceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def skew( - array: NumericArray | Sequence[int | None], - /, - *, - skip_nulls: bool = True, - biased: bool = True, - min_count: int = 0, - options: SkewOptions | None = None, -) -> NumericScalar: - """ - Calculate the skewness of a numeric array - Nulls are ignored by default. If there are not enough non-null values - in the array to satisfy `min_count`, null is returned. - The behavior of nulls and the `min_count` parameter can be changed. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - biased : bool, default True - Whether the calculated value is biased. - If False, the value computed includes a correction factor to reduce bias. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : SkewOptions, optional - Options for the `skew` and `kurtosis` functions. - """ - -kurtosis = _clone_signature(skew) -""" -Calculate the kurtosis of a numeric array -Nulls are ignored by default. If there are not enough non-null values -in the array to satisfy `min_count`, null is returned. -The behavior of nulls and the `min_count` parameter can be changed. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -biased : bool, default True - Whether the calculated value is biased. - If False, the value computed includes a correction factor to reduce bias. -min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : SkewOptions, optional - Options for the `skew` and `kurtosis` functions. -""" - -def sum( - array: _NumericScalarT | NumericArray[_NumericScalarT] | _DecimalArrayT, - /, - *, - skip_nulls: bool = True, - min_count: int = 1, - options: ScalarAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: - """ - Compute the sum of a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. 
- min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def tdigest( - array: NumericScalar | NumericArray, - /, - q: float | list[float] = 0.5, - *, - delta: int = 100, - buffer_size: int = 500, - skip_nulls: bool = True, - min_count: int = 0, - options: TDigestOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Approximate quantiles of a numeric array with T-Digest algorithm. - - By default, 0.5 quantile (median) is returned. - Nulls and NaNs are ignored. - An array of nulls is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to approximate. All values must be - in [0, 1]. - delta : int, default 100 - Compression parameter for the T-digest algorithm. - buffer_size : int, default 500 - Buffer size for the T-digest algorithm. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.TDigestOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ - -def variance( - array: NumericScalar | NumericArray | list[int] | list[int | None], - /, - *, - ddof: int = 0, - skip_nulls: bool = True, - min_count: int = 0, - options: VarianceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Calculate the variance of a numeric array. - - The number of degrees of freedom can be controlled using VarianceOptions. - By default (`ddof` = 0), the population variance is calculated. - Nulls are ignored. If there are not enough non-null values in the array - to satisfy `ddof`, null is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.VarianceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def top_k_unstable( - values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, - k: int, - sort_keys: list | None = None, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Select the indices of the top-k ordered elements from array- or table-like - data. - - This is a specialization for :func:`select_k_unstable`. Output is not - guaranteed to be stable. - - Parameters - ---------- - values : Array, ChunkedArray, RecordBatch, or Table - Data to sort and get top indices from. 
- k : int - The number of `k` elements to keep. - sort_keys : List-like - Column key names to order by when input is table-like data. - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - result : Array - Indices of the top-k ordered elements - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) - >>> pc.top_k_unstable(arr, k=3) - - [ - 5, - 4, - 2 - ] - """ - -def bottom_k_unstable( - values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, - k: int, - sort_keys: list | None = None, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Select the indices of the bottom-k ordered elements from - array- or table-like data. - - This is a specialization for :func:`select_k_unstable`. Output is not - guaranteed to be stable. - - Parameters - ---------- - values : Array, ChunkedArray, RecordBatch, or Table - Data to sort and get bottom indices from. - k : int - The number of `k` elements to keep. - sort_keys : List-like - Column key names to order by when input is table-like data. - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - result : Array of indices - Indices of the bottom-k ordered elements - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) - >>> pc.bottom_k_unstable(arr, k=3) - - [ - 0, - 1, - 2 - ] - """ - -def winsorize( - values: lib.Array | lib.ChunkedArray, - lower_limit: float | None = None, - upper_limit: float | None = None, - /, - *, - options: WinsorizeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Apply a winsorization transform to the input array so as to reduce the influence of potential outliers. - NaNs and nulls in the input are ignored for the purpose of computing the lower and upper quantiles. - The quantile limits can be changed in WinsorizeOptions. - - Parameters - ---------- - values : Array, ChunkedArray, RecordBatch, or Table - Data to sort and get bottom indices from. - - lower_limit : float, between 0 and 1 - The quantile below which all values are replaced with the quantile's value. - For example, if lower_limit = 0.05, then all values in the lower 5% percentile will be replaced with the 5% percentile value. - - upper_limit : float, between 0 and 1 - The quantile above which all values are replaced with the quantile’s value. - For example, if upper_limit = 0.95, then all values in the upper 95% percentile will be replaced with the 95% percentile value. - - options : pyarrow.compute.WinsorizeOptions, optional - Alternative way of passing options. - - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- - Returns - ------- - result : Array of indices - Winsorized array - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array([10, 4, 9, 8, 5, 3, 7, 2, 1, 6]) - >>> pc.winsorize(arr, 0.1, 0.8) - - [ - 8, - 4, - 8, - 8, - 5, - 3, - 7, - 2, - 2, - 6 - ] - """ - -def pivot_wider( - pivot_keys: lib.Array | lib.ChunkedArray | list[Any], - pivot_values: lib.Array | lib.ChunkedArray | list[Any], - /, - key_names: list[Any] | None = None, - *, - unexpected_key_behavior: str | None = None, - options: PivotWiderOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar: - """ - Pivot values according to a pivot key column. - - Output is a struct with as many fields as PivotWiderOptions.key_names. - All output struct fields have the same type as pivot_values. Each pivot - key decides in which output field the corresponding pivot value is emitted. - If a pivot key doesn’t appear, null is emitted. If more than one non-null - value is encountered for a given pivot key, Invalid is raised. The pivot - key column can be string, binary or integer. The key_names will be cast - to the pivot key column type for matching. Behavior of unexpected pivot - keys is controlled by unexpected_key_behavior. - - Parameters - ---------- - pivot_keys : sequence - Array, ChunkedArray, list - pivot_values : sequence - Array, ChunkedArray, list - key_names : sequence of str - The pivot key names expected in the pivot key column. - For each entry in `key_names`, a column with the same name is emitted - in the struct output. - unexpected_key_behavior : str, default "ignore" - The behavior when pivot keys not in `key_names` are encountered. - Accepted values are "ignore", "raise". - If "ignore", unexpected keys are silently ignored. - If "raise", unexpected keys raise a KeyError. - options : pyarrow.compute.PivotWiderOptions, optional - Alternative way of passing options. - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - result : Array of indices - Pivoted struct array - """ - -# ========================= 2. Element-wise (“scalar”) functions ========================= - -# ========================= 2.1 Arithmetic ========================= -@overload -def abs( - x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationT: ... -@overload -def abs( - x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationArrayT: ... -@overload -def abs(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def abs(*args, **kwargs): - """ - Calculate the absolute value of the argument element-wise. - - Results will wrap around on integer overflow. - Use function "abs_checked" if you want overflow - to return an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -abs_checked = _clone_signature(abs) -""" -Calculate the absolute value of the argument element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "abs". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" - -@overload -def add( - x: _NumericOrTemporalScalarT, - y: _NumericOrTemporalScalarT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalScalarT: ... -@overload -def add( - x: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], - y: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def add( - x: Expression, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -@overload -def add( - x: NumericOrTemporalScalar | lib._AsPyType, - y: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def add( - x: _NumericOrTemporalArrayT | NDArray[Any] | list[lib._AsPyType | None], - y: NumericOrTemporalScalar | lib._AsPyType, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def add( - x: NumericOrTemporalScalar | lib._AsPyType, y: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -@overload -def add( - x: Expression, y: NumericOrTemporalScalar | lib._AsPyType, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def add(*args, **kwargs): - """ - Add the arguments element-wise. - - Results will wrap around on integer overflow. - Use function "add_checked" if you want overflow - to return an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -add_checked = _clone_signature(add) -""" -Add the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "add". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - -""" - -@overload -def divide( - dividend: _NumericOrTemporalScalarT, - divisor: _NumericOrTemporalScalarT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalScalarT: ... -@overload -def divide( - dividend: _NumericOrTemporalArrayT, - divisor: _NumericOrTemporalArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def divide( - dividend: Expression, - divisor: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def divide( - dividend: NumericOrTemporalScalar, - divisor: _NumericOrTemporalArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def divide( - dividend: _NumericOrTemporalArrayT, - divisor: NumericOrTemporalScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def divide( - dividend: NumericOrTemporalScalar, - divisor: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def divide( - dividend: Expression, - divisor: NumericOrTemporalScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... 
-def divide(*args, **kwargs): - """ - Divide the arguments element-wise. - - Integer division by zero returns an error. However, integer overflow - wraps around, and floating-point division by zero returns an infinite. - Use function "divide_checked" if you want to get an error - in all the aforementioned cases. - - Parameters - ---------- - dividend : Array-like or scalar-like - Argument to compute function. - divisor : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ - -divide_checked = _clone_signature(divide) -""" -Divide the arguments element-wise. - -An error is returned when trying to divide by zero, or when -integer overflow is encountered. - -Parameters ----------- -dividend : Array-like or scalar-like - Argument to compute function. -divisor : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def exp( - exponent: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _FloatArrayT: ... -@overload -def exp( - exponent: ArrayOrChunkedArray[NonFloatNumericScalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: ... -@overload -def exp( - exponent: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _FloatScalarT: ... -@overload -def exp( - exponent: NonFloatNumericScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.DoubleScalar: ... -@overload -def exp(exponent: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def exp(*args, **kwargs): - """ - Compute Euler's number raised to the power of specified exponent, element-wise. - - If exponent is null the result will be null. - - Parameters - ---------- - exponent : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -multiply = _clone_signature(add) -""" -Multiply the arguments element-wise. - -Results will wrap around on integer overflow. -Use function "multiply_checked" if you want overflow -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -multiply_checked = _clone_signature(add) -""" -Multiply the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "multiply". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def negate( - x: _NumericOrDurationT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationT: ... -@overload -def negate( - x: _NumericOrDurationArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationArrayT: ... -@overload -def negate(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def negate(*args, **kwargs): - """ - Negate the argument element-wise. - - Results will wrap around on integer overflow. 
- Use function "negate_checked" if you want overflow - to return an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -negate_checked = _clone_signature(negate) -""" -Negate the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "negate". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def power( - base: _NumericScalarT, - exponent: _NumericScalarT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: ... -@overload -def power( - base: _NumericArrayT, - exponent: _NumericArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def power( - base: Expression, - exponent: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def power( - base: _NumericArrayT, - exponent: NumericScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def power( - base: NumericScalar, - exponent: _NumericArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def power( - base: NumericScalar, - exponent: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def power( - base: Expression, - exponent: NumericScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def power(*args, **kwargs): - """ - Raise arguments to power element-wise. - - Integer to negative integer power returns an error. However, integer overflow - wraps around. If either base or exponent is null the result will be null. - - Parameters - ---------- - base : Array-like or scalar-like - Argument to compute function. - exponent : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -power_checked = _clone_signature(power) -""" -Raise arguments to power element-wise. - -An error is returned when integer to negative integer power is encountered, -or integer overflow is encountered. - -Parameters ----------- -base : Array-like or scalar-like - Argument to compute function. -exponent : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def sign( - x: NumericOrDurationArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> ( - lib.NumericArray[lib.Int8Scalar] - | lib.NumericArray[lib.FloatScalar] - | lib.NumericArray[lib.DoubleScalar] -): ... -@overload -def sign( - x: NumericOrDurationScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar: ... -@overload -def sign(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def sign(*args, **kwargs): - """ - Get the signedness of the arguments element-wise. - - Output is any of (-1,1) for nonzero inputs and 0 for zero input. - NaN values return NaN. 
Integral values return signedness as Int8 and - floating-point values return it with the same type as the input values. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ - -@overload -def sqrt(x: NumericArray, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray: ... -@overload -def sqrt(x: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatScalar: ... -@overload -def sqrt(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def sqrt(*args, **kwargs): - """ - Takes the square root of arguments element-wise. - - A negative argument returns a NaN. For a variant that returns an - error, use function "sqrt_checked". - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ - -sqrt_checked = _clone_signature(sqrt) -""" -Takes the square root of arguments element-wise. - -A negative argument returns an error. For a variant that returns a -NaN, use function "sqrt". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -subtract = _clone_signature(add) -""" -Subtract the arguments element-wise. - -Results will wrap around on integer overflow. -Use function "subtract_checked" if you want overflow -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -subtract_checked = _clone_signature(add) -""" -Subtract the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "subtract". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.1 Bit-wise functions ========================= -@overload -def bit_wise_and( - x: _NumericScalarT, y: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericScalarT: ... -@overload -def bit_wise_and( - x: _NumericArrayT, - y: _NumericArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def bit_wise_and( - x: NumericScalar, y: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericArrayT: ... -@overload -def bit_wise_and( - x: _NumericArrayT, y: NumericScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericArrayT: ... -@overload -def bit_wise_and( - x: Expression, - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def bit_wise_and( - x: Expression, - y: NumericScalar | ArrayOrChunkedArray[NumericScalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... 
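# NOTE (editor's illustration, not part of the stubs): assumed example data showing
# the array/array and array/scalar overloads of "bit_wise_and" typed above.
import pyarrow as pa
import pyarrow.compute as pc

x = pa.array([0b1100, 0b1010], type=pa.int32())
y = pa.array([0b0110, 0b0011], type=pa.int32())
print(pc.bit_wise_and(x, y))                            # -> [4, 2]
print(pc.bit_wise_and(x, pa.scalar(6, type=pa.int32())))  # scalar is broadcast -> [4, 2]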
-@overload -def bit_wise_and( - x: NumericScalar | ArrayOrChunkedArray[NumericScalar], - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def bit_wise_and(*args, **kwargs): - """ - Bit-wise AND the arguments element-wise. - - Null values return null. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def bit_wise_not( - x: _NumericScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericScalarT: ... -@overload -def bit_wise_not( - x: _NumericArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericArrayT: ... -@overload -def bit_wise_not(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def bit_wise_not(*args, **kwargs): - """ - Bit-wise negate the arguments element-wise. - - Null values return null. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -bit_wise_or = _clone_signature(bit_wise_and) -""" -Bit-wise OR the arguments element-wise. - -Null values return null. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -bit_wise_xor = _clone_signature(bit_wise_and) -""" -Bit-wise XOR the arguments element-wise. - -Null values return null. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -shift_left = _clone_signature(bit_wise_and) -""" -Left shift `x` by `y`. - -The shift operates as if on the two's complement representation of the number. -In other words, this is equivalent to multiplying `x` by 2 to the power `y`, -even if overflow occurs. -`x` is returned if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -Use function "shift_left_checked" if you want an invalid shift amount -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -shift_left_checked = _clone_signature(bit_wise_and) -""" -Left shift `x` by `y`. - -The shift operates as if on the two's complement representation of the number. -In other words, this is equivalent to multiplying `x` by 2 to the power `y`, -even if overflow occurs. -An error is raised if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -See "shift_left" for a variant that doesn't fail for an invalid shift amount. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" -shift_right = _clone_signature(bit_wise_and) -""" -Right shift `x` by `y`. - -This is equivalent to dividing `x` by 2 to the power `y`. -`x` is returned if `y` (the amount to shift by) is: (1) negative or -(2) greater than or equal to the precision of `x`. -Use function "shift_right_checked" if you want an invalid shift amount -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -shift_right_checked = _clone_signature(bit_wise_and) -""" -Right shift `x` by `y`. - -This is equivalent to dividing `x` by 2 to the power `y`. -An error is raised if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -See "shift_right" for a variant that doesn't fail for an invalid shift amount - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.2 Rounding functions ========================= -@overload -def ceil(x: _FloatScalarT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT: ... -@overload -def ceil(x: _FloatArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatArrayT: ... -@overload -def ceil(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def ceil(*args, **kwargs): - """ - Round up to the nearest integer. - - Compute the smallest integer value not less in magnitude than `x`. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -floor = _clone_signature(ceil) -""" -Round down to the nearest integer. - -Compute the largest integer value not greater in magnitude than `x`. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def round( - x: _NumericScalarT | int | float, - /, - ndigits: int = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: ... -@overload -def round( - x: _NumericArrayT | Sequence[int | float | None], - /, - ndigits: int = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... 
-@overload -def round( - x: Expression, - /, - ndigits: int = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def round(*args, **kwargs): - """ - Round to a given precision. - - Options are used to control the number of digits and rounding mode. - Default behavior is to round to the nearest integer and - use half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - ndigits : int, default 0 - Number of fractional digits to round to. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def round_to_multiple( - x: int | float | _NumericScalarT, - /, - multiple: int | float | _NumericScalarT = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundToMultipleOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: ... -@overload -def round_to_multiple( - x: _NumericArrayT | Sequence[int | float | None], - /, - multiple: int | float | _NumericScalarT = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundToMultipleOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def round_to_multiple( - x: Expression, - /, - multiple: int | float | _NumericScalarT = 0, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundToMultipleOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def round_to_multiple(*args, **kwargs): - """ - Round to a given multiple. - - Options are used to control the rounding multiple and rounding mode. - Default behavior is to round to the nearest integer and - use half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - multiple : numeric scalar, default 1.0 - Multiple to round to. Should be a scalar of a type compatible - with the argument to be rounded. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundToMultipleOptions, optional - Alternative way of passing options. 
- memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def round_binary( - x: _NumericScalarT | float, - s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar, - /, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundBinaryOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: ... -@overload -def round_binary( - x: _NumericScalarT | float, - s: Iterable, - /, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundBinaryOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.NumericArray[_NumericScalarT]: ... -@overload -def round_binary( - x: _NumericArrayT | Sequence[float], - s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, - /, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundBinaryOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def round_binary( - x: Expression, - s: Iterable, - /, - round_mode: Literal[ - "down", - "up", - "towards_zero", - "towards_infinity", - "half_down", - "half_up", - "half_towards_zero", - "half_towards_infinity", - "half_to_even", - "half_to_odd", - ] = "half_to_even", - *, - options: RoundBinaryOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def round_binary(*args, **kwargs): - """ - Round to the given precision. - - Options are used to control the rounding mode. - Default behavior is to use the half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - s : Array-like or scalar-like - Argument to compute function. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundBinaryOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -trunc = _clone_signature(ceil) -""" -Compute the integral part. - -Compute the nearest integer not greater in magnitude than `x`. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.3 Logarithmic functions ========================= -@overload -def ln( - x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar: ... -@overload -def ln( - x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... 
-@overload -def ln(x: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def ln(*args, **kwargs): - """ - Compute natural logarithm. - - Non-positive values return -inf or NaN. Null values return null. - Use function "ln_checked" if you want non-positive values to raise an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -ln_checked = _clone_signature(ln) -""" -Compute natural logarithm. - -Non-positive values raise an error. Null values return null. -Use function "ln" if you want non-positive values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log10 = _clone_signature(ln) -""" -Compute base 10 logarithm. - -Non-positive values return -inf or NaN. Null values return null. -Use function "log10_checked" if you want non-positive values -to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log10_checked = _clone_signature(ln) -""" -Compute base 10 logarithm. - -Non-positive values raise an error. Null values return null. -Use function "log10" if you want non-positive values -to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log1p = _clone_signature(ln) -""" -Compute natural log of (1+x). - -Values <= -1 return -inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p_checked" if you want invalid values to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log1p_checked = _clone_signature(ln) -""" -Compute natural log of (1+x). - -Values <= -1 return -inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p" if you want invalid values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log2 = _clone_signature(ln) -""" -Compute base 2 logarithm. - -Non-positive values return -inf or NaN. Null values return null. -Use function "log2_checked" if you want non-positive values -to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -log2_checked = _clone_signature(ln) -""" -Compute base 2 logarithm. - -Non-positive values raise an error. Null values return null. -Use function "log2" if you want non-positive values -to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def logb( - x: FloatScalar, b: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar: ... -@overload -def logb( - x: FloatArray, b: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def logb( - x: FloatScalar, - b: FloatArray, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def logb( - x: FloatArray, - b: FloatScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def logb( - x: Expression | Any, b: Expression | Any, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression | Any: ... -def logb(*args, **kwargs): - """ - Compute base `b` logarithm. - - Values <= 0 return -inf or NaN. Null values return null. - Use function "logb_checked" if you want non-positive values to raise an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - b : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -logb_checked = _clone_signature(logb) -""" -Compute base `b` logarithm. - -Values <= 0 return -inf or NaN. Null values return null. -Use function "logb" if you want non-positive values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -b : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.4 Trigonometric functions ========================= -acos = _clone_signature(ln) -""" -Compute the inverse cosine. - -NaN is returned for invalid input values; -to raise an error instead, see "acos_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -acos_checked = _clone_signature(ln) -""" -Compute the inverse cosine. - -Invalid input values raise an error; -to return NaN instead, see "acos". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -asin = _clone_signature(ln) -""" -Compute the inverse sine. - -NaN is returned for invalid input values; -to raise an error instead, see "asin_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -asinh = _clone_signature(ln) -""" -Compute the inverse hyperbolic sine. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -asin_checked = _clone_signature(ln) -""" -Compute the inverse sine. 
- -Invalid input values raise an error; -to return NaN instead, see "asin". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -atan = _clone_signature(ln) -""" -Compute the inverse tangent of x. - -The return value is in the range [-pi/2, pi/2]; -for a full return range [-pi, pi], see "atan2". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -atanh = _clone_signature(ln) -""" -Compute the inverse hyperbolic tangent of x. -The return value is in the range [-1, 1]. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cos = _clone_signature(ln) -""" -Compute the cosine. - -NaN is returned for invalid input values; -to raise an error instead, see "cos_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cosh = _clone_signature(ln) -""" -Compute the hyperbolic cosine. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -acosh = _clone_signature(ln) -""" -Compute the inverse hyperbolic cosine. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cos_checked = _clone_signature(ln) -""" -Compute the cosine. - -Infinite values raise an error; -to return NaN instead, see "cos". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -sin = _clone_signature(ln) -""" -Compute the sine. - -NaN is returned for invalid input values; -to raise an error instead, see "sin_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -sin_checked = _clone_signature(ln) -""" -Compute the sine. - -Invalid input values raise an error; -to return NaN instead, see "sin". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -sinh = _clone_signature(ln) -""" -Compute the hyperbolic sine. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -tan = _clone_signature(ln) -""" -Compute the tangent. - -NaN is returned for invalid input values; -to raise an error instead, see "tan_checked". 
- -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -tan_checked = _clone_signature(ln) -""" -Compute the tangent. - -Infinite values raise an error; -to return NaN instead, see "tan". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -tanh = _clone_signature(ln) -""" -Compute the hyperbolic tangent. -NaN is returned for invalid input values. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def atan2( - y: FloatScalar, x: FloatScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar: ... -@overload -def atan2( - y: FloatArray, x: FloatArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def atan2( - y: FloatArray, - x: FloatScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def atan2( - y: FloatScalar, - x: FloatArray, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar]: ... -@overload -def atan2( - y: Expression, x: Any, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -@overload -def atan2( - y: Any, x: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def atan2(*args, **kwargs): - """ - Compute the inverse tangent of y/x. - - The return value is in the range [-pi, pi]. - - Parameters - ---------- - y : Array-like or scalar-like - Argument to compute function. - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.5 Comparisons functions ========================= -@overload -def equal( - x: lib.Scalar, y: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def equal( - x: lib.Scalar | lib._AsPyType, - y: lib.Array | lib.ChunkedArray | list[lib._AsPyType], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def equal( - x: lib.Array | lib.ChunkedArray | list[lib._AsPyType], - y: lib.Scalar | lib._AsPyType, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def equal( - x: lib.Array | lib.ChunkedArray, - y: lib.Array | lib.ChunkedArray, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def equal( - x: Expression, - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def equal( - x: lib.Scalar, - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def equal( - x: Expression, - y: lib.Scalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def equal(*args, **kwargs): - """ - Compare values for equality (x == y). - - A null on either side emits a null comparison result. 
- - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -greater = _clone_signature(equal) -""" -Compare values for ordered inequality (x > y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -greater_equal = _clone_signature(equal) -""" -Compare values for ordered inequality (x >= y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -less = _clone_signature(equal) -""" -Compare values for ordered inequality (x < y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -less_equal = _clone_signature(equal) -""" -Compare values for ordered inequality (x <= y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -not_equal = _clone_signature(equal) -""" -Compare values for inequality (x != y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def max_element_wise( - *args: ScalarOrArray[_Scalar_CoT] | NDArray[Any] | float, - skip_nulls: bool = True, - options: ElementWiseAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]: ... -@overload -def max_element_wise( - *args: Expression, - skip_nulls: bool = True, - options: ElementWiseAggregateOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def max_element_wise(*args, **kwargs): - """ - Find the element-wise maximum value. - - Nulls are ignored (by default) or propagated. - NaN is preferred over null, but not over any valid value. - - Parameters - ---------- - *args : Array-like or scalar-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - options : pyarrow.compute.ElementWiseAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- """ - -min_element_wise = _clone_signature(max_element_wise) -""" -Find the element-wise minimum value. - -Nulls are ignored (by default) or propagated. -NaN is preferred over null, but not over any valid value. - -Parameters ----------- -*args : Array-like or scalar-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -options : pyarrow.compute.ElementWiseAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.6 Logical functions ========================= -@overload -def and_( - x: lib.BooleanScalar, y: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def and_( - x: BooleanArray, - y: BooleanArray, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def and_( - x: Expression, - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def and_( - x: lib.BooleanScalar, - y: BooleanArray, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def and_( - x: BooleanArray, - y: lib.BooleanScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def and_( - x: lib.BooleanScalar, - y: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def and_( - x: Expression, - y: lib.BooleanScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def and_( - x: ScalarOrArray[lib.BooleanScalar], - y: ScalarOrArray[lib.BooleanScalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> ScalarOrArray[lib.BooleanScalar]: ... -def and_(*args, **kwargs): - """ - Logical 'and' boolean values. - - When a null is encountered in either input, a null is output. - For a different null behavior, see function "and_kleene". - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -and_kleene = _clone_signature(and_) -""" -Logical 'and' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true and null = null -- null and true = null -- false and null = false -- null and false = false -- null and null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'and' false is always false. -For a different null behavior, see function "and". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -and_not = _clone_signature(and_) -""" -Logical 'and not' boolean values. - -When a null is encountered in either input, a null is output. -For a different null behavior, see function "and_not_kleene". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -and_not_kleene = _clone_signature(and_) -""" -Logical 'and not' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true and not null = null -- null and not false = null -- false and not null = false -- null and not true = false -- null and not null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'and not' true is always false, as is false -'and not' an unknown value. -For a different null behavior, see function "and_not". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -or_ = _clone_signature(and_) -""" -Logical 'or' boolean values. - -When a null is encountered in either input, a null is output. -For a different null behavior, see function "or_kleene". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -or_kleene = _clone_signature(and_) -""" -Logical 'or' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true or null = true -- null or true = true -- false or null = null -- null or false = null -- null or null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'or' true is always true. -For a different null behavior, see function "or". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -xor = _clone_signature(and_) -""" -Logical 'xor' boolean values. - -When a null is encountered in either input, a null is output. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def invert( - x: lib.BooleanScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def invert( - x: _BooleanArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _BooleanArrayT: ... -@overload -def invert( - x: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def invert(*args, **kwargs): - """ - Invert boolean values. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.10 String predicates ========================= -@overload -def ascii_is_alnum( - strings: StringScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def ascii_is_alnum( - strings: StringArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanArray: ... 
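# A minimal usage sketch of the comparison and logical kernels documented above,
# assuming pyarrow is installed; the sample arrays are illustrative. It highlights
# how "and_" and "and_kleene" treat nulls differently and that comparisons
# propagate nulls.
import pyarrow as pa
import pyarrow.compute as pc

a = pa.array([True, False, None])
b = pa.array([None, None, None], type=pa.bool_())
pc.and_(a, b)        # [null, null, null]   (any null input yields null)
pc.and_kleene(a, b)  # [null, false, null]  (false AND unknown is false)
pc.invert(a)         # [false, true, null]

x = pa.array([1, 2, None])
pc.equal(x, 2)             # [false, true, null]  (null on either side emits null)
pc.max_element_wise(x, 3)  # [3, 3, 3]  (nulls skipped by default)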
-@overload -def ascii_is_alnum( - strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def ascii_is_alnum(*args, **kwargs): - """ - Classify strings as ASCII alphanumeric. - - For each string in `strings`, emit true iff the string is non-empty - and consists only of alphanumeric ASCII characters. Null strings emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -ascii_is_alpha = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII alphabetic. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphabetic ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_decimal = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII decimal. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of decimal ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_lower = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII lowercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of lowercase ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_printable = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII printable. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of printable ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_space = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII whitespace. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of whitespace ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_upper = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII uppercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of uppercase ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_alnum = _clone_signature(ascii_is_alnum) -""" -Classify strings as alphanumeric. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphanumeric Unicode characters. Null strings emit null. 
- -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_alpha = _clone_signature(ascii_is_alnum) -""" -Classify strings as alphabetic. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphabetic Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_decimal = _clone_signature(ascii_is_alnum) -""" -Classify strings as decimal. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of decimal Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_digit = _clone_signature(ascii_is_alnum) -""" -Classify strings as digits. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of Unicode digits. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_lower = _clone_signature(ascii_is_alnum) -""" -Classify strings as lowercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of lowercase Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_numeric = _clone_signature(ascii_is_alnum) -""" -Classify strings as numeric. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of numeric Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_printable = _clone_signature(ascii_is_alnum) -""" -Classify strings as printable. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of printable Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_space = _clone_signature(ascii_is_alnum) -""" -Classify strings as whitespace. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of whitespace Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_upper = _clone_signature(ascii_is_alnum) -""" -Classify strings as uppercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of uppercase Unicode characters. Null strings emit null. 
- -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_is_title = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII titlecase. - -For each string in `strings`, emit true iff the string is title-cased, -i.e. it has at least one cased character, each uppercase character -follows an uncased character, and each lowercase character follows -an uppercase character. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_is_title = _clone_signature(ascii_is_alnum) -""" -Classify strings as titlecase. - -For each string in `strings`, emit true iff the string is title-cased, -i.e. it has at least one cased character, each uppercase character -follows an uncased character, and each lowercase character follows -an uppercase character. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -string_is_ascii = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII. - -For each string in `strings`, emit true iff the string consists only -of ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.11 String transforms ========================= -@overload -def ascii_capitalize( - strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringScalarT: ... -@overload -def ascii_capitalize( - strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringArrayT: ... -@overload -def ascii_capitalize( - strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def ascii_capitalize(*args, **kwargs): - """ - Capitalize the first character of ASCII input. - - For each string in `strings`, return a capitalized version. - - This function assumes the input is fully ASCII. If it may contain - non-ASCII characters, use "utf8_capitalize" instead. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -ascii_lower = _clone_signature(ascii_capitalize) -""" -Transform ASCII input to lowercase. - -For each string in `strings`, return a lowercase version. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_lower" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_reverse = _clone_signature(ascii_capitalize) -""" -Reverse ASCII input. - -For each ASCII string in `strings`, return a reversed version. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_reverse" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional
-    If not passed, will allocate memory from the default memory pool.
-"""
-ascii_swapcase = _clone_signature(ascii_capitalize)
-"""
-Transform ASCII input by inverting casing.
-
-For each string in `strings`, return a string with opposite casing.
-
-This function assumes the input is fully ASCII. If it may contain
-non-ASCII characters, use "utf8_swapcase" instead.
-
-Parameters
-----------
-strings : Array-like or scalar-like
-    Argument to compute function.
-memory_pool : pyarrow.MemoryPool, optional
-    If not passed, will allocate memory from the default memory pool.
-"""
-ascii_title = _clone_signature(ascii_capitalize)
-"""
-Titlecase each word of ASCII input.
-
-For each string in `strings`, return a titlecased version.
-Each word in the output will start with an uppercase character and its
-remaining characters will be lowercase.
-
-This function assumes the input is fully ASCII. If it may contain
-non-ASCII characters, use "utf8_title" instead.
-
-Parameters
-----------
-strings : Array-like or scalar-like
-    Argument to compute function.
-memory_pool : pyarrow.MemoryPool, optional
-    If not passed, will allocate memory from the default memory pool.
-"""
-ascii_upper = _clone_signature(ascii_capitalize)
-"""
-Transform ASCII input to uppercase.
-
-For each string in `strings`, return an uppercase version.
-
-This function assumes the input is fully ASCII. If it may contain
-non-ASCII characters, use "utf8_upper" instead.
-
-Parameters
-----------
-strings : Array-like or scalar-like
-    Argument to compute function.
-memory_pool : pyarrow.MemoryPool, optional
-    If not passed, will allocate memory from the default memory pool.
-"""
-
-@overload
-def binary_length(
-    strings: lib.BinaryScalar | lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None
-) -> lib.Int32Scalar: ...
-@overload
-def binary_length(
-    strings: lib.LargeBinaryScalar | lib.LargeStringScalar,
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> lib.Int64Scalar: ...
-@overload
-def binary_length(
-    strings: lib.BinaryArray
-    | lib.StringArray
-    | lib.ChunkedArray[lib.BinaryScalar]
-    | lib.ChunkedArray[lib.StringScalar],
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> lib.Int32Array: ...
-@overload
-def binary_length(
-    strings: lib.LargeBinaryArray
-    | lib.LargeStringArray
-    | lib.ChunkedArray[lib.LargeBinaryScalar]
-    | lib.ChunkedArray[lib.LargeStringScalar],
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> lib.Int64Array: ...
-@overload
-def binary_length(
-    strings: Expression,
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> Expression: ...
-def binary_length(*args, **kwargs):
-    """
-    Compute string lengths.
-
-    For each string in `strings`, emit its length of bytes.
-    Null values emit null.
-
-    Parameters
-    ----------
-    strings : Array-like or scalar-like
-        Argument to compute function.
-    memory_pool : pyarrow.MemoryPool, optional
-        If not passed, will allocate memory from the default memory pool.
-    """
-
-@overload
-def binary_repeat(
-    strings: _StringOrBinaryScalarT,
-    num_repeats: int,
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> _StringOrBinaryScalarT: ...
-@overload
-def binary_repeat(
-    strings: _StringOrBinaryScalarT,
-    num_repeats: list[int] | list[int | None],
-    /,
-    *,
-    memory_pool: lib.MemoryPool | None = None,
-) -> lib.Array[_StringOrBinaryScalarT]: ...
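# A minimal usage sketch of the ASCII transforms, string predicates and
# binary_length kernels documented above, assuming pyarrow is installed; the
# sample strings are illustrative. The ascii_* kernels only touch ASCII
# characters; other bytes pass through unchanged.
import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["arrow", "héllo", None])
pc.ascii_upper(s)      # ["ARROW", "HéLLO", null]  (the non-ASCII "é" is untouched)
pc.string_is_ascii(s)  # [true, false, null]
pc.utf8_is_alpha(s)    # [true, true, null]
pc.binary_length(s)    # [5, 6, null]  ("é" takes two bytes in UTF-8)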
-@overload -def binary_repeat( - strings: _StringOrBinaryArrayT, - num_repeats: int | list[int] | list[int | None], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryArrayT: ... -@overload -def binary_repeat( - strings: Expression, - num_repeats: int | list[int] | list[int | None], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def binary_repeat(*args, **kwargs): - """ - Repeat a binary string. - - For each binary string in `strings`, return a replicated version. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - num_repeats : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def binary_replace_slice( - strings: _StringOrBinaryScalarT, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryScalarT: ... -@overload -def binary_replace_slice( - strings: _StringOrBinaryArrayT, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryArrayT: ... -@overload -def binary_replace_slice( - strings: Expression, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def binary_replace_slice(*args, **kwargs): - """ - Replace a slice of a binary string. - - For each string in `strings`, replace a slice of the string defined by `start` - and `stop` indices with the given `replacement`. `start` is inclusive - and `stop` is exclusive, and both are measured in bytes. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - options : pyarrow.compute.ReplaceSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def binary_reverse( - strings: _BinaryScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _BinaryScalarT: ... -@overload -def binary_reverse( - strings: _BinaryArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _BinaryArrayT: ... -@overload -def binary_reverse( - strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def binary_reverse(*args, **kwargs): - """ - Reverse binary input. - - For each binary string in `strings`, return a reversed version. - - This function reverses the binary data at a byte-level. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def replace_substring( - strings: _StringScalarT, - /, - pattern: str | bytes, - replacement: str | bytes, - *, - max_replacements: int | None = None, - options: ReplaceSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... 
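# A minimal usage sketch of the byte-oriented kernels documented above
# (binary_repeat, binary_replace_slice, binary_reverse), assuming pyarrow is
# installed; the sample data is illustrative. Slice offsets are bytes: start is
# inclusive, stop is exclusive.
import pyarrow as pa
import pyarrow.compute as pc

b = pa.array([b"abcdef", None])
pc.binary_repeat(b, 2)                                         # [b"abcdefabcdef", null]
pc.binary_replace_slice(b, start=1, stop=3, replacement="XY")  # [b"aXYdef", null]
pc.binary_reverse(b)                                           # [b"fedcba", null]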
-@overload -def replace_substring( - strings: _StringArrayT, - /, - pattern: str | bytes, - replacement: str | bytes, - *, - max_replacements: int | None = None, - options: ReplaceSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def replace_substring( - strings: Expression, - /, - pattern: str | bytes, - replacement: str | bytes, - *, - max_replacements: int | None = None, - options: ReplaceSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def replace_substring(*args, **kwargs): - """ - Replace matching non-overlapping substrings with replacement. - - For each string in `strings`, replace non-overlapping substrings that match - the given literal `pattern` with the given `replacement`. - If `max_replacements` is given and not equal to -1, it limits the - maximum amount replacements per input, counted from the left. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - replacement : str - What to replace the pattern with. - max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). - options : pyarrow.compute.ReplaceSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -replace_substring_regex = _clone_signature(replace_substring) -""" -Replace matching non-overlapping substrings with replacement. - -For each string in `strings`, replace non-overlapping substrings that match -the given regular expression `pattern` with the given `replacement`. -If `max_replacements` is given and not equal to -1, it limits the -maximum amount replacements per input, counted from the left. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -replacement : str - What to replace the pattern with. -max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). -options : pyarrow.compute.ReplaceSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def utf8_capitalize( - strings: _StringScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringScalarT: ... -@overload -def utf8_capitalize( - strings: _StringArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringArrayT: ... -@overload -def utf8_capitalize( - strings: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def utf8_capitalize(*args, **kwargs): - """ - Capitalize the first character of input. - - For each string in `strings`, return a capitalized version, - with the first character uppercased and the others lowercased. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def utf8_length( - strings: lib.StringScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int32Scalar: ... 
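# A minimal usage sketch of the substring replacement kernels documented above,
# assuming pyarrow is installed; the sample strings are illustrative.
# max_replacements limits replacements per value, counted from the left;
# None (the default) means unlimited.
import pyarrow as pa
import pyarrow.compute as pc

s = pa.array(["aaa-bbb-aaa", None])
pc.replace_substring(s, pattern="aaa", replacement="x")                      # ["x-bbb-x", null]
pc.replace_substring(s, pattern="aaa", replacement="x", max_replacements=1)  # ["x-bbb-aaa", null]
pc.replace_substring_regex(s, pattern=r"[ab]+", replacement="x")             # ["x-x-x", null]
pc.utf8_capitalize(s)                                                        # ["Aaa-bbb-aaa", null]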
-@overload -def utf8_length( - strings: lib.LargeStringScalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: ... -@overload -def utf8_length( - strings: lib.StringArray | lib.ChunkedArray[lib.StringScalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array: ... -@overload -def utf8_length( - strings: lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def utf8_length( - strings: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def utf8_length(*args, **kwargs): - """ - Compute UTF8 string lengths. - - For each string in `strings`, emit its length in UTF8 characters. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -utf8_lower = _clone_signature(utf8_capitalize) -""" -Transform input to lowercase. - -For each string in `strings`, return a lowercase version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def utf8_replace_slice( - strings: _StringScalarT, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def utf8_replace_slice( - strings: _StringArrayT, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def utf8_replace_slice( - strings: Expression, - /, - start: int, - stop: int, - replacement: str | bytes, - *, - options: ReplaceSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def utf8_replace_slice(*args, **kwargs): - """ - Replace a slice of a string. - - For each string in `strings`, replace a slice of the string defined by `start` - and `stop` indices with the given `replacement`. `start` is inclusive - and `stop` is exclusive, and both are measured in UTF8 characters. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - options : pyarrow.compute.ReplaceSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -utf8_reverse = _clone_signature(utf8_capitalize) -""" -Reverse input. - -For each string in `strings`, return a reversed version. - -This function operates on Unicode codepoints, not grapheme -clusters. Hence, it will not correctly reverse grapheme clusters -composed of multiple codepoints. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" -utf8_swapcase = _clone_signature(utf8_capitalize) -""" -Transform input lowercase characters to uppercase and uppercase characters to lowercase. - -For each string in `strings`, return an opposite case version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_title = _clone_signature(utf8_capitalize) -""" -Titlecase each word of input. - -For each string in `strings`, return a titlecased version. -Each word in the output will start with an uppercase character and its -remaining characters will be lowercase. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_upper = _clone_signature(utf8_capitalize) -""" -Transform input to uppercase. - -For each string in `strings`, return an uppercase version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory poo -""" - -def utf8_normalize( - strings: _StringArrayT, /, form: str, *, options: Utf8NormalizeOptions | None = None, memory_pool: lib.MemoryPool | None = None -) -> _StringArrayT: - """ - Utf8-normalize input - - For each string in `strings`, return the normal form. - The normalization form must be given in the Utf8NormalizeOptions. - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - form : str - Unicode normalization form. - Accepted values are "NFC", "NFKC", "NFD", NFKD". - """ - -# ========================= 2.12 String padding ========================= -@overload -def ascii_center( - strings: _StringScalarT, - /, - width: int, - padding: str = " ", - lean_left_on_odd_padding: bool = True, - *, - options: PadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def ascii_center( - strings: _StringArrayT, - /, - width: int, - padding: str = " ", - lean_left_on_odd_padding: bool = True, - *, - options: PadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def ascii_center( - strings: Expression, - /, - width: int, - padding: str = " ", - lean_left_on_odd_padding: bool = True, - *, - options: PadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ascii_center(*args, **kwargs): - """ - Center strings by padding with a given character. - - For each string in `strings`, emit a centered string by padding both sides - with the given ASCII character. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - width : int - Desired string length. - padding : str, default " " - What to pad the string with. Should be one byte or codepoint. - lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). - options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- """ - -ascii_lpad = _clone_signature(ascii_center) -""" -Right-align strings by padding with a given character. - -For each string in `strings`, emit a right-aligned string by prepending -the given ASCII character. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_rpad = _clone_signature(ascii_center) -""" -Left-align strings by padding with a given character. - -For each string in `strings`, emit a left-aligned string by appending -the given ASCII character. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_center = _clone_signature(ascii_center) -""" -Center strings by padding with a given character. - -For each string in `strings`, emit a centered string by padding both sides -with the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_lpad = _clone_signature(ascii_center) -""" -Right-align strings by padding with a given character. - -For each string in `strings`, emit a right-aligned string by prepending -the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_rpad = _clone_signature(ascii_center) -""" -Left-align strings by padding with a given character. - -For each string in `strings`, emit a left-aligned string by appending -the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def utf8_zero_fill( - strings: _StringScalarT, - /, - width: int, - padding: str = '0', - *, - options: ZeroFillOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def utf8_zero_fill( - strings: _StringArrayT, - /, - width: int | None = None, - padding: str | None = '0', - *, - options: ZeroFillOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def utf8_zero_fill( - strings: Expression, - /, - width: int, - padding: str = '0', - *, - options: ZeroFillOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def utf8_zero_fill(*args, **kwargs): - """ - Left-pad strings to a given width, preserving leading sign characters - - For each string in `strings`, emit a string of length `width` by - prepending the given padding character (defaults to '0' if not specified). - If the string starts with '+' or '-', the sign is preserved and padding - occurs after the sign. Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - width : int - Desired string length. - padding : str, default "0" - Padding character. Should be one Unicode codepoint. - options : pyarrow.compute.ZeroFillOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ -utf8_zfill = _clone_signature(utf8_zero_fill) - -# ========================= 2.13 String trimming ========================= -@overload -def ascii_ltrim( - strings: _StringScalarT, - /, - characters: str, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def ascii_ltrim( - strings: _StringArrayT, - /, - characters: str, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def ascii_ltrim( - strings: Expression, - /, - characters: str, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ascii_ltrim(*args, **kwargs): - """ - Trim leading characters. - - For each string in `strings`, remove any leading characters - from the `characters` option (as given in TrimOptions). - Null values emit null. - Both the `strings` and the `characters` are interpreted as - ASCII; to trim non-ASCII characters, use `utf8_ltrim`. 
- - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - characters : str - Individual characters to be trimmed from the string. - options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -ascii_rtrim = _clone_signature(ascii_ltrim) -""" -Trim trailing characters. - -For each string in `strings`, remove any trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. -Both the `strings` and the `characters` are interpreted as -ASCII; to trim non-ASCII characters, use `utf8_rtrim`. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_trim = _clone_signature(ascii_ltrim) -""" -Trim leading and trailing characters. - -For each string in `strings`, remove any leading or trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. -Both the `strings` and the `characters` are interpreted as -ASCII; to trim non-ASCII characters, use `utf8_trim`. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_ltrim = _clone_signature(ascii_ltrim) -""" -Trim leading characters. - -For each string in `strings`, remove any leading characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_rtrim = _clone_signature(ascii_ltrim) -""" -Trim trailing characters. - -For each string in `strings`, remove any trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_trim = _clone_signature(ascii_ltrim) -""" -Trim leading and trailing characters. - -For each string in `strings`, remove any leading or trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def ascii_ltrim_whitespace( - strings: _StringScalarT, - /, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def ascii_ltrim_whitespace( - strings: _StringArrayT, - /, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def ascii_ltrim_whitespace( - strings: Expression, - /, - *, - options: TrimOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ascii_ltrim_whitespace(*args, **kwargs): - """ - Trim leading ASCII whitespace characters. - - For each string in `strings`, emit a string with leading ASCII whitespace - characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode - whitespace characters. Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim trailing ASCII whitespace characters. - -For each string in `strings`, emit a string with trailing ASCII whitespace -characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode -whitespace characters. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading and trailing ASCII whitespace characters. - -For each string in `strings`, emit a string with leading and trailing ASCII -whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode -whitespace characters. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading whitespace characters. - -For each string in `strings`, emit a string with leading whitespace -characters removed, where whitespace characters are defined by the Unicode -standard. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim trailing whitespace characters. - -For each string in `strings`, emit a string with trailing whitespace -characters removed, where whitespace characters are defined by the Unicode -standard. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading and trailing whitespace characters. - -For each string in `strings`, emit a string with leading and trailing -whitespace characters removed, where whitespace characters are defined -by the Unicode standard. 
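The character-trim and whitespace-trim kernels above can be exercised as follows (illustrative sketch):

```python
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array(["--data--", "  padded  "])
pc.ascii_trim(arr, characters="-")    # ["data", "  padded  "]
pc.utf8_ltrim(arr, characters="- ")   # ["data--", "padded  "]
pc.utf8_trim_whitespace(arr)          # ["--data--", "padded"]
```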
Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.14 String splitting ========================= -@overload -def ascii_split_whitespace( - strings: _StringScalarT, - /, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[_StringScalarT]: ... -@overload -def ascii_split_whitespace( - strings: lib.Array[lib.Scalar[_DataTypeT]], - /, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ... -@overload -def ascii_split_whitespace( - strings: Expression, - /, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ascii_split_whitespace(*args, **kwargs): - """ - Split string according to any ASCII whitespace. - - Split each string according any non-zero length sequence of ASCII - whitespace characters. The output for each string input is a list - of strings. - - The maximum number of splits and direction of splitting - (forward, reverse) can optionally be defined in SplitOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - options : pyarrow.compute.SplitOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def split_pattern( - strings: _StringOrBinaryScalarT, - /, - pattern: str, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[_StringOrBinaryScalarT]: ... -@overload -def split_pattern( - strings: lib.Array[lib.Scalar[_DataTypeT]], - /, - pattern: str, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitPatternOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[lib.ListScalar[_DataTypeT]]: ... -@overload -def split_pattern( - strings: Expression, - /, - pattern: str, - *, - max_splits: int | None = None, - reverse: bool = False, - options: SplitPatternOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def split_pattern(*args, **kwargs): - """ - Split string according to separator. - - Split each string according to the exact `pattern` defined in - SplitPatternOptions. The output for each string input is a list - of strings. - - The maximum number of splits and direction of splitting - (forward, reverse) can optionally be defined in SplitPatternOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - String pattern to split on. - max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). 
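A small sketch of the splitting kernels above (illustrative inputs; `max_splits` limits the number of split operations, not the number of output elements):

```python
import pyarrow as pa
import pyarrow.compute as pc

csv_like = pa.array(["a,b,c", "x,,y"])
pc.split_pattern(csv_like, pattern=",")                # [["a", "b", "c"], ["x", "", "y"]]
pc.split_pattern(csv_like, pattern=",", max_splits=1)  # [["a", "b,c"], ["x", ",y"]]

pc.ascii_split_whitespace(pa.array(["one  two three"]))
# [["one", "two", "three"]]  (a run of whitespace acts as a single separator)
```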
- reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - options : pyarrow.compute.SplitPatternOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -split_pattern_regex = _clone_signature(split_pattern) -""" -Split string according to regex pattern. - -Split each string according to the regex `pattern` defined in -SplitPatternOptions. The output for each string input is a list -of strings. - -The maximum number of splits and direction of splitting -(forward, reverse) can optionally be defined in SplitPatternOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - String pattern to split on. -max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). -reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. -options : pyarrow.compute.SplitPatternOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -utf8_split_whitespace = _clone_signature(ascii_split_whitespace) -""" -Split string according to any Unicode whitespace. - -Split each string according any non-zero length sequence of Unicode -whitespace characters. The output for each string input is a list -of strings. - -The maximum number of splits and direction of splitting -(forward, reverse) can optionally be defined in SplitOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). -reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. -options : pyarrow.compute.SplitOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.15 String component extraction ========================= -@overload -def extract_regex( - strings: StringOrBinaryScalar, - /, - pattern: str, - *, - options: ExtractRegexOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar: ... -@overload -def extract_regex( - strings: StringOrBinaryArray, - /, - pattern: str, - *, - options: ExtractRegexOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: ... -@overload -def extract_regex( - strings: Expression, - /, - pattern: str, - *, - options: ExtractRegexOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def extract_regex(*args, **kwargs): - """ - Extract substrings captured by a regex pattern. - - For each string in `strings`, match the regular expression and, if - successful, emit a struct with field names and values coming from the - regular expression's named capture groups. If the input is null or the - regular expression fails matching, a null output value is emitted. - - Regular expression matching is done using the Google RE2 library. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. 
- pattern : str - Regular expression with named capture fields. - options : pyarrow.compute.ExtractRegexOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def extract_regex_span( - strings: StringOrBinaryArray, - /, - pattern: str, - *, - options: ExtractRegexSpanOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: - """ - Extract string spans captured by a regex pattern - - For each string in `strings`, match the regular expression and, if - successful, emit a struct with field names and values coming from the - regular expression's named capture groups. Each struct field value - will be a fixed_size_list(offset_type, 2) where offset_type is int32 - or int64, depending on the input string type. The two elements in - each fixed-size list are the index and the length of the substring - matched by the corresponding named capture group. - - If the input is null or the regular expression fails matching, - a null output value is emitted. - - Regular expression matching is done using the Google RE2 library. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Regular expression with named capture fields. - options : pyarrow.compute.ExtractRegexSpanOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.16 String join ========================= -def binary_join( - strings: ArrayOrChunkedArray[lib.ListType[lib.BinaryType]], separator, /, *, memory_pool: lib.MemoryPool | None = None, -) -> StringArray | BinaryArray: ... -""" -Join a list of strings together with a separator. - -Concatenate the strings in `list`. The `separator` is inserted -between each given string. -Any null input and any null `list` element emits a null output. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -separator : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def binary_join_element_wise( - *strings: _StringOrBinaryScalarT | str, - null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", - null_replacement: str = "", - options: JoinOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _BinaryScalarT: ... -@overload -def binary_join_element_wise( - *strings: _StringOrBinaryArrayT | Sequence[str | None], - null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", - null_replacement: str = "", - options: JoinOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryArrayT: ... -@overload -def binary_join_element_wise( - *strings: Expression, - null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", - null_replacement: str = "", - options: JoinOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def binary_join_element_wise(*args, **kwargs): - """ - Join string arguments together, with the last argument as separator. - - Concatenate the `strings` except for the last one. The last argument - in `strings` is inserted between each given string. - Any null separator element emits a null output. 
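A hedged sketch of regex extraction and joining (illustrative values; `extract_regex` needs named capture groups, and the last argument to `binary_join_element_wise` is the separator):

```python
import pyarrow as pa
import pyarrow.compute as pc

logs = pa.array(["key=a", "key=b", "no match"])
pc.extract_regex(logs, pattern=r"key=(?P<value>\w+)")
# [{"value": "a"}, {"value": "b"}, null]

pc.binary_join(pa.array([["usr", "local"], ["tmp"]]), "/")
# ["usr/local", "tmp"]

pc.binary_join_element_wise(pa.array(["2024", "2025"]), pa.array(["01", "12"]), "-")
# ["2024-01", "2025-12"]
```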
Null elements either - emit a null (the default), are skipped, or replaced with a given string. - - Parameters - ---------- - *strings : Array-like or scalar-like - Argument to compute function. - null_handling : str, default "emit_null" - How to handle null values in the inputs. - Accepted values are "emit_null", "skip", "replace". - null_replacement : str, default "" - Replacement string to emit for null inputs if `null_handling` - is "replace". - options : pyarrow.compute.JoinOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.17 String Slicing ========================= -@overload -def binary_slice( - strings: _BinaryScalarT, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _BinaryScalarT: ... -@overload -def binary_slice( - strings: _BinaryArrayT, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _BinaryArrayT: ... -@overload -def binary_slice( - strings: Expression, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def binary_slice(*args, **kwargs): - """ - Slice binary string. - - For each binary string in `strings`, emit the substring defined by - (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is - inclusive and `stop` is exclusive. All three values are measured in - bytes. - If `step` is negative, the string will be advanced in reversed order. - An error is raised if `step` is zero. - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int or None, default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. - step : int, default 1 - Slice step. - options : pyarrow.compute.SliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def utf8_slice_codeunits( - strings: _StringScalarT, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT: ... -@overload -def utf8_slice_codeunits( - strings: _StringArrayT, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _StringArrayT: ... -@overload -def utf8_slice_codeunits( - strings: Expression, - /, - start: int, - stop: int | None = None, - step: int = 1, - *, - options: SliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def utf8_slice_codeunits(*args, **kwargs): - """ - Slice string. - - For each string in `strings`, emit the substring defined by - (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is - inclusive and `stop` is exclusive. All three values are measured in - UTF8 codeunits. - If `step` is negative, the string will be advanced in reversed order. - An error is raised if `step` is zero. - Null inputs emit null. 
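A short sketch of the slicing kernels: `binary_slice` counts bytes while `utf8_slice_codeunits` counts UTF8 codeunits (illustrative values):

```python
import pyarrow as pa
import pyarrow.compute as pc

words = pa.array(["pyarrow", "compute"])
pc.utf8_slice_codeunits(words, start=0, stop=3)   # ["pya", "com"]
pc.utf8_slice_codeunits(words, start=-3)          # ["row", "ute"]

pc.binary_slice(pa.array([b"\x00\x01\x02\x03"]), start=1, stop=3)
# [b"\x01\x02"]
```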
- - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int or None, default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. - step : int, default 1 - Slice step. - options : pyarrow.compute.SliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.18 Containment tests ========================= -@overload -def count_substring( - strings: lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType], - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Scalar | lib.Int64Scalar: ... -@overload -def count_substring( - strings: lib.Array[lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType]] - | lib.ChunkedArray[lib.Scalar[lib.StringType | lib.BinaryType | lib.LargeStringType | lib.LargeBinaryType]], - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array | lib.Int64Array: ... -@overload -def count_substring( - strings: Expression, - /, - pattern: Any, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def count_substring(*args, **kwargs): - """ - Count occurrences of substring. - - For each string in `strings`, emit the number of occurrences of the given - literal pattern. - Null inputs emit null. The pattern must be given in MatchSubstringOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -count_substring_regex = _clone_signature(count_substring) -""" -Count occurrences of substring. - -For each string in `strings`, emit the number of occurrences of the given -regular expression pattern. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def ends_with( - strings: StringScalar | BinaryScalar, - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: ... -@overload -def ends_with( - strings: StringArray | BinaryArray, - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... 
-@overload -def ends_with( - strings: Expression, - /, - pattern: str, - *, - ignore_case: bool = False, - options: MatchSubstringOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ends_with(*args, **kwargs): - """ - Check if strings end with a literal pattern. - - For each string in `strings`, emit true iff it ends with a given pattern. - The pattern must be given in MatchSubstringOptions. - If ignore_case is set, only simple case folding is performed. - - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -find_substring = _clone_signature(count_substring) -""" -Find first occurrence of substring. - -For each string in `strings`, emit the index in bytes of the first occurrence -of the given literal pattern, or -1 if not found. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -find_substring_regex = _clone_signature(count_substring) -""" -Find location of first match of regex pattern. - -For each string in `strings`, emit the index in bytes of the first occurrence -of the given literal pattern, or -1 if not found. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def index_in( - values: lib.Scalar, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Scalar: ... -@overload -def index_in( - values: lib.Array | lib.ChunkedArray, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array: ... -@overload -def index_in( - values: Expression, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def index_in(*args, **kwargs): - """ - Return index of each element in a set of values. - - For each element in `values`, return its index in a given set of - values, or null if it is not found there. - The set of values to look for must be given in SetLookupOptions. 
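A minimal sketch of the substring containment kernels above (illustrative inputs; `find_substring` reports -1 when the pattern is absent):

```python
import pyarrow as pa
import pyarrow.compute as pc

fruit = pa.array(["banana", "apple", None])
pc.count_substring(fruit, pattern="an")   # [2, 0, null]
pc.find_substring(fruit, pattern="an")    # [1, -1, null]
pc.ends_with(fruit, pattern="e")          # [false, true, null]
```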
- By default, nulls are matched against the value set, this can be - changed in SetLookupOptions. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - value_set : Array - Set of values to look for in the input. - skip_nulls : bool, default False - If False, nulls in the input are matched in the value_set just - like regular values. - If True, nulls in the input always fail matching. - options : pyarrow.compute.SetLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def is_in( - values: lib.Scalar, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: ... -@overload -def is_in( - values: lib.Array | lib.ChunkedArray, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def is_in( - values: Expression, - /, - value_set: lib.Array | lib.ChunkedArray, - *, - skip_nulls: bool = False, - options: SetLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def is_in(*args, **kwargs): - """ - Find each element in a set of values. - - For each element in `values`, return true if it is found in a given - set of values, false otherwise. - The set of values to look for must be given in SetLookupOptions. - By default, nulls are matched against the value set, this can be - changed in SetLookupOptions. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - value_set : Array - Set of values to look for in the input. - skip_nulls : bool, default False - If False, nulls in the input are matched in the value_set just - like regular values. - If True, nulls in the input always fail matching. - options : pyarrow.compute.SetLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -match_like = _clone_signature(ends_with) -""" -Match strings against SQL-style LIKE pattern. - -For each string in `strings`, emit true iff it matches a given pattern -at any position. '%' will match any number of characters, '_' will -match exactly one character, and any other character matches itself. -To match a literal '%', '_', or '\', precede the character with a backslash. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -match_substring = _clone_signature(ends_with) -""" -Match strings against literal pattern. - -For each string in `strings`, emit true iff it contains a given pattern. -Null inputs emit null. -The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. 
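A quick sketch of the set-lookup kernels (`index_in` returns positions within `value_set`, `is_in` returns booleans); values are illustrative:

```python
import pyarrow as pa
import pyarrow.compute as pc

values = pa.array(["a", "b", "c", None])
value_set = pa.array(["c", "a"])
pc.index_in(values, value_set=value_set)  # [1, null, 0, null]
pc.is_in(values, value_set=value_set)     # [true, false, true, false]
```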
- -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -match_substring_regex = _clone_signature(ends_with) -""" -Match strings against regex pattern. - -For each string in `strings`, emit true iff it matches a given pattern -at any position. The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. - -Null inputs emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -starts_with = _clone_signature(ends_with) -""" -Check if strings start with a literal pattern. - -For each string in `strings`, emit true iff it starts with a given pattern. -The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. - -Null inputs emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.19 Categorizations ========================= -@overload -def is_finite( - values: NumericScalar | lib.NullScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def is_finite( - values: NumericArray | lib.NullArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanArray: ... -@overload -def is_finite( - values: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def is_finite(*args, **kwargs): - """ - Return true if value is finite. - - For each input value, emit true iff the value is finite - (i.e. neither NaN, inf, nor -inf). - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -is_inf = _clone_signature(is_finite) -""" -Return true if infinity. - -For each input value, emit true iff the value is infinite (inf or -inf). - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -is_nan = _clone_signature(is_finite) -""" -Return true if NaN. - -For each input value, emit true iff the value is NaN. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. 
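The pattern-matching kernels above, sketched with illustrative file names:

```python
import pyarrow as pa
import pyarrow.compute as pc

names = pa.array(["flight.py", "FLIGHT.PY", "notes.txt"])
pc.match_substring(names, pattern="flight")                    # [true, false, false]
pc.match_substring(names, pattern="flight", ignore_case=True)  # [true, true, false]
pc.match_like(names, pattern="%.py")                           # [true, false, false]
pc.starts_with(names, pattern="flight", ignore_case=True)      # [true, true, false]
```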
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def is_null( - values: lib.Scalar, - /, - *, - nan_is_null: bool = False, - options: NullOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: ... -@overload -def is_null( - values: lib.Array | lib.ChunkedArray, - /, - *, - nan_is_null: bool = False, - options: NullOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def is_null( - values: Expression, - /, - *, - nan_is_null: bool = False, - options: NullOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def is_null(*args, **kwargs): - """ - Return true if null (and optionally NaN). - - For each input value, emit true iff the value is null. - True may also be emitted for NaN values by setting the `nan_is_null` flag. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - nan_is_null : bool, default False - Whether floating-point NaN values are considered null. - options : pyarrow.compute.NullOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def is_valid( - values: lib.Scalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def is_valid( - values: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanArray: ... -@overload -def is_valid( - values: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def is_valid(*args, **kwargs): - """ - Return true if non-null. - - For each input value, emit true iff the value is valid (i.e. non-null). - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -true_unless_null = _clone_signature(is_valid) -""" -Return true if non-null, else return null. - -For each input value, emit true iff the value -is valid (non-null), otherwise emit null. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.20 Selecting / multiplexing ========================= -def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): - """ - Choose values based on multiple conditions. - - `cond` must be a struct of Boolean values. `cases` can be a mix - of scalar and array arguments (of any type, but all must be the - same type or castable to a common type), with either exactly one - datum per child of `cond`, or one more `cases` than children of - `cond` (in which case we have an "else" value). - - Each row of the output will be the corresponding value of the - first datum in `cases` for which the corresponding child of `cond` - is true, or otherwise the "else" value (if given), or null. - - Essentially, this implements a switch-case or if-else, if-else... statement. - - Parameters - ---------- - cond : Array-like or scalar-like - Argument to compute function. - *cases : Array-like or scalar-like - Argument to compute function. 
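A compact sketch of the categorization and validity kernels; note that `is_finite`/`is_nan` propagate nulls, while `is_null`/`is_valid` always emit non-null booleans:

```python
import pyarrow as pa
import pyarrow.compute as pc

vals = pa.array([1.5, float("nan"), float("inf"), None])
pc.is_finite(vals)                  # [true, false, false, null]
pc.is_nan(vals)                     # [false, true, false, null]
pc.is_null(vals)                    # [false, false, false, true]
pc.is_null(vals, nan_is_null=True)  # [false, true, false, true]
pc.is_valid(vals)                   # [true, true, true, false]
```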
- memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): - """ - Choose values from several arrays. - - For each row, the value of the first argument is used as a 0-based index - into the list of `values` arrays (i.e. index 0 selects the first of the - `values` arrays). The output value is the corresponding value of the - selected argument. - - If an index is null, the output will be null. - - Parameters - ---------- - indices : Array-like or scalar-like - Argument to compute function. - *values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def coalesce( - *values: _ScalarOrArrayT | Expression, memory_pool: lib.MemoryPool | None = None -) -> _ScalarOrArrayT: - """ - Select the first non-null value. - - Each row of the output will be the value from the first corresponding input - for which the value is not null. If all inputs are null in a row, the output - will be null. - - Parameters - ---------- - *values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -fill_null = coalesce -"""Replace each null element in values with a corresponding -element from fill_value. - -If fill_value is scalar-like, then every null element in values -will be replaced with fill_value. If fill_value is array-like, -then the i-th element in values will be replaced with the i-th -element in fill_value. - -The fill_value's type must be the same as that of values, or it -must be able to be implicitly casted to the array's type. - -This is an alias for :func:`coalesce`. - -Parameters ----------- -values : Array, ChunkedArray, or Scalar-like object - Each null element is replaced with the corresponding value - from fill_value. -fill_value : Array, ChunkedArray, or Scalar-like object - If not same type as values, will attempt to cast. - -Returns -------- -result : depends on inputs - Values with all null elements replaced - -Examples --------- ->>> import pyarrow as pa ->>> arr = pa.array([1, 2, None, 3], type=pa.int8()) ->>> fill_value = pa.scalar(5, type=pa.int8()) ->>> arr.fill_null(fill_value) - -[ - 1, - 2, - 5, - 3 -] ->>> arr = pa.array([1, 2, None, 4, None]) ->>> arr.fill_null(pa.array([10, 20, 30, 40, 50])) - -[ - 1, - 2, - 30, - 4, - 50 -] -""" - -def if_else( - cond: ArrayLike | ScalarLike, - left: ArrayLike | ScalarLike, - right: ArrayLike | ScalarLike, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> ArrayLike | ScalarLike: - """ - Choose values based on a condition. - - `cond` must be a Boolean scalar/ array. - `left` or `right` must be of the same type scalar/ array. - `null` values in `cond` will be promoted to the output. - - Parameters - ---------- - cond : Array-like or scalar-like - Argument to compute function. - left : Array-like or scalar-like - Argument to compute function. - right : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.21 Structural transforms ========================= - -@overload -def list_value_length( - lists: _ListArray[Any], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array: ... 
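A small sketch of the selecting/multiplexing kernels above (illustrative values):

```python
import pyarrow as pa
import pyarrow.compute as pc

nums = pa.array([1, None, 3, None])
backup = pa.array([10, 20, 30, None])

pc.coalesce(nums, backup)                   # [1, 20, 3, null]
pc.fill_null(nums, 0)                       # [1, 0, 3, 0]
pc.if_else(pc.is_null(nums), backup, nums)  # [1, 20, 3, null]
```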
-@overload -def list_value_length( - lists: _LargeListArray[Any], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def list_value_length( - lists: ListArray[Any], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array | lib.Int64Array: ... -@overload -def list_value_length( - lists: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def list_value_length(*args, **kwargs): - """ - Compute list lengths. - - `lists` must have a list-like type. - For each non-null value in `lists`, its length is emitted. - Null values emit a null in the output. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def make_struct( - *args: lib.Scalar | lib._AsPyType, - field_names: list[str] | tuple[str, ...] = (), - field_nullability: bool | None = None, - field_metadata: list[lib.KeyValueMetadata] | None = None, - options: MakeStructOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar: ... -@overload -def make_struct( - *args: lib.Array | lib.ChunkedArray | list[lib._AsPyType], - field_names: list[str] | tuple[str, ...] = (), - field_nullability: bool | None = None, - field_metadata: list[lib.KeyValueMetadata] | None = None, - options: MakeStructOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: ... -@overload -def make_struct( - *args: Expression, - field_names: list[str] | tuple[str, ...] = (), - field_nullability: bool | None = None, - field_metadata: list[lib.KeyValueMetadata] | None = None, - options: MakeStructOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def make_struct(*args, **kwargs): - """ - Wrap Arrays into a StructArray. - - Names of the StructArray's fields are - specified through MakeStructOptions. - - Parameters - ---------- - *args : Array-like or scalar-like - Argument to compute function. - field_names : sequence of str - Names of the struct fields to create. - field_nullability : sequence of bool, optional - Nullability information for each struct field. - If omitted, all fields are nullable. - field_metadata : sequence of KeyValueMetadata, optional - Metadata for each struct field. - options : pyarrow.compute.MakeStructOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.22 Conversions ========================= - -def run_end_decode( - array: lib.Array, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Decode run-end encoded array. - - Return a decoded version of a run-end encoded input array. - - Parameters - ---------- - array : Array-like - Argument to compute function. - - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - - -def run_end_encode( - array: lib.Array, - /, - run_end_type: lib.Type_INT16 | lib.Type_INT32 | lib.Type_INT64 = lib.Type_INT32, - *, - options: RunEndEncodeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Run-end encode array. - - Return a run-end encoded version of the input array. - - Parameters - ---------- - - array : Array-like - Argument to compute function. 
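A brief sketch of the structural kernels `list_value_length` and `make_struct` (illustrative values):

```python
import pyarrow as pa
import pyarrow.compute as pc

lists = pa.array([[1, 2, 3], [], None])
pc.list_value_length(lists)   # [3, 0, null]

names = pa.array(["a", "b"])
scores = pa.array([1.0, 2.0])
pc.make_struct(names, scores, field_names=["name", "score"])
# [{"name": "a", "score": 1.0}, {"name": "b", "score": 2.0}]
```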
- - run_end_type : DataType, default pyarrow.int32() - The data type of the run_ends array. - - Accepted values are pyarrow.{int16(), int32(), int64()}. - - options : pyarrow.compute.RunEndEncodeOptions, optional - Alternative way of passing options. - - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def ceil_temporal( - timestamps: _TemporalScalarT, - /, - multiple: int = 1, - unit: Literal[ - "year", - "quarter", - "month", - "week", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - "nanosecond", - ] = "day", - *, - week_starts_monday: bool = True, - ceil_is_strictly_greater: bool = False, - calendar_based_origin: bool = False, - options: RoundTemporalOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _TemporalScalarT: ... -@overload -def ceil_temporal( - timestamps: _TemporalArrayT, - /, - multiple: int = 1, - unit: Literal[ - "year", - "quarter", - "month", - "week", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - "nanosecond", - ] = "day", - *, - week_starts_monday: bool = True, - ceil_is_strictly_greater: bool = False, - calendar_based_origin: bool = False, - options: RoundTemporalOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _TemporalArrayT: ... -@overload -def ceil_temporal( - timestamps: Expression, - /, - multiple: int = 1, - unit: Literal[ - "year", - "quarter", - "month", - "week", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - "nanosecond", - ] = "day", - *, - week_starts_monday: bool = True, - ceil_is_strictly_greater: bool = False, - calendar_based_origin: bool = False, - options: RoundTemporalOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def ceil_temporal(*args, **kwargs): - """ - Round temporal values up to nearest multiple of specified time unit. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - multiple : int, default 1 - Number of units to round to. - unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. - calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. 
- Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. - options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -floor_temporal = _clone_signature(ceil_temporal) -""" -Round temporal values down to nearest multiple of specified time unit. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -timestamps : Array-like or scalar-like - Argument to compute function. -multiple : int, default 1 - Number of units to round to. -unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". -week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. -ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. -calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. -options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -round_temporal = _clone_signature(ceil_temporal) -""" -Round temporal values to the nearest multiple of specified time unit. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -timestamps : Array-like or scalar-like - Argument to compute function. 
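A hedged sketch of the temporal rounding kernels with default options, using timezone-naive, illustrative timestamps:

```python
from datetime import datetime
import pyarrow as pa
import pyarrow.compute as pc

ts = pa.array([datetime(2024, 3, 1, 10, 37), datetime(2024, 3, 1, 23, 59)])
pc.floor_temporal(ts, multiple=15, unit="minute")  # [2024-03-01 10:30:00, 2024-03-01 23:45:00]
pc.ceil_temporal(ts, multiple=1, unit="hour")      # [2024-03-01 11:00:00, 2024-03-02 00:00:00]
pc.round_temporal(ts, unit="day")                  # [2024-03-01 00:00:00, 2024-03-02 00:00:00]
```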
-multiple : int, default 1 - Number of units to round to. -unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". -week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. -ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. -calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. -options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def cast( - arr: lib.Scalar, - target_type: _DataTypeT | None = None, - safe: bool | None = None, - options: CastOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Scalar[_DataTypeT]: ... -@overload -def cast( - arr: lib.Array, - target_type: _DataTypeT | str | None = None, - safe: bool | None = None, - options: CastOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array[lib.Scalar[_DataTypeT]]: ... -@overload -def cast( - arr: lib.ChunkedArray, - target_type: _DataTypeT | None = None, - safe: bool | None = None, - options: CastOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... -def cast(*args, **kwargs): - """ - Cast array values to another data type. Can also be invoked as an array - instance method. - - Parameters - ---------- - arr : Array-like - target_type : DataType or str - Type to cast to - safe : bool, default True - Check for overflows or other unsafe conversions - options : CastOptions, default None - Additional checks pass by CastOptions - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. 
- - Examples - -------- - >>> from datetime import datetime - >>> import pyarrow as pa - >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) - >>> arr.type - TimestampType(timestamp[us]) - - You can use ``pyarrow.DataType`` objects to specify the target type: - - >>> cast(arr, pa.timestamp("ms")) - - [ - 2010-01-01 00:00:00.000, - 2015-01-01 00:00:00.000 - ] - - >>> cast(arr, pa.timestamp("ms")).type - TimestampType(timestamp[ms]) - - Alternatively, it is also supported to use the string aliases for these - types: - - >>> arr.cast("timestamp[ms]") - - [ - 2010-01-01 00:00:00.000, - 2015-01-01 00:00:00.000 - ] - >>> arr.cast("timestamp[ms]").type - TimestampType(timestamp[ms]) - - Returns - ------- - casted : Array - The cast result as a new Array - """ - -@overload -def strftime( - timestamps: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, - /, - format: str = "%Y-%m-%dT%H:%M:%S", - locale: str = "C", - *, - options: StrftimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StringScalar: ... -@overload -def strftime( - timestamps: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - format: str = "%Y-%m-%dT%H:%M:%S", - locale: str = "C", - *, - options: StrftimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StringArray: ... -@overload -def strftime( - timestamps: Expression, - /, - format: str = "%Y-%m-%dT%H:%M:%S", - locale: str = "C", - *, - options: StrftimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def strftime(*args, **kwargs): - """ - Format temporal values according to a format string. - - For each input value, emit a formatted string. - The time format string and locale can be set using StrftimeOptions. - The output precision of the "%S" (seconds) format code depends on - the input time precision: it is an integer for timestamps with - second precision, a real number with the required number of fractional - digits for higher precisions. - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database, or if the specified locale - does not exist on this system. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - format : str, default "%Y-%m-%dT%H:%M:%S" - Pattern for formatting input values. - locale : str, default "C" - Locale to use for locale-specific format specifiers. - options : pyarrow.compute.StrftimeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def strptime( - strings: StringScalar, - /, - format: str, - unit: Literal["s", "ms", "us", "ns"], - error_is_null: bool = False, - *, - options: StrptimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampScalar: ... -@overload -def strptime( - strings: StringArray, - /, - format: str, - unit: Literal["s", "ms", "us", "ns"], - error_is_null: bool = False, - *, - options: StrptimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampArray: ... -@overload -def strptime( - strings: Expression, - /, - format: str, - unit: Literal["s", "ms", "us", "ns"], - error_is_null: bool = False, - *, - options: StrptimeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def strptime(*args, **kwargs): - """ - Parse timestamps. 
- - For each string in `strings`, parse it as a timestamp. - The timestamp unit and the expected string pattern must be given - in StrptimeOptions. Null inputs emit null. If a non-null string - fails parsing, an error is returned by default. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - format : str - Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". - Note that the semantics of the format follow the C/C++ strptime, not the Python one. - There are differences in behavior, for example how the "%y" placeholder - handles years with less than four digits. - unit : str - Timestamp unit of the output. - Accepted values are "s", "ms", "us", "ns". - error_is_null : boolean, default False - Return null on parsing errors if true or raise if false. - options : pyarrow.compute.StrptimeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.23 Temporal component extraction ========================= -@overload -def day( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar: ... -@overload -def day( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Array: ... -@overload -def day(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def day(*args, **kwargs): - """ - Extract day number. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def day_of_week( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, - /, - *, - count_from_zero: bool = True, - week_start: int = 1, - options: DayOfWeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: ... -@overload -def day_of_week( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - *, - count_from_zero: bool = True, - week_start: int = 1, - options: DayOfWeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def day_of_week( - values: Expression, - /, - *, - count_from_zero: bool = True, - week_start: int = 1, - options: DayOfWeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def day_of_week(*args, **kwargs): - """ - Extract day of the week number. - - By default, the week starts on Monday represented by 0 and ends on Sunday - represented by 6. - `DayOfWeekOptions.week_start` can be used to set another starting day using - the ISO numbering convention (1=start week on Monday, 7=start week on Sunday). - Day numbers can start at 0 or 1 based on `DayOfWeekOptions.count_from_zero`. - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). 
- How this value is numbered is unaffected by `count_from_zero`. - options : pyarrow.compute.DayOfWeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -day_of_year = _clone_signature(day) -""" -Extract day of year number. - -January 1st maps to day number 1, February 1st to 32, etc. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def hour( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT | lib.Time32Scalar[Any] | lib.Time64Scalar[Any], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: ... -@overload -def hour( - values: _ZonedTimestampArrayT - | _ZonelessTimestampArrayT - | lib.Time32Array[Any] - | lib.Time64Array[Any] - | lib.ChunkedArray[lib.Time32Scalar[Any]] - | lib.ChunkedArray[lib.Time64Scalar[Any]], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def hour( - values: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def hour(*args, **kwargs): - """ - Extract hour value. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def is_dst( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar: ... -@overload -def is_dst( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def is_dst(values: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def is_dst(*args, **kwargs): - """ - Extracts if currently observing daylight savings. - - IsDaylightSavings returns true if a timestamp has a daylight saving - offset in the given timezone. - Null values emit null. - An error is returned if the values do not have a defined timezone. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def iso_week( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar: ... -@overload -def iso_week( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def iso_week( - values: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def iso_week(*args, **kwargs): - """ - Extract ISO week of year number. - - First ISO week has the majority (4 or more) of its days in January. - ISO week starts on Monday. The week number starts with 1 and can run - up to 53. - Null values emit null. 
- An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -iso_year = _clone_signature(iso_week) -""" -Extract ISO year number. - -First week of an ISO year has the majority (4 or more) of its days in January. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def iso_calendar( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.StructScalar: ... -@overload -def iso_calendar( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: ... -@overload -def iso_calendar( - values: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def iso_calendar(*args, **kwargs): - """ - Extract (ISO year, ISO week, ISO day of week) struct. - - ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7. - Null values emit null. An error is returned if the values have a defined - timezone, but it cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def is_leap_year( - values: _ZonedTimestampScalarT | _ZonelessTimestampScalarT | lib.Date32Scalar | lib.Date64Scalar, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: ... -@overload -def is_leap_year( - values: _ZonedTimestampArrayT - | _ZonelessTimestampArrayT - | lib.Date32Array - | lib.Date64Array - | lib.ChunkedArray[lib.TimestampScalar] - | lib.ChunkedArray[lib.Date32Scalar] - | lib.ChunkedArray[lib.Date64Scalar], - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanArray: ... -@overload -def is_leap_year( - values: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def is_leap_year(*args, **kwargs): - """ - Extract if year is a leap year. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -microsecond = _clone_signature(iso_week) -""" -Extract microsecond values. - -Microsecond returns number of microseconds since the last full millisecond. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -millisecond = _clone_signature(iso_week) -""" -Extract millisecond values. 
- -Millisecond returns number of milliseconds since the last full second. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -minute = _clone_signature(iso_week) -""" -Extract minute values. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -month = _clone_signature(day_of_week) -""" -Extract month number. - -Month is encoded as January=1, December=12. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -nanosecond = _clone_signature(hour) -""" -Extract nanosecond values. - -Nanosecond returns number of nanoseconds since the last full microsecond. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -quarter = _clone_signature(day_of_week) -""" -Extract quarter of year number. - -First quarter maps to 1 and fourth quarter maps to 4. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -second = _clone_signature(hour) -""" -Extract second values. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -subsecond = _clone_signature(hour) -""" -Extract subsecond values. - -Subsecond returns the fraction of a second since the last full second. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -us_week = _clone_signature(iso_week) -""" -Extract US week of year number. - -First US week has the majority (4 or more) of its days in January. -US week starts on Monday. The week number starts with 1 and can run -up to 53. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database.
- -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -us_year = _clone_signature(iso_week) -""" -Extract US epidemiological year number. - -First week of US epidemiological year has the majority (4 or more) of -its days in January. Last week of US epidemiological year has the -year's last Wednesday in it. US epidemiological week starts on Sunday. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -year = _clone_signature(iso_week) -""" -Extract year number. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def week( - values: lib.Scalar[lib.TimestampType[Any, Any]], - /, - *, - week_starts_monday: bool = True, - count_from_zero: bool = False, - first_week_is_fully_in_year: bool = False, - options: WeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: ... -@overload -def week( - values: _ZonedTimestampArrayT | _ZonelessTimestampArrayT, - /, - *, - week_starts_monday: bool = True, - count_from_zero: bool = False, - first_week_is_fully_in_year: bool = False, - options: WeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Array: ... -@overload -def week( - values: Expression, - /, - *, - week_starts_monday: bool = True, - count_from_zero: bool = False, - first_week_is_fully_in_year: bool = False, - options: WeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def week(*args, **kwargs): - """ - Extract week of year number. - - First week has the majority (4 or more) of its days in January. - Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using - DayOfWeekOptions.count_from_zero. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - count_from_zero : bool, default False - If True, dates at the start of a year that fall into the last week - of the previous year emit 0. - If False, they emit 52 or 53 (the week number of the last week - of the previous year). - first_week_is_fully_in_year : bool, default False - If True, week number 0 is fully in January. - If False, a week that begins on December 29, 30 or 31 is considered - to be week number 0 of the following year. - options : pyarrow.compute.WeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def year_month_day( - values: TemporalScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.StructScalar: ...
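The temporal component extraction stubs in this section map onto existing pyarrow.compute kernels. A minimal usage sketch of how these declarations are exercised (values are illustrative and this snippet is not part of the stub file):

    from datetime import datetime

    import pyarrow as pa
    import pyarrow.compute as pc

    ts = pa.array([datetime(2023, 1, 2, 8, 30), datetime(2023, 12, 31, 23, 59)], pa.timestamp("s"))
    pc.day(ts)           # Int64Array: [2, 31]
    pc.day_of_week(ts)   # Monday counts as 0 with the default options
    pc.iso_calendar(ts)  # StructArray with iso_year, iso_week, iso_day_of_week fields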
-@overload -def year_month_day( - values: TemporalArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.StructArray: ... -@overload -def year_month_day( - values: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def year_month_day(*args, **kwargs): - """ - Extract (year, month, day) struct. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 2.24 Temporal difference ========================= -def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Compute the number of days and milliseconds between two timestamps. - - Returns the number of days and milliseconds from `start` to `end`. - That is, first the difference in days is computed as if both - timestamps were truncated to the day, then the difference between the times - of the two timestamps is computed as if both times were truncated to the - millisecond. - Null values return null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def days_between( - start, end, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar | lib.Int64Array: - """ - Compute the number of days between two timestamps. - - Returns the number of day boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the day. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -hours_between = _clone_signature(days_between) -""" -Compute the number of hours between two timestamps. - -Returns the number of hour boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the hour. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -microseconds_between = _clone_signature(days_between) -""" -Compute the number of microseconds between two timestamps. - -Returns the number of microsecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the microsecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -milliseconds_between = _clone_signature(days_between) -""" -Compute the number of milliseconds between two timestamps.
- -Returns the number of millisecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the millisecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -minutes_between = _clone_signature(days_between) -""" -Compute the number of minutes between two timestamps. - -Returns the number of minute boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the minute. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -def month_day_nano_interval_between( - start, end, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: - """ - Compute the number of months, days and nanoseconds between two timestamps. - - Returns the number of months, days, and nanoseconds from `start` to `end`. - That is, first the difference in months is computed as if both timestamps - were truncated to the months, then the difference between the days - is computed, and finally the difference between the times of the two - timestamps is computed as if both times were truncated to the nanosecond. - Null values return null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Compute the number of months between two timestamps. - - Returns the number of month boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the month. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -nanoseconds_between = _clone_signature(days_between) -""" -Compute the number of nanoseconds between two timestamps. - -Returns the number of nanosecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the nanosecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool.
-""" -quarters_between = _clone_signature(days_between) -""" -Compute the number of quarters between two timestamps. - -Returns the number of quarter start boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the quarter. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -seconds_between = _clone_signature(days_between) -""" -Compute the number of seconds between two timestamps. - -Returns the number of second boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the second. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -def weeks_between( - start, - end, - /, - *, - count_from_zero: bool = True, - week_start: int = 1, - options: DayOfWeekOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar | lib.Int64Array: - """ - Compute the number of weeks between two timestamps. - - Returns the number of week boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the week. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). - How this value is numbered is unaffected by `count_from_zero`. - options : pyarrow.compute.DayOfWeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -years_between = _clone_signature(days_between) -""" -Compute the number of years between two timestamps. - -Returns the number of year boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the year. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -# ========================= 2.25 Timezone handling ========================= -@overload -def assume_timezone( - timestamps: _ZonelessTimestampScalarT, - /, - timezone: str, - *, - ambiguous: Literal["raise", "earliest", "latest"] = "raise", - nonexistent: Literal["raise", "earliest", "latest"] = "raise", - options: AssumeTimezoneOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ZonedTimestampScalarT: ... 
-@overload -def assume_timezone( - timestamps: _ZonelessTimestampArrayT, - /, - timezone: str, - *, - ambiguous: Literal["raise", "earliest", "latest"] = "raise", - nonexistent: Literal["raise", "earliest", "latest"] = "raise", - options: AssumeTimezoneOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ZonedTimestampArrayT: ... -@overload -def assume_timezone( - timestamps: _ZonelessTimestampScalarT, - /, - *, - options: AssumeTimezoneOptions, - memory_pool: lib.MemoryPool | None = None, -) -> _ZonedTimestampScalarT: ... -@overload -def assume_timezone( - timestamps: _ZonelessTimestampArrayT, - /, - *, - options: AssumeTimezoneOptions, - memory_pool: lib.MemoryPool | None = None, -) -> _ZonedTimestampArrayT: ... -@overload -def assume_timezone( - timestamps: Expression, - /, - timezone: str, - *, - ambiguous: Literal["raise", "earliest", "latest"] = "raise", - nonexistent: Literal["raise", "earliest", "latest"] = "raise", - options: AssumeTimezoneOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def assume_timezone(*args, **kwargs): - """ - Convert naive timestamp to timezone-aware timestamp. - - Input timestamps are assumed to be relative to the timezone given in the - `timezone` option. They are converted to UTC-relative timestamps and - the output type has its timezone set to the value of the `timezone` - option. Null values emit null. - This function is meant to be used when an external system produces - "timezone-naive" timestamps which need to be converted to - "timezone-aware" timestamps. An error is returned if the timestamps - already have a defined timezone. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - timezone : str - Timezone to assume for the input. - ambiguous : str, default "raise" - How to handle timestamps that are ambiguous in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - nonexistent : str, default "raise" - How to handle timestamps that don't exist in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - options : pyarrow.compute.AssumeTimezoneOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def local_timestamp( - timestamps: _ZonedTimestampScalarT, /, *, memory_pool: lib.MemoryPool | None = None -) -> _ZonelessTimestampScalarT: ... -@overload -def local_timestamp( - timestamps: _ZonedTimestampArrayT, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _ZonelessTimestampArrayT: ... -@overload -def local_timestamp( - timestamps: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -def local_timestamp(*args, **kwargs): - """ - Convert timestamp to a timezone-naive local time timestamp. - - LocalTimestamp converts timezone-aware timestamp to local timestamp - of the given timestamp's timezone and removes timezone metadata. - Alternative name for this timestamp is also wall clock time. - If input is in UTC or without timezone, then unchanged input values - without timezone metadata are returned. - Null values emit null. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- """ - -# ========================= 2.26 Random number generation ========================= -def random( - n: int, - *, - initializer: Hashable = "system", - options: RandomOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Generate numbers in the range [0, 1). - - Generated values are uniformly-distributed, double-precision - in range [0, 1). Algorithm and seed can be changed via RandomOptions. - - Parameters - ---------- - n : int - Number of values to generate, must be greater than or equal to 0 - initializer : int or str - How to initialize the underlying random generator. - If an integer is given, it is used as a seed. - If "system" is given, the random generator is initialized with - a system-specific source of (hopefully true) randomness. - Other values are invalid. - options : pyarrow.compute.RandomOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 3. Array-wise (“vector”) functions ========================= - -# ========================= 3.1 Cumulative Functions ========================= -@overload -def cumulative_sum( - values: _NumericArrayT, - /, - start: lib.Scalar | int | None = None, - *, - skip_nulls: bool = False, - options: CumulativeSumOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT: ... -@overload -def cumulative_sum( - values: Expression, - /, - start: lib.Scalar | None = None, - *, - skip_nulls: bool = False, - options: CumulativeSumOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def cumulative_sum(*args, **kwargs): - """ - Compute the cumulative sum over a numeric input. - - `values` must be numeric. Return an array/chunked array which is the - cumulative sum computed over `values`. Results will wrap around on - integer overflow. Use function "cumulative_sum_checked" if you want - overflow to return an error. The default start is 0. - - Parameters - ---------- - values : Array-like - Argument to compute function. - start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. - skip_nulls : bool, default False - When false, the first encountered null is propagated. - options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -cumulative_sum_checked = _clone_signature(cumulative_sum) -""" -Compute the cumulative sum over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative sum computed over `values`. This function returns an error -on overflow. For a variant that doesn't fail on overflow, use -function "cumulative_sum". The default start is 0. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" -cumulative_prod = _clone_signature(cumulative_sum) -""" -Compute the cumulative product over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative product computed over `values`. Results will wrap around on -integer overflow. Use function "cumulative_prod_checked" if you want -overflow to return an error. The default start is 1. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cumulative_prod_checked = _clone_signature(cumulative_sum) -""" -Compute the cumulative product over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative product computed over `values`. This function returns an error -on overflow. For a variant that doesn't fail on overflow, use -function "cumulative_prod". The default start is 1. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cumulative_max = _clone_signature(cumulative_sum) -""" -Compute the cumulative max over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative max computed over `values`. The default start is the minimum -value of input type (so that any other value will replace the -start as the new maximum). - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cumulative_min = _clone_signature(cumulative_sum) -""" -Compute the cumulative min over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative min computed over `values`. The default start is the maximum -value of input type (so that any other value will replace the -start as the new minimum). - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -cumulative_mean = _clone_signature(cumulative_sum) -""" -Compute the cumulative mean over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative mean computed over `values`. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -# ========================= 3.2 Associative transforms ========================= - -@overload -def dictionary_encode( - array: _ScalarOrArrayT, - /, - null_encoding: Literal["mask", "encode"] = "mask", - *, - options=None, - memory_pool: lib.MemoryPool | None = None, -) -> _ScalarOrArrayT: ... -@overload -def dictionary_encode( - array: Expression, - /, - null_encoding: Literal["mask", "encode"] = "mask", - *, - options=None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def dictionary_decode(array: _ScalarOrArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ScalarOrArrayT: ... -@overload -def dictionary_decode(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -def dictionary_decode(*args, **kwargs): - """ - Decodes a DictionaryArray to an Array. - - Return a plain-encoded version of the array input. - This function does nothing if the input is not a dictionary. - - Parameters - ---------- - array : Array-like - Argument to compute function. - """ -@overload -def unique(array: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... -@overload -def unique(array: Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> Expression: ... -@overload -def value_counts( - array: lib.Array | lib.ChunkedArray, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.StructArray: ... -@overload -def value_counts( - array: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... - -# ========================= 3.3 Selections ========================= -@overload -def array_filter( - array: _ArrayT, - selection_filter: list[bool] | list[bool | None] | BooleanArray, - /, - null_selection_behavior: Literal["drop", "emit_null"] = "drop", - *, - options: FilterOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ArrayT: ... -@overload -def array_filter( - array: Expression, - selection_filter: list[bool] | list[bool | None] | BooleanArray, - /, - null_selection_behavior: Literal["drop", "emit_null"] = "drop", - *, - options: FilterOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def array_take( - array: _ArrayT, - indices: Indices | list[int | None], - /, - *, - boundscheck: bool = True, - options: TakeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _ArrayT: ...
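A brief usage sketch of the associative-transform and selection kernels declared around this point (illustrative data, not part of the stub file):

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array(["a", "b", None, "d"])
    pc.array_filter(arr, pa.array([True, False, True, True]))  # ["a", None, "d"]
    pc.array_take(arr, pa.array([3, 0, None]))                  # ["d", "a", None]
    pc.drop_null(arr)                                           # ["a", "b", "d"]
    pc.value_counts(arr)                                        # StructArray of {values, counts}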
-@overload -def array_take( - array: Expression, - indices: list[int] - | list[int | None] - | lib.Int16Array - | lib.Int32Array - | lib.Int64Array - | lib.ChunkedArray[lib.Int16Scalar] - | lib.ChunkedArray[lib.Int32Scalar] - | lib.ChunkedArray[lib.Int64Scalar], - /, - *, - boundscheck: bool = True, - options: TakeOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def drop_null(input: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ... -@overload -def drop_null( - input: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... - -filter = array_filter -take = array_take -""" -Select values (or records) from array- or table-like data given integer -selection indices. - -The result will be of the same type(s) as the input, with elements taken -from the input array (or record batch / table fields) at the given -indices. If an index is null then the corresponding value in the output -will be null. - -Parameters ----------- -data : Array, ChunkedArray, RecordBatch, or Table -indices : Array, ChunkedArray - Must be of integer type -boundscheck : boolean, default True - Whether to boundscheck the indices. If False and there is an out of - bounds index, will likely cause the process to crash. -memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - -Returns -------- -result : depends on inputs - Selected values for the given indices - -Examples --------- ->>> import pyarrow as pa ->>> arr = pa.array(["a", "b", "c", None, "e", "f"]) ->>> indices = pa.array([0, None, 4, 3]) ->>> arr.take(indices) - -[ - "a", - null, - "e", - null -] -""" - -# ========================= 3.4 Containment tests ========================= -@overload -def indices_nonzero( - values: lib.BooleanArray - | lib.NullArray - | NumericArray - | lib.Decimal128Array - | lib.Decimal256Array, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... -@overload -def indices_nonzero( - values: Expression, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def indices_nonzero(*args, **kwargs): - """ - Return the indices of the values in the array that are non-zero. - - For each input value, check if it's zero, false or null. Emit the index - of the value in the array if it's none of those. - - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 3.5 Sorts and partitions ========================= -@overload -def array_sort_indices( - array: lib.Array | lib.ChunkedArray, - /, - order: _Order = "ascending", - *, - null_placement: _Placement = "at_end", - options: ArraySortOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... -@overload -def array_sort_indices( - array: Expression, - /, - order: _Order = "ascending", - *, - null_placement: _Placement = "at_end", - options: ArraySortOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def array_sort_indices(*args, **kwargs): - """ - Return the indices that would sort an array. - - This function computes an array of indices that define a stable sort - of the input array. By default, null values are considered greater - than any other value and are therefore sorted at the end of the array.
- For floating-point types, NaNs are considered greater than any - other non-null value, but smaller than null values. - - The handling of nulls and NaNs can be changed in ArraySortOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - null_placement : str, default "at_end" - Where nulls in the input should be sorted. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.ArraySortOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def partition_nth_indices( - array: lib.Array | lib.ChunkedArray | Sequence[int | float | str | None], - /, - pivot: int, - *, - null_placement: _Placement = "at_end", - options: PartitionNthOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... -@overload -def partition_nth_indices( - array: Expression, - /, - pivot: int, - *, - null_placement: _Placement = "at_end", - options: PartitionNthOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def partition_nth_indices(*args, **kwargs): - """ - Return the indices that would partition an array around a pivot. - - This function computes an array of indices that define a non-stable - partial sort of the input array. - - The output is such that the `N`'th index points to the `N`'th element - of the input in sorted order, and all indices before the `N`'th point - to elements in the input less than or equal to elements at or after the `N`'th. - - By default, null values are considered greater than any other value - and are therefore partitioned towards the end of the array. - For floating-point types, NaNs are considered greater than any - other non-null value, but smaller than null values. - - The pivot index `N` must be given in PartitionNthOptions. - The handling of nulls and NaNs can also be changed in PartitionNthOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - pivot : int - Index into the equivalent sorted array of the pivot element. - null_placement : str, default "at_end" - Where nulls in the input should be partitioned. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.PartitionNthOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def rank( - input: lib.Array | lib.ChunkedArray, - /, - sort_keys: _Order = "ascending", - *, - null_placement: _Placement = "at_end", - tiebreaker: Literal["min", "max", "first", "dense"] = "first", - options: RankOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: - """ - Compute ordinal ranks of an array (1-based). - - This function computes a rank of the input array. - By default, null values are considered greater than any other value and - are therefore sorted at the end of the input. For floating-point types, - NaNs are considered greater than any other non-null value, but smaller - than null values. The default tiebreaker is to assign ranks in order of - when ties appear in the input. - - The handling of nulls, NaNs and tiebreakers can be changed in RankOptions. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function.
- sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - tiebreaker : str, default "first" - Configure how ties between equal values are handled. - Accepted values are: - - - "min": Ties get the smallest possible rank in sorted order. - - "max": Ties get the largest possible rank in sorted order. - - "first": Ranks are assigned in order of when ties appear in the - input. This ensures the ranks are a stable permutation - of the input. - - "dense": The ranks span a dense [1, M] interval where M is the - number of distinct values in the input. - options : pyarrow.compute.RankOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def rank_quantile( - input: lib.Array | lib.ChunkedArray, - /, - sort_keys: _Order = "ascending", - *, - null_placement: _Placement = "at_end", - options: RankQuantileOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: - """ - Compute quantile ranks of an array (1-based). - - This function computes a quantile rank of the input array. - By default, null values are considered greater than any other value and - are therefore sorted at the end of the input. For floating-point types, - NaNs are considered greater than any other non-null value, but smaller - than null values. - - The results are real values strictly between 0 and 1. They are - computed as in https://en.wikipedia.org/wiki/Quantile_rank - but without multiplying by 100. - - The handling of nulls and NaNs can be changed in RankQuantileOptions. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.RankQuantileOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - - -rank_normal = _clone_signature(rank_quantile) -""" -Compute normal (gaussian) ranks of an array (1-based). - -This function computes a normal (gaussian) rank of the input array. -By default, null values are considered greater than any other value and -are therefore sorted at the end of the input. For floating-point types, -NaNs are considered greater than any other non-null value, but smaller -than null values. -The results are finite real values. They are obtained as if first -calling the "rank_quantile" function and then applying the normal -percent-point function (PPF) to the resulting quantile values. 
- -The handling of nulls and NaNs can be changed in RankQuantileOptions. - -Parameters ----------- -input : Array-like or scalar-like - Argument to compute function. -sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. -null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". -options : pyarrow.compute.RankQuantileOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" - -@overload -def select_k_unstable( - input: lib.Array | lib.ChunkedArray | lib.Table, - /, - k: int, - sort_keys: list[tuple[str | Expression, _Order]] | None = None, - *, - options: SelectKOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... -@overload -def select_k_unstable( - input: Expression, - /, - k: int, - sort_keys: list[tuple[str | Expression, _Order]] | None = None, - *, - options: SelectKOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def select_k_unstable( - input: lib.Array | lib.ChunkedArray | lib.Table, - /, - options: SelectKOptions, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... -@overload -def select_k_unstable( - input: Expression, - /, - options: SelectKOptions, - *, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def select_k_unstable(*args, **kwargs): - """ - Select the indices of the first `k` ordered elements from the input. - - This function selects an array of indices of the first `k` ordered elements - from the `input` array, record batch or table specified in the column keys - (`options.sort_keys`). Output is not guaranteed to be stable. - Null values are considered greater than any other value and are - therefore ordered at the end. For floating-point types, NaNs are considered - greater than any other non-null value, but smaller than null values. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - k : int - Number of leading values to select in sorted order - (i.e. the largest values if sort order is "descending", - the smallest otherwise). - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - options : pyarrow.compute.SelectKOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def sort_indices( - input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, - /, - sort_keys: Sequence[tuple[str|Expression, _Order]] = (), - *, - null_placement: _Placement = "at_end", - options: SortOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: ... 
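A short sketch of how the sorting declarations around this point are used together (column names and data are illustrative, not part of the stub file):

    import pyarrow as pa
    import pyarrow.compute as pc

    table = pa.table({"k": ["b", "a", "c"], "v": [2, 1, 3]})
    idx = pc.sort_indices(table, sort_keys=[("k", "ascending")])  # UInt64Array [1, 0, 2]
    table.take(idx)  # rows reordered so that column k reads ["a", "b", "c"]
    pc.rank(pa.array([5, 2, 2, None]))  # [3, 1, 2, 4] with the default "first" tiebreaker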
-@overload -def sort_indices( - input: Expression, - /, - sort_keys: Sequence[tuple[str|Expression, _Order]] = (), - *, - null_placement: _Placement = "at_end", - options: SortOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def sort_indices(*args, **kwargs): - """ - Return the indices that would sort an array, record batch or table. - - This function computes an array of indices that define a stable sort - of the input array, record batch or table. By default, null values are - considered greater than any other value and are therefore sorted at the - end of the input. For floating-point types, NaNs are considered greater - than any other non-null value, but smaller than null values. - - The handling of nulls and NaNs can be changed in SortOptions. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - null_placement : str, default "at_end" - Where nulls in input should be sorted, only applying to - columns/fields mentioned in `sort_keys`. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.SortOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -# ========================= 3.6 Structural transforms ========================= -@overload -def list_element( - lists: Expression, index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -@overload -def list_element( - lists: lib.Array[ListScalar[_DataTypeT]] | lib.Array[lib.Scalar[lib.ListType[lib.StructType]]], - index: ScalarLike, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.Array[lib.Scalar[_DataTypeT]]: ... -@overload -def list_element( - lists: lib.ChunkedArray[ListScalar[_DataTypeT]], - index: ScalarLike, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... -@overload -def list_element( - lists: ListScalar[_DataTypeT], - index: ScalarLike, - /, - *, - memory_pool: lib.MemoryPool | None = None, -) -> _DataTypeT: ... -def list_element(*args, **kwargs): - """ - Compute elements using of nested list values using an index. - - `lists` must have a list-like type. - For each value in each list of `lists`, the element at `index` - is emitted. Null values emit a null in the output. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - index : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def list_flatten( - lists: Expression, - /, - recursive: bool = False, - *, - options: ListFlattenOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def list_flatten( - lists: ArrayOrChunkedArray[ListScalar[Any]], - /, - recursive: bool = False, - *, - options: ListFlattenOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[Any]: ... -def list_flatten(*args, **kwargs): - """ - Flatten list values. - - `lists` must have a list-like type (lists, list-views, and - fixed-size lists). 
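An illustrative sketch of sort_indices and list_element as typed above; the sample table and arrays are made up for the example.

import pyarrow as pa
import pyarrow.compute as pc

table = pa.table({"group": ["a", "b", "a"], "value": [3, 1, 2]})

# Indices of a stable sort over the requested keys; pair with take() to reorder.
idx = pc.sort_indices(table, sort_keys=[("group", "ascending"), ("value", "descending")])
sorted_table = table.take(idx)

# list_element picks one element per list; null lists emit a null in the output.
lists = pa.array([[1, 2, 3], [4, 5], None])
pc.list_element(lists, 0)  # -> [1, 4, null]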
- Return an array with the top list level flattened unless - `recursive` is set to true in ListFlattenOptions. When that - is that case, flattening happens recursively until a non-list - array is formed. - - Null list values do not emit anything to the output. - - Parameters - ---------- - lists : Array-like - Argument to compute function. - recursive : bool, default False - When True, the list array is flattened recursively until an array - of non-list values is formed. - options : pyarrow.compute.ListFlattenOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def list_parent_indices( - lists: Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> Expression: ... -@overload -def list_parent_indices( - lists: ArrayOrChunkedArray[Any], /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Array: ... -def list_parent_indices(*args, **kwargs): - """ - Compute parent indices of nested list values. - - `lists` must have a list-like or list-view type. - For each value in each list of `lists`, the top-level list index - is emitted. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -@overload -def list_slice( - lists: Expression, - /, - start: int, - stop: int | None = None, - step: int = 1, - return_fixed_size_list: bool | None = None, - *, - options: ListSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -@overload -def list_slice( - lists: ArrayOrChunkedArray[Any], - /, - start: int, - stop: int | None = None, - step: int = 1, - return_fixed_size_list: bool | None = None, - *, - options: ListSliceOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[Any]: ... -def list_slice(*args, **kwargs): - """ - Compute slice of list-like array. - - `lists` must have a list-like type. - For each list element, compute a slice, returning a new list array. - A variable or fixed size list array is returned, depending on options. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing inner list elements (inclusive). - stop : Optional[int], default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. (NotImplemented) - step : int, default 1 - Slice step. - return_fixed_size_list : Optional[bool], default None - Whether to return a FixedSizeListArray. If true _and_ stop is after - a list element's length, nulls will be appended to create the - requested slice size. The default of `None` will return the same - type which was passed in. - options : pyarrow.compute.ListSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def map_lookup( - container, - /, - query_key, - occurrence: str, - *, - options: MapLookupOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -): - """ - Find the items corresponding to a given key in a Map. - - For a given query key (passed via MapLookupOptions), extract - either the FIRST, LAST or ALL items from a Map that have - matching keys. 
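A short sketch of the list kernels documented above, with illustrative input values.

import pyarrow as pa
import pyarrow.compute as pc

lists = pa.array([[1, 2], None, [3]])

pc.list_flatten(lists)           # [1, 2, 3]     null lists emit nothing
pc.list_parent_indices(lists)    # [0, 0, 2]     top-level index per element
pc.list_slice(lists, 0, stop=1)  # [[1], null, [3]]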
- - Parameters - ---------- - container : Array-like or scalar-like - Argument to compute function. - query_key : Scalar or Object can be converted to Scalar - The key to search for. - occurrence : str - The occurrence(s) to return from the Map - Accepted values are "first", "last", or "all". - options : pyarrow.compute.MapLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def struct_field( - values, - /, - indices, - *, - options: StructFieldOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -): - """ - Extract children of a struct or union by index. - - Given a list of indices (passed via StructFieldOptions), extract - the child array or scalar with the given child index, recursively. - - For union inputs, nulls are emitted for union values that reference - a different child than specified. Also, the indices are always - in physical order, not logical type codes - for example, the first - child is always index 0. - - An empty list of indices returns the argument unchanged. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - indices : List[str], List[bytes], List[int], Expression, bytes, str, or int - List of indices for chained field lookup, for example `[4, 1]` - will look up the second nested field in the fifth outer field. - options : pyarrow.compute.StructFieldOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Carry non-null values backward to fill null slots. - - Given an array, propagate next valid observation backward to previous valid - or nothing if all next values are null. - - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Carry non-null values forward to fill null slots. - - Given an array, propagate last valid observation forward to next valid - or nothing if all previous values are null. - - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def replace_with_mask( - values, - mask: list[bool] | list[bool | None] | BooleanArray, - replacements, - /, - *, - memory_pool: lib.MemoryPool | None = None, -): - """ - Replace items selected with a mask. - - Given an array and a boolean mask (either scalar or of equal length), - along with replacement values (either scalar or array), - each element of the array for which the corresponding mask element is - true will be replaced by the next value from the replacements, - or with null if the mask is null. - Hence, for replacement arrays, len(replacements) == sum(mask == true). - - Parameters - ---------- - values : Array-like - Argument to compute function. - mask : Array-like - Argument to compute function. - replacements : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
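The fill and replace kernels above in action; the values, mask and replacements are illustrative (note that len(replacements) must equal the number of true mask slots).

import pyarrow as pa
import pyarrow.compute as pc

values = pa.array([1, None, None, 4])

pc.fill_null_forward(values)    # [1, 1, 1, 4]
pc.fill_null_backward(values)   # [1, 4, 4, 4]

# One replacement value is consumed per true slot in the mask.
mask = pa.array([False, True, True, False])
pc.replace_with_mask(values, mask, pa.array([20, 30]))  # [1, 20, 30, 4]

# struct_field drills into nested children via a list of names or indices.
structs = pa.array([{"x": {"y": 7}}])
pc.struct_field(structs, ["x", "y"])  # [7]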
- """ - -# ========================= 3.7 Pairwise functions ========================= -@overload -def pairwise_diff( - input: _NumericOrTemporalArrayT, - /, - period: int = 1, - *, - options: PairwiseOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT: ... -@overload -def pairwise_diff( - input: Expression, - /, - period: int = 1, - *, - options: PairwiseOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> Expression: ... -def pairwise_diff(*args, **kwargs): - """ - Compute first order difference of an array. - - Computes the first order difference of an array, It internally calls - the scalar function "subtract" to compute - differences, so its - behavior and supported types are the same as - "subtract". The period can be specified in :struct:`PairwiseOptions`. - - Results will wrap around on integer overflow. Use function - "pairwise_diff_checked" if you want overflow to return an error. - - Parameters - ---------- - input : Array-like - Argument to compute function. - period : int, default 1 - Period for applying the period function. - options : pyarrow.compute.PairwiseOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -pairwise_diff_checked = _clone_signature(pairwise_diff) -""" -Compute first order difference of an array. - -Computes the first order difference of an array, It internally calls -the scalar function "subtract_checked" (or the checked variant) to compute -differences, so its behavior and supported types are the same as -"subtract_checked". The period can be specified in :struct:`PairwiseOptions`. - -This function returns an error on overflow. For a variant that doesn't -fail on overflow, use function "pairwise_diff". - -Parameters ----------- -input : Array-like - Argument to compute function. -period : int, default 1 - Period for applying the period function. -options : pyarrow.compute.PairwiseOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" diff --git a/python/pyarrow/cuda.py b/python/pyarrow/cuda.py deleted file mode 100644 index 18c530d4afe..00000000000 --- a/python/pyarrow/cuda.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# flake8: noqa - - -from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, - HostBuffer, BufferReader, BufferWriter, - new_host_buffer, - serialize_record_batch, read_message, - read_record_batch) diff --git a/python/pyarrow/dataset.pyi b/python/pyarrow/dataset.pyi deleted file mode 100644 index 6cb7fed43e6..00000000000 --- a/python/pyarrow/dataset.pyi +++ /dev/null @@ -1,246 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload - -from _typeshed import StrPath -from pyarrow._dataset import ( - CsvFileFormat, - CsvFragmentScanOptions, - Dataset, - DatasetFactory, - DirectoryPartitioning, - FeatherFileFormat, - FileFormat, - FileFragment, - FilenamePartitioning, - FileSystemDataset, - FileSystemDatasetFactory, - FileSystemFactoryOptions, - FileWriteOptions, - Fragment, - FragmentScanOptions, - HivePartitioning, - InMemoryDataset, - IpcFileFormat, - IpcFileWriteOptions, - JsonFileFormat, - JsonFragmentScanOptions, - Partitioning, - PartitioningFactory, - Scanner, - TaggedRecordBatch, - UnionDataset, - UnionDatasetFactory, - WrittenFile, - get_partition_keys, -) -from pyarrow._dataset_orc import OrcFileFormat -from pyarrow._dataset_parquet import ( - ParquetDatasetFactory, - ParquetFactoryOptions, - ParquetFileFormat, - ParquetFileFragment, - ParquetFileWriteOptions, - ParquetFragmentScanOptions, - ParquetReadOptions, - RowGroupInfo, -) -from pyarrow._dataset_parquet_encryption import ( - ParquetDecryptionConfig, - ParquetEncryptionConfig, -) -from pyarrow.compute import Expression, field, scalar -from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table - -from ._fs import SupportedFileSystem - -_orc_available: bool -_parquet_available: bool - -__all__ = [ - "CsvFileFormat", - "CsvFragmentScanOptions", - "Dataset", - "DatasetFactory", - "DirectoryPartitioning", - "FeatherFileFormat", - "FileFormat", - "FileFragment", - "FilenamePartitioning", - "FileSystemDataset", - "FileSystemDatasetFactory", - "FileSystemFactoryOptions", - "FileWriteOptions", - "Fragment", - "FragmentScanOptions", - "HivePartitioning", - "InMemoryDataset", - "IpcFileFormat", - "IpcFileWriteOptions", - "JsonFileFormat", - "JsonFragmentScanOptions", - "Partitioning", - "PartitioningFactory", - "Scanner", - "TaggedRecordBatch", - "UnionDataset", - "UnionDatasetFactory", - "WrittenFile", - "get_partition_keys", - # Orc - "OrcFileFormat", - # Parquet - "ParquetDatasetFactory", - "ParquetFactoryOptions", - "ParquetFileFormat", - "ParquetFileFragment", - "ParquetFileWriteOptions", - "ParquetFragmentScanOptions", - "ParquetReadOptions", - "RowGroupInfo", - # Parquet Encryption - "ParquetDecryptionConfig", - "ParquetEncryptionConfig", - # Compute - "Expression", - "field", - "scalar", - # 
Dataset - "partitioning", - "parquet_dataset", - "write_dataset", -] - -_DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] - -@overload -def partitioning( - schema: Schema, -) -> Partitioning: ... -@overload -def partitioning( - schema: Schema, - *, - flavor: Literal["filename"], - dictionaries: dict[str, Array] | None = None, -) -> Partitioning: ... -@overload -def partitioning( - schema: Schema, - *, - flavor: Literal["filename"], - dictionaries: Literal["infer"], -) -> PartitioningFactory: ... -@overload -def partitioning( - field_names: list[str], - *, - flavor: Literal["filename"], -) -> PartitioningFactory: ... -@overload -def partitioning( - schema: Schema, - *, - flavor: Literal["hive"], - dictionaries: Literal["infer"], -) -> PartitioningFactory: ... -@overload -def partitioning( - *, - flavor: Literal["hive"], -) -> PartitioningFactory: ... -@overload -def partitioning( - schema: Schema, - *, - flavor: Literal["hive"], - dictionaries: dict[str, Array] | None = None, -) -> Partitioning: ... -def parquet_dataset( - metadata_path: StrPath, - schema: Schema | None = None, - filesystem: SupportedFileSystem | None = None, - format: ParquetFileFormat | None = None, - partitioning: Partitioning | PartitioningFactory | None = None, - partition_base_dir: str | None = None, -) -> FileSystemDataset: ... -@overload -def dataset( - source: StrPath | Sequence[StrPath], - schema: Schema | None = None, - format: FileFormat | _DatasetFormat | None = None, - filesystem: SupportedFileSystem | str | None = None, - partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, - partition_base_dir: str | None = None, - exclude_invalid_files: bool | None = None, - ignore_prefixes: list[str] | None = None, -) -> FileSystemDataset: ... -@overload -def dataset( - source: list[Dataset], - schema: Schema | None = None, - format: FileFormat | _DatasetFormat | None = None, - filesystem: SupportedFileSystem | str | None = None, - partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, - partition_base_dir: str | None = None, - exclude_invalid_files: bool | None = None, - ignore_prefixes: list[str] | None = None, -) -> UnionDataset: ... -@overload -def dataset( - source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, - schema: Schema | None = None, - format: FileFormat | _DatasetFormat | None = None, - filesystem: SupportedFileSystem | str | None = None, - partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, - partition_base_dir: str | None = None, - exclude_invalid_files: bool | None = None, - ignore_prefixes: list[str] | None = None, -) -> InMemoryDataset: ... -@overload -def dataset( - source: RecordBatch | Table, - schema: Schema | None = None, - format: FileFormat | _DatasetFormat | None = None, - filesystem: SupportedFileSystem | str | None = None, - partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, - partition_base_dir: str | None = None, - exclude_invalid_files: bool | None = None, - ignore_prefixes: list[str] | None = None, -) -> InMemoryDataset: ... 
-def write_dataset( - data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], - base_dir: StrPath, - *, - basename_template: str | None = None, - format: FileFormat | _DatasetFormat | None = None, - partitioning: Partitioning | list[str] | None = None, - partitioning_flavor: str | None = None, - schema: Schema | None = None, - filesystem: SupportedFileSystem | None = None, - file_options: FileWriteOptions | None = None, - use_threads: bool = True, - max_partitions: int = 1024, - max_open_files: int = 1024, - max_rows_per_file: int = 0, - min_rows_per_group: int = 0, - max_rows_per_group: int = 1024 * 1024, - file_visitor: Callable[[str], None] | None = None, - existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", - create_dir: bool = True, -): ... diff --git a/python/pyarrow/feather.pyi b/python/pyarrow/feather.pyi deleted file mode 100644 index ce8d83dbcd9..00000000000 --- a/python/pyarrow/feather.pyi +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import IO, Literal - -import pandas as pd - -from _typeshed import StrPath -from pyarrow._feather import FeatherError -from pyarrow.lib import Table - -__all__ = [ - "FeatherError", - "FeatherDataset", - "check_chunked_overflow", - "write_feather", - "read_feather", - "read_table", -] - -class FeatherDataset: - path_or_paths: str | list[str] - validate_schema: bool - - def __init__(self, path_or_paths: str | list[str], validate_schema: bool = True) -> None: ... - def read_table(self, columns: list[str] | None = None) -> Table: ... - def validate_schemas(self, piece, table: Table) -> None: ... - def read_pandas( - self, columns: list[str] | None = None, use_threads: bool = True - ) -> pd.DataFrame: ... - -def check_chunked_overflow(name: str, col) -> None: ... -def write_feather( - df: pd.DataFrame | Table, - dest: StrPath | IO, - compression: Literal["zstd", "lz4", "uncompressed"] | None = None, - compression_level: int | None = None, - chunksize: int | None = None, - version: Literal[1, 2] = 2, -) -> None: ... -def read_feather( - source: StrPath | IO, - columns: list[str] | None = None, - use_threads: bool = True, - memory_map: bool = False, - **kwargs, -) -> pd.DataFrame: ... -def read_table( - source: StrPath | IO, - columns: list[str] | None = None, - memory_map: bool = False, - use_threads: bool = True, -) -> Table: ... diff --git a/python/pyarrow/gandiva.pyi b/python/pyarrow/gandiva.pyi deleted file mode 100644 index bc07e15c4a6..00000000000 --- a/python/pyarrow/gandiva.pyi +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Iterable, Literal - -from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable - -class Node(_Weakrefable): - def return_type(self) -> DataType: ... - -class Expression(_Weakrefable): - def root(self) -> Node: ... - def result(self) -> Field: ... - -class Condition(_Weakrefable): - def root(self) -> Node: ... - def result(self) -> Field: ... - -class SelectionVector(_Weakrefable): - def to_array(self) -> Array: ... - -class Projector(_Weakrefable): - @property - def llvm_ir(self): ... - def evaluate( - self, batch: RecordBatch, selection: SelectionVector | None = None - ) -> list[Array]: ... - -class Filter(_Weakrefable): - @property - def llvm_ir(self): ... - def evaluate( - self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32" - ) -> SelectionVector: ... - -class TreeExprBuilder(_Weakrefable): - def make_literal(self, value: float | str | bytes | bool, dtype: DataType) -> Node: ... - def make_expression(self, root_node: Node, return_field: Field) -> Expression: ... - def make_function(self, name: str, children: list[Node], return_type: DataType) -> Node: ... - def make_field(self, field: Field) -> Node: ... - def make_if( - self, condition: Node, this_node: Node, else_node: Node, return_type: DataType - ) -> Node: ... - def make_and(self, children: list[Node]) -> Node: ... - def make_or(self, children: list[Node]) -> Node: ... - def make_in_expression(self, node: Node, values: Iterable, dtype: DataType) -> Node: ... - def make_condition(self, condition: Node) -> Condition: ... - -class Configuration(_Weakrefable): - def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ... - -def make_projector( - schema: Schema, - children: list[Expression], - pool: MemoryPool, - selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE", - configuration: Configuration | None = None, -) -> Projector: ... -def make_filter( - schema: Schema, condition: Condition, configuration: Configuration | None = None -) -> Filter: ... - -class FunctionSignature(_Weakrefable): - def return_type(self) -> DataType: ... - def param_types(self) -> list[DataType]: ... - def name(self) -> str: ... - -def get_registered_function_signatures() -> list[FunctionSignature]: ... diff --git a/python/pyarrow/ipc.pyi b/python/pyarrow/ipc.pyi deleted file mode 100644 index 985cf0678f9..00000000000 --- a/python/pyarrow/ipc.pyi +++ /dev/null @@ -1,140 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from io import IOBase - -import pandas as pd -import pyarrow.lib as lib - -from pyarrow.lib import ( - IpcReadOptions, - IpcWriteOptions, - Message, - MessageReader, - MetadataVersion, - ReadStats, - RecordBatchReader, - WriteStats, - _ReadPandasMixin, - get_record_batch_size, - get_tensor_size, - read_message, - read_record_batch, - read_schema, - read_tensor, - write_tensor, -) - -class RecordBatchStreamReader(lib._RecordBatchStreamReader): - def __init__( - self, - source: bytes | lib.Buffer | lib.NativeFile | IOBase, - *, - options: IpcReadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, - ) -> None: ... - -class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): - def __init__( - self, - sink: str | lib.NativeFile | IOBase, - schema: lib.Schema, - *, - use_legacy_format: bool | None = None, - options: IpcWriteOptions | None = None, - ) -> None: ... - -class RecordBatchFileReader(lib._RecordBatchFileReader): - def __init__( - self, - source: bytes | lib.Buffer | lib.NativeFile | IOBase, - footer_offset: int | None = None, - *, - options: IpcReadOptions | None, - memory_pool: lib.MemoryPool | None = None, - ) -> None: ... - -class RecordBatchFileWriter(lib._RecordBatchFileWriter): - def __init__( - self, - sink: str | lib.NativeFile | IOBase, - schema: lib.Schema, - *, - use_legacy_format: bool | None = None, - options: IpcWriteOptions | None = None, - ) -> None: ... - -def new_stream( - sink: str | lib.NativeFile | IOBase, - schema: lib.Schema, - *, - use_legacy_format: bool | None = None, - options: IpcWriteOptions | None = None, -) -> RecordBatchStreamWriter: ... -def open_stream( - source: bytes | lib.Buffer | lib.NativeFile | IOBase, - *, - options: IpcReadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> RecordBatchStreamReader: ... -def new_file( - sink: str | lib.NativeFile | IOBase, - schema: lib.Schema, - *, - use_legacy_format: bool | None = None, - options: IpcWriteOptions | None = None, -) -> RecordBatchFileWriter: ... -def open_file( - source: bytes | lib.Buffer | lib.NativeFile | IOBase, - footer_offset: int | None = None, - *, - options: IpcReadOptions | None = None, - memory_pool: lib.MemoryPool | None = None, -) -> RecordBatchFileReader: ... -def serialize_pandas( - df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None -) -> lib.Buffer: ... -def deserialize_pandas(buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... 
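A round trip through the streaming IPC classes stubbed above, using illustrative data.

import pyarrow as pa
import pyarrow.ipc as ipc

batch = pa.record_batch({"x": [1, 2, 3]})

# Write a stream into an in-memory sink...
sink = pa.BufferOutputStream()
with ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)
buf = sink.getvalue()

# ...and read it back.
with ipc.open_stream(buf) as reader:
    roundtripped = reader.read_all()  # -> pyarrow.Table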
- -__all__ = [ - "IpcReadOptions", - "IpcWriteOptions", - "Message", - "MessageReader", - "MetadataVersion", - "ReadStats", - "RecordBatchReader", - "WriteStats", - "_ReadPandasMixin", - "get_record_batch_size", - "get_tensor_size", - "read_message", - "read_record_batch", - "read_schema", - "read_tensor", - "write_tensor", - "RecordBatchStreamReader", - "RecordBatchStreamWriter", - "RecordBatchFileReader", - "RecordBatchFileWriter", - "new_stream", - "open_stream", - "new_file", - "open_file", - "serialize_pandas", - "deserialize_pandas", -] diff --git a/python/pyarrow/json.pyi b/python/pyarrow/json.pyi deleted file mode 100644 index 67768db42e4..00000000000 --- a/python/pyarrow/json.pyi +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json - -__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow/orc.pyi b/python/pyarrow/orc.pyi deleted file mode 100644 index 557f38a2b9e..00000000000 --- a/python/pyarrow/orc.pyi +++ /dev/null @@ -1,296 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import IO, Literal - -from _typeshed import StrPath - -from . import _orc -from ._fs import SupportedFileSystem -from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table - -class ORCFile: - """ - Reader interface for a single ORC file - - Parameters - ---------- - source : str or pyarrow.NativeFile - Readable source. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader. - """ - - reader: _orc.ORCReader - def __init__(self, source: StrPath | NativeFile | IO) -> None: ... 
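Sketch of the ORC interfaces stubbed here, assuming an ORC-enabled pyarrow build; the file name is made up.

import pyarrow as pa
from pyarrow import orc

table = pa.table({"a": [1, 2, 3]})
orc.write_table(table, "example.orc")   # hypothetical path

f = orc.ORCFile("example.orc")
f.schema                 # Arrow schema of the file
f.nstripes               # number of stripes
f.read_stripe(0)         # first stripe as a RecordBatch
f.read(columns=["a"])    # whole file as a Table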
- @property - def metadata(self) -> KeyValueMetadata: - """The file metadata, as an arrow KeyValueMetadata""" - @property - def schema(self) -> Schema: - """The file schema, as an arrow schema""" - @property - def nrows(self) -> int: - """The number of rows in the file""" - @property - def nstripes(self) -> int: - """The number of stripes in the file""" - @property - def file_version(self) -> str: - """Format version of the ORC file, must be 0.11 or 0.12""" - @property - def software_version(self) -> str: - """Software instance and version that wrote this file""" - @property - def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: - """Compression codec of the file""" - @property - def compression_size(self) -> int: - """Number of bytes to buffer for the compression codec in the file""" - @property - def writer(self) -> str: - """Name of the writer that wrote this file. - If the writer is unknown then its Writer ID - (a number) is returned""" - @property - def writer_version(self) -> str: - """Version of the writer""" - @property - def row_index_stride(self) -> int: - """Number of rows per an entry in the row index or 0 - if there is no row index""" - @property - def nstripe_statistics(self) -> int: - """Number of stripe statistics""" - @property - def content_length(self) -> int: - """Length of the data stripes in the file in bytes""" - @property - def stripe_statistics_length(self) -> int: - """The number of compressed bytes in the file stripe statistics""" - @property - def file_footer_length(self) -> int: - """The number of compressed bytes in the file footer""" - @property - def file_postscript_length(self) -> int: - """The number of bytes in the file postscript""" - @property - def file_length(self) -> int: - """The number of bytes in the file""" - def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: - """Read a single stripe from the file. - - Parameters - ---------- - n : int - The stripe index - columns : list - If not None, only these columns will be read from the stripe. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e' - - Returns - ------- - pyarrow.RecordBatch - Content of the stripe as a RecordBatch. - """ - def read(self, columns: list[str] | None = None) -> Table: - """Read the whole file. - - Parameters - ---------- - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. Output always follows the - ordering of the file and not the `columns` list. - - Returns - ------- - pyarrow.Table - Content of the file as a Table. - """ - -class ORCWriter: - """ - Writer interface for a single ORC file - - Parameters - ---------- - where : str or pyarrow.io.NativeFile - Writable target. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream - or pyarrow.io.FixedSizeBufferWriter. - file_version : {"0.11", "0.12"}, default "0.12" - Determine which ORC file version to use. - `Hive 0.11 / ORC v0 `_ - is the older version - while `Hive 0.12 / ORC v1 `_ - is the newer one. - batch_size : int, default 1024 - Number of rows the ORC writer writes at a time. - stripe_size : int, default 64 * 1024 * 1024 - Size of each ORC stripe in bytes. - compression : string, default 'uncompressed' - The compression codec. 
- Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} - Note that LZ0 is currently not supported. - compression_block_size : int, default 64 * 1024 - Size of each compression block in bytes. - compression_strategy : string, default 'speed' - The compression strategy i.e. speed vs size reduction. - Valid values: {'SPEED', 'COMPRESSION'} - row_index_stride : int, default 10000 - The row index stride i.e. the number of rows per - an entry in the row index. - padding_tolerance : double, default 0.0 - The padding tolerance. - dictionary_key_size_threshold : double, default 0.0 - The dictionary key size threshold. 0 to disable dictionary encoding. - 1 to always enable dictionary encoding. - bloom_filter_columns : None, set-like or list-like, default None - Columns that use the bloom filter. - bloom_filter_fpp : double, default 0.05 - Upper limit of the false-positive rate of the bloom filter. - """ - - writer: _orc.ORCWriter - is_open: bool - def __init__( - self, - where: StrPath | NativeFile | IO, - *, - file_version: str = "0.12", - batch_size: int = 1024, - stripe_size: int = 64 * 1024 * 1024, - compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", - compression_block_size: int = 65536, - compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", - row_index_stride: int = 10000, - padding_tolerance: float = 0.0, - dictionary_key_size_threshold: float = 0.0, - bloom_filter_columns: list[int] | None = None, - bloom_filter_fpp: float = 0.05, - ): ... - def __enter__(self) -> Self: ... - def __exit__(self, *args, **kwargs) -> None: ... - def write(self, table: Table) -> None: - """ - Write the table into an ORC file. The schema of the table must - be equal to the schema used when opening the ORC file. - - Parameters - ---------- - table : pyarrow.Table - The table to be written into the ORC file - """ - def close(self) -> None: - """ - Close the ORC file - """ - -def read_table( - source: StrPath | NativeFile | IO, - columns: list[str] | None = None, - filesystem: SupportedFileSystem | None = None, -) -> Table: - """ - Read a Table from an ORC file. - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name. For file-like objects, - only read a single file. Use pyarrow.BufferReader to read a file - contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. Output always follows the ordering of the file and - not the `columns` list. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. 
- """ - -def write_table( - table: Table, - where: StrPath | NativeFile | IO, - *, - file_version: str = "0.12", - batch_size: int = 1024, - stripe_size: int = 64 * 1024 * 1024, - compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", - compression_block_size: int = 65536, - compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", - row_index_stride: int = 10000, - padding_tolerance: float = 0.0, - dictionary_key_size_threshold: float = 0.0, - bloom_filter_columns: list[int] | None = None, - bloom_filter_fpp: float = 0.05, -) -> None: - """ - Write a table into an ORC file. - - Parameters - ---------- - table : pyarrow.lib.Table - The table to be written into the ORC file - where : str or pyarrow.io.NativeFile - Writable target. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream - or pyarrow.io.FixedSizeBufferWriter. - file_version : {"0.11", "0.12"}, default "0.12" - Determine which ORC file version to use. - `Hive 0.11 / ORC v0 `_ - is the older version - while `Hive 0.12 / ORC v1 `_ - is the newer one. - batch_size : int, default 1024 - Number of rows the ORC writer writes at a time. - stripe_size : int, default 64 * 1024 * 1024 - Size of each ORC stripe in bytes. - compression : string, default 'uncompressed' - The compression codec. - Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} - Note that LZ0 is currently not supported. - compression_block_size : int, default 64 * 1024 - Size of each compression block in bytes. - compression_strategy : string, default 'speed' - The compression strategy i.e. speed vs size reduction. - Valid values: {'SPEED', 'COMPRESSION'} - row_index_stride : int, default 10000 - The row index stride i.e. the number of rows per - an entry in the row index. - padding_tolerance : double, default 0.0 - The padding tolerance. - dictionary_key_size_threshold : double, default 0.0 - The dictionary key size threshold. 0 to disable dictionary encoding. - 1 to always enable dictionary encoding. - bloom_filter_columns : None, set-like or list-like, default None - Columns that use the bloom filter. - bloom_filter_fpp : double, default 0.05 - Upper limit of the false-positive rate of the bloom filter. - """ diff --git a/python/pyarrow/pandas_compat.pyi b/python/pyarrow/pandas_compat.pyi deleted file mode 100644 index 82fcb19ad97..00000000000 --- a/python/pyarrow/pandas_compat.pyi +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, TypedDict, TypeVar - -import numpy as np -import pandas as pd - -from pandas import DatetimeTZDtype - -from .lib import Array, DataType, Schema, Table - -_T = TypeVar("_T") - -def get_logical_type_map() -> dict[int, str]: ... 
-def get_logical_type(arrow_type: DataType) -> str: ... -def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... -def get_logical_type_from_numpy(pandas_collection) -> str: ... -def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... - -class _ColumnMetadata(TypedDict): - name: str - field_name: str - pandas_type: int - numpy_type: str - metadata: dict | None - -def get_column_metadata( - column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str -) -> _ColumnMetadata: ... -def construct_metadata( - columns_to_convert: list[pd.Series], - df: pd.DataFrame, - column_names: list[str], - index_levels: list[pd.Index], - index_descriptors: list[dict], - preserve_index: bool, - types: list[DataType], - column_field_names: list[str] = ..., -) -> dict[bytes, bytes]: ... -def dataframe_to_types( - df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None -) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... -def dataframe_to_arrays( - df: pd.DataFrame, - schema: Schema, - preserve_index: bool | None, - nthreads: int = 1, - columns: list[str] | None = None, - safe: bool = True, -) -> tuple[Array, Schema, int]: ... -def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... -def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... -def table_to_dataframe( - options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None -) -> pd.DataFrame: ... -def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ... diff --git a/python/pyarrow/substrait.pyi b/python/pyarrow/substrait.pyi deleted file mode 100644 index b78bbd8aebd..00000000000 --- a/python/pyarrow/substrait.pyi +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyarrow._substrait import ( - BoundExpressions, - SubstraitSchema, - deserialize_expressions, - deserialize_schema, - get_supported_functions, - run_query, - serialize_expressions, - serialize_schema, -) - -__all__ = [ - "BoundExpressions", - "get_supported_functions", - "run_query", - "deserialize_expressions", - "serialize_expressions", - "deserialize_schema", - "serialize_schema", - "SubstraitSchema", -] diff --git a/python/pyarrow/util.pyi b/python/pyarrow/util.pyi deleted file mode 100644 index 5c9687bb83f..00000000000 --- a/python/pyarrow/util.pyi +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from collections.abc import Callable -from os import PathLike -from typing import Any, Protocol, Sequence, TypeVar - -_F = TypeVar("_F", bound=Callable) -_N = TypeVar("_N") - -class _DocStringComponents(Protocol): - _docstring_components: list[str] - -def doc( - *docstrings: str | _DocStringComponents | Callable | None, **params: Any -) -> Callable[[_F], _F]: ... -def _is_iterable(obj) -> bool: ... -def _is_path_like(path) -> bool: ... -def _stringify_path(path: str | PathLike) -> str: ... -def product(seq: Sequence[_N]) -> _N: ... -def get_contiguous_span( - shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int -) -> tuple[int, int]: ... -def find_free_port() -> int: ... -def guid() -> str: ... -def _download_urllib(url, out_path) -> None: ... -def _download_requests(url, out_path) -> None: ... -def download_tzdata_on_windows() -> None: ... -def _deprecate_api(old_name, new_name, api, next_version, type=...): ... -def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ... From a7ca3d2c9c490383ee889abd8972f7c5f4cc46e9 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 12 Sep 2025 15:32:41 +0200 Subject: [PATCH 08/26] Remove overloads from stubs and other things --- python/pyarrow/__init__.pyi | 232 +-- python/pyarrow/__lib_pxi/array.pyi | 2281 ++++++++------------------ python/pyarrow/__lib_pxi/io.pyi | 347 ++-- python/pyarrow/__lib_pxi/memory.pyi | 7 +- python/pyarrow/__lib_pxi/scalar.pyi | 1055 +++++------- python/pyarrow/__lib_pxi/tensor.pyi | 51 +- python/pyarrow/__lib_pxi/types.pyi | 915 +++++------ python/pyarrow/_stubs_typing.pyi | 17 +- python/pyarrow/array.pxi | 2 +- python/pyarrow/lib.pyi | 71 +- python/pyarrow/scalar.pxi | 2 +- python/pyarrow/tests/strategies.py | 38 +- python/pyarrow/tests/test_array.py | 8 +- python/pyarrow/tests/test_compute.py | 323 ++-- python/pyarrow/tests/test_io.py | 20 +- python/pyarrow/types.pyi | 506 +++++- python/pyproject.toml | 13 - 17 files changed, 2434 insertions(+), 3454 deletions(-) diff --git a/python/pyarrow/__init__.pyi b/python/pyarrow/__init__.pyi index ed1cad1bf80..d366d1793ff 100644 --- a/python/pyarrow/__init__.pyi +++ b/python/pyarrow/__init__.pyi @@ -15,34 +15,43 @@ # specific language governing permissions and limitations # under the License. +# ruff: noqa: F401, I001, E402 __version__: str import pyarrow.lib as _lib _gc_enabled: bool +# TODO from pyarrow.lib import ( - BuildInfo, - RuntimeInfo, - set_timezone_db_path, + # BuildInfo, + # RuntimeInfo, + # set_timezone_db_path, MonthDayNano, - VersionInfo, - cpp_build_info, - cpp_version, - cpp_version_info, - runtime_info, + # VersionInfo, + # cpp_build_info, + # cpp_version, + # cpp_version_info, + # runtime_info, cpu_count, set_cpu_count, - enable_signal_handlers, + # enable_signal_handlers, io_thread_count, set_io_thread_count, ) -def show_versions() -> None: ... -def show_info() -> None: ... +def show_versions() -> None: + """ + Print various version information, to help with error reporting. 
+ """ +def show_info() -> None: + """ + Print detailed version and platform information, for error reporting + """ def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... +# TODO from pyarrow.lib import ( null, bool_, @@ -123,7 +132,6 @@ from pyarrow.lib import ( UuidType, JsonType, OpaqueType, - PyExtensionType, UnknownExtensionType, register_extension_type, unregister_extension_type, @@ -136,8 +144,8 @@ from pyarrow.lib import ( Array, Tensor, array, - chunked_array, - record_batch, + # chunked_array, + # record_batch, nulls, repeat, SparseCOOTensor, @@ -249,7 +257,7 @@ from pyarrow.lib import ( ) # Buffers, allocation -from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager +# from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager from pyarrow.lib import ( Buffer, @@ -303,79 +311,108 @@ from pyarrow.lib import ( have_libhdfs, ) +# TODO from pyarrow.lib import ( - ChunkedArray, - RecordBatch, - Table, - table, + # ChunkedArray, + # RecordBatch, + # Table, + # table, concat_arrays, - concat_tables, - TableGroupBy, - RecordBatchReader, + # concat_tables, + # TableGroupBy, + # RecordBatchReader, ) # Exceptions -from pyarrow.lib import ( - ArrowCancelled, - ArrowCapacityError, - ArrowException, - ArrowKeyError, - ArrowIndexError, - ArrowInvalid, - ArrowIOError, - ArrowMemoryError, - ArrowNotImplementedError, - ArrowTypeError, - ArrowSerializationError, -) +# from pyarrow.lib import ( +# ArrowCancelled, +# ArrowCapacityError, +# ArrowException, +# ArrowKeyError, +# ArrowIndexError, +# ArrowInvalid, +# ArrowIOError, +# ArrowMemoryError, +# ArrowNotImplementedError, +# ArrowTypeError, +# ArrowSerializationError, +# ) -from pyarrow.ipc import serialize_pandas, deserialize_pandas -import pyarrow.ipc as ipc +# TODO +# from ipc import serialize_pandas, deserialize_pandas +# import ipc as ipc -import pyarrow.types as types +import types as types # ---------------------------------------------------------------------- # Deprecations -from pyarrow.util import _deprecate_api, _deprecate_class +# from util import _deprecate_api, _deprecate_class -from pyarrow.ipc import ( - Message, - MessageReader, - MetadataVersion, - RecordBatchFileReader, - RecordBatchFileWriter, - RecordBatchStreamReader, - RecordBatchStreamWriter, -) +# TODO +# from pyarrow.ipc import ( +# Message, +# MessageReader, +# MetadataVersion, +# RecordBatchFileReader, +# RecordBatchFileWriter, +# RecordBatchStreamReader, +# RecordBatchStreamWriter, +# ) # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. in # wheels) -def get_include() -> str: ... +def get_include() -> str: + """ + Return absolute path to directory containing Arrow C++ include + headers. Similar to numpy.get_include + """ def _get_pkg_config_executable() -> str: ... def _has_pkg_config(pkgname: str) -> bool: ... def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ... -def get_libraries() -> list[str]: ... -def create_library_symlinks() -> None: ... -def get_library_dirs() -> list[str]: ... 
+def get_libraries() -> list[str]: + """ + Return list of library names to include in the `libraries` argument for C + or Cython extensions using pyarrow + """ +def create_library_symlinks() -> None: + """ + With Linux and macOS wheels, the bundled shared libraries have an embedded + ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them + with -larrow won't work unless we create symlinks at locations like + site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses + prior problems we had with shipping two copies of the shared libraries to + permit third party projects like turbodbc to build their C++ extensions + against the pyarrow wheels. + + This function must only be invoked once and only when the shared libraries + are bundled with the Python package, which should only apply to wheel-based + installs. It requires write access to the site-packages/pyarrow directory + and so depending on your system may need to be run with root. + """ +def get_library_dirs() -> list[str]: + """ + Return lists of directories likely to contain Arrow C++ libraries for + linking C or Cython extensions using pyarrow + """ __all__ = [ "__version__", "_lib", "_gc_enabled", - "BuildInfo", - "RuntimeInfo", - "set_timezone_db_path", + # "BuildInfo", + # "RuntimeInfo", + # "set_timezone_db_path", "MonthDayNano", - "VersionInfo", - "cpp_build_info", - "cpp_version", - "cpp_version_info", - "runtime_info", + # "VersionInfo", + # "cpp_build_info", + # "cpp_version", + # "cpp_version_info", + # "runtime_info", "cpu_count", "set_cpu_count", - "enable_signal_handlers", + # "enable_signal_handlers", "io_thread_count", "set_io_thread_count", "show_versions", @@ -461,7 +498,6 @@ __all__ = [ "UuidType", "JsonType", "OpaqueType", - "PyExtensionType", "UnknownExtensionType", "register_extension_type", "unregister_extension_type", @@ -474,8 +510,8 @@ __all__ = [ "Array", "Tensor", "array", - "chunked_array", - "record_batch", + # "chunked_array", + # "record_batch", "nulls", "repeat", "SparseCOOTensor", @@ -584,10 +620,10 @@ __all__ = [ "UuidScalar", "JsonScalar", "OpaqueScalar", - "DeviceAllocationType", - "Device", - "MemoryManager", - "default_cpu_memory_manager", + # "DeviceAllocationType", + # "Device", + # "MemoryManager", + # "default_cpu_memory_manager", "Buffer", "ResizableBuffer", "foreign_buffer", @@ -630,38 +666,38 @@ __all__ = [ "input_stream", "output_stream", "have_libhdfs", - "ChunkedArray", - "RecordBatch", - "Table", - "table", + # "ChunkedArray", + # "RecordBatch", + # "Table", + # "table", "concat_arrays", - "concat_tables", - "TableGroupBy", - "RecordBatchReader", - "ArrowCancelled", - "ArrowCapacityError", - "ArrowException", - "ArrowKeyError", - "ArrowIndexError", - "ArrowInvalid", - "ArrowIOError", - "ArrowMemoryError", - "ArrowNotImplementedError", - "ArrowTypeError", - "ArrowSerializationError", - "serialize_pandas", - "deserialize_pandas", - "ipc", + # "concat_tables", + # "TableGroupBy", + # "RecordBatchReader", + # "ArrowCancelled", + # "ArrowCapacityError", + # "ArrowException", + # "ArrowKeyError", + # "ArrowIndexError", + # "ArrowInvalid", + # "ArrowIOError", + # "ArrowMemoryError", + # "ArrowNotImplementedError", + # "ArrowTypeError", + # "ArrowSerializationError", + # "serialize_pandas", + # "deserialize_pandas", + # "ipc", "types", - "_deprecate_api", - "_deprecate_class", - "Message", - "MessageReader", - "MetadataVersion", - "RecordBatchFileReader", - "RecordBatchFileWriter", - "RecordBatchStreamReader", - "RecordBatchStreamWriter", + # "_deprecate_api", 
+ # "_deprecate_class", + # "Message", + # "MessageReader", + # "MetadataVersion", + # "RecordBatchFileReader", + # "RecordBatchFileWriter", + # "RecordBatchStreamReader", + # "RecordBatchStreamWriter", "get_include", "_get_pkg_config_executable", "_has_pkg_config", diff --git a/python/pyarrow/__lib_pxi/array.pyi b/python/pyarrow/__lib_pxi/array.pyi index c14cd1b8c44..c6e8dfecb62 100644 --- a/python/pyarrow/__lib_pxi/array.pyi +++ b/python/pyarrow/__lib_pxi/array.pyi @@ -15,11 +15,9 @@ # specific language governing permissions and limitations # under the License. -import datetime as dt import sys from collections.abc import Callable -from decimal import Decimal if sys.version_info >= (3, 11): from typing import Self @@ -31,16 +29,14 @@ from typing import ( Iterable, Iterator, Literal, - LiteralString, TypeVar, - overload, ) import numpy as np import pandas as pd from pandas.core.dtypes.base import ExtensionDtype -from pyarrow._compute import CastOptions +from pyarrow._compute import CastOptions # type: ignore[import-not-found] from pyarrow._stubs_typing import ( ArrayLike, Indices, @@ -49,25 +45,23 @@ from pyarrow._stubs_typing import ( SupportArrowArray, SupportArrowDeviceArray, ) -from pyarrow.lib import ( +from pyarrow.lib import ( # type: ignore[attr-defined] Buffer, - Device, - MemoryManager, + Device, # type: ignore[reportAttributeAccessIssue] + MemoryManager, # type: ignore[reportAttributeAccessIssue] MemoryPool, - MonthDayNano, Tensor, _Weakrefable, ) from typing_extensions import deprecated from . import scalar, types -from .device import DeviceAllocationType -from .scalar import NullableCollection, Scalar +from .device import DeviceAllocationType # type: ignore[import-not-found] +from .scalar import Scalar from .types import ( DataType, Field, MapType, - ListType, _AsPyType, _BasicDataType, _BasicValueT, @@ -76,1308 +70,185 @@ from .types import ( _RunEndType, _Size, ) +from .._stubs_typing import NullableCollection -@overload def array( - values: NullableCollection[bool], - type: None = None, + values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: Any | None = None, mask: Mask | None = None, size: int | None = None, from_pandas: bool | None = None, safe: bool = True, memory_pool: MemoryPool | None = None, -) -> BooleanArray: ... -@overload -def array( - values: NullableCollection[int], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Int64Array: ... -@overload -def array( - values: NullableCollection[float], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DoubleArray: ... -@overload -def array( - values: NullableCollection[Decimal], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Decimal128Array: ... -@overload -def array( - values: NullableCollection[dict[str, Any]], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> StructArray: ... 
-@overload -def array( - values: NullableCollection[dt.date], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Date32Array: ... -@overload -def array( - values: NullableCollection[dt.time], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Time64Array[Literal["us"]]: ... -@overload -def array( - values: NullableCollection[dt.timedelta], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DurationArray[Literal["us"]]: ... -@overload -def array( - values: NullableCollection[MonthDayNano], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> MonthDayNanoIntervalArray: ... -@overload -def array( - values: NullableCollection[str], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> StringArray: ... -@overload -def array( - values: NullableCollection[bytes], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> BinaryArray: ... -@overload -def array( - values: NullableCollection[list[Any]], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> ListArray[Any]: ... -@overload -def array( - values: NullableCollection[_ScalarT], - type: None = None, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Array[_ScalarT]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["null"] | types.NullType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> NullArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["bool", "boolean"] | types.BoolType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> BooleanArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i1", "int8"] | types.Int8Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Int8Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i2", "int16"] | types.Int16Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Int16Array: ... 
-@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i4", "int32"] | types.Int32Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Int32Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i8", "int64"] | types.Int64Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Int64Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u1", "uint8"] | types.UInt8Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> UInt8Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u2", "uint16"] | types.UInt16Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> UInt16Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u4", "uint32"] | types.Uint32Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> UInt32Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u8", "uint64"] | types.UInt64Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> UInt64Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f2", "halffloat", "float16"] | types.Float16Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> HalfFloatArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f4", "float", "float32"] | types.Float32Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> FloatArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f8", "double", "float64"] | types.Float64Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DoubleArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["string", "str", "utf8"] | types.StringType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> StringArray: ... 
-@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["binary"] | types.BinaryType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> BinaryArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> LargeStringArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["large_binary"] | types.LargeBinaryType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> LargeBinaryArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["binary_view"] | types.BinaryViewType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> BinaryViewArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["string_view"] | types.StringViewType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> StringViewArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["date32", "date32[day]"] | types.Date32Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Date32Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["date64", "date64[ms]"] | types.Date64Type, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Date64Array: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Time32Array[Literal["s"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Time32Array[Literal["ms"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Time64Array[Literal["us"]]: ... 
-@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Time64Array[Literal["ns"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> TimestampArray[Literal["s"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> TimestampArray[Literal["ms"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> TimestampArray[Literal["us"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[s]"] | types.DurationType[Literal["s"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DurationArray[Literal["s"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DurationArray[Literal["ms"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[us]"] | types.DurationType[Literal["us"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DurationArray[Literal["us"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> DurationArray[Literal["ns"]]: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> MonthDayNanoIntervalArray: ... -@overload -def array( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: _DataTypeT, - mask: Mask | None = None, - size: int | None = None, - from_pandas: bool | None = None, - safe: bool = True, - memory_pool: MemoryPool | None = None, -) -> Array[Scalar[_DataTypeT]]: ... 
-def array(*args, **kawrgs): - """ - Create pyarrow.Array instance from a Python object. - - Parameters - ---------- - obj : sequence, iterable, ndarray, pandas.Series, Arrow-compatible array - If both type and size are specified may be a single use iterable. If - not strongly-typed, Arrow type will be inferred for resulting array. - Any Arrow-compatible array that implements the Arrow PyCapsule Protocol - (has an ``__arrow_c_array__`` or ``__arrow_c_device_array__`` method) - can be passed as well. - type : pyarrow.DataType - Explicit type to attempt to coerce to, otherwise will be inferred from - the data. - mask : array[bool], optional - Indicate which values are null (True) or not null (False). - size : int64, optional - Size of the elements. If the input is larger than size bail at this - length. For iterators, if size is larger than the input iterator this - will be treated as a "max size", but will involve an initial allocation - of size followed by a resize to the actual size (so if you know the - exact size specifying it correctly will give you better performance). - from_pandas : bool, default None - Use pandas's semantics for inferring nulls from values in - ndarray-like data. If passed, the mask tasks precedence, but - if a value is unmasked (not-null), but still null according to - pandas semantics, then it is null. Defaults to False if not - passed explicitly by user, or True if a pandas object is - passed in. - safe : bool, default True - Check for overflows or other unsafe conversions. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the currently-set default - memory pool. - - Returns - ------- - array : pyarrow.Array or pyarrow.ChunkedArray - A ChunkedArray instead of an Array is returned if: - - - the object data overflowed binary storage. - - the object's ``__arrow_array__`` protocol method returned a chunked - array. - - Notes - ----- - Timezone will be preserved in the returned array for timezone-aware data, - else no timezone will be returned for naive timestamps. - Internally, UTC values are stored for timezone-aware data with the - timezone set in the data type. - - Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by - default converted as MonthDayNanoIntervalArray. relativedelta leapdays - are ignored as are all absolute fields on both objects. datetime.timedelta - can also be converted to MonthDayNanoIntervalArray but this requires - passing MonthDayNanoIntervalType explicitly. - - Converting to dictionary array will promote to a wider integer type for - indices if the number of distinct values cannot be represented, even if - the index type was explicitly set. This means that if there are more than - 127 values the returned dictionary array's index type will be at least - pa.int16() even if pa.int8() was passed to the function. Note that an - explicit index type will not be demoted even if it is wider than required. - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> pa.array(pd.Series([1, 2])) - - [ - 1, - 2 - ] - - >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) - - ... 
- -- dictionary: - [ - "a", - "b" - ] - -- indices: - [ - 0, - 1, - 0 - ] - - >>> import numpy as np - >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) - - [ - 1, - null - ] - - >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) - >>> arr.type.index_type - DataType(int16) - """ - -@overload -def asarray(values: NullableCollection[bool]) -> BooleanArray: ... -@overload -def asarray(values: NullableCollection[int]) -> Int64Array: ... -@overload -def asarray(values: NullableCollection[float]) -> DoubleArray: ... -@overload -def asarray(values: NullableCollection[Decimal]) -> Decimal128Array: ... -@overload -def asarray(values: NullableCollection[dict[str, Any]]) -> StructArray: ... -@overload -def asarray(values: NullableCollection[dt.date]) -> Date32Array: ... -@overload -def asarray(values: NullableCollection[dt.time]) -> Time64Array: ... -@overload -def asarray(values: NullableCollection[dt.timedelta]) -> DurationArray: ... -@overload -def asarray(values: NullableCollection[MonthDayNano]) -> MonthDayNanoIntervalArray: ... -@overload -def asarray(values: NullableCollection[list[Any]]) -> ListArray[Any]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["null"] | types.NullType, -) -> NullArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["bool", "boolean"] | types.BoolType, -) -> BooleanArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i1", "int8"] | types.Int8Type, -) -> Int8Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i2", "int16"] | types.Int16Type, -) -> Int16Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i4", "int32"] | types.Int32Type, -) -> Int32Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["i8", "int64"] | types.Int64Type, -) -> Int64Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u1", "uint8"] | types.UInt8Type, -) -> UInt8Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u2", "uint16"] | types.UInt16Type, -) -> UInt16Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u4", "uint32"] | types.Uint32Type, -) -> UInt32Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["u8", "uint64"] | types.UInt64Type, -) -> UInt64Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f2", "halffloat", "float16"] | types.Float16Type, -) -> HalfFloatArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f4", "float", "float32"] | types.Float32Type, -) -> FloatArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["f8", "double", "float64"] | types.Float64Type, -) -> DoubleArray: ... 
-@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["string", "str", "utf8"] | types.StringType, -) -> StringArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["binary"] | types.BinaryType, -) -> BinaryArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["large_string", "large_str", "large_utf8"] | types.LargeStringType, -) -> LargeStringArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["large_binary"] | types.LargeBinaryType, -) -> LargeBinaryArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["binary_view"] | types.BinaryViewType, -) -> BinaryViewArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["string_view"] | types.StringViewType, -) -> StringViewArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["date32", "date32[day]"] | types.Date32Type, -) -> Date32Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["date64", "date64[ms]"] | types.Date64Type, -) -> Date64Array: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time32[s]"] | types.Time32Type[Literal["s"]], -) -> Time32Array[Literal["s"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time32[ms]"] | types.Time32Type[Literal["ms"]], -) -> Time32Array[Literal["ms"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time64[us]"] | types.Time64Type[Literal["us"]], -) -> Time64Array[Literal["us"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["time64[ns]"] | types.Time64Type[Literal["ns"]], -) -> Time64Array[Literal["ns"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[s]"] | types.TimestampType[Literal["s"]], -) -> TimestampArray[Literal["s"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[ms]"] | types.TimestampType[Literal["ms"]], -) -> TimestampArray[Literal["ms"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[us]"] | types.TimestampType[Literal["us"]], -) -> TimestampArray[Literal["us"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["timestamp[ns]"] | types.TimestampType[Literal["ns"]], -) -> TimestampArray[Literal["ns"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[s]"] | types.DurationType[Literal["s"]], -) -> DurationArray[Literal["s"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[ms]"] | types.DurationType[Literal["ms"]], -) -> DurationArray[Literal["ms"]]: ... 
-@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[us]"] | types.DurationType[Literal["us"]], -) -> DurationArray[Literal["us"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["duration[ns]"] | types.DurationType[Literal["ns"]], -) -> DurationArray[Literal["ns"]]: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: Literal["month_day_nano_interval"] | types.MonthDayNanoIntervalType, -) -> MonthDayNanoIntervalArray: ... -@overload -def asarray( - values: Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, - type: _DataTypeT, -) -> Array[Scalar[_DataTypeT]]: ... -def asarray(*args, **kwargs): - """ - Convert to pyarrow.Array, inferring type if not provided. - - Parameters - ---------- - values : array-like - This can be a sequence, numpy.ndarray, pyarrow.Array or - pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be - a ChunkedArray, otherwise the output will be a Array. - type : string or DataType - Explicitly construct the array with this type. Attempt to cast if - indicated type is different. - - Returns - ------- - arr : Array or ChunkedArray - """ - -@overload -def nulls(size: int, memory_pool: MemoryPool | None = None) -> NullArray: ... -@overload -def nulls( - size: int, type: types.NullType | None, memory_pool: MemoryPool | None = None -) -> NullArray: ... -@overload -def nulls( - size: int, type: types.BoolType, memory_pool: MemoryPool | None = None -) -> BooleanArray: ... -@overload -def nulls(size: int, type: types.Int8Type, memory_pool: MemoryPool | None = None) -> Int8Array: ... -@overload -def nulls( - size: int, type: types.Int16Type, memory_pool: MemoryPool | None = None -) -> Int16Array: ... -@overload -def nulls( - size: int, type: types.Int32Type, memory_pool: MemoryPool | None = None -) -> Int32Array: ... -@overload -def nulls( - size: int, type: types.Int64Type, memory_pool: MemoryPool | None = None -) -> Int64Array: ... -@overload -def nulls( - size: int, type: types.UInt8Type, memory_pool: MemoryPool | None = None -) -> UInt8Array: ... -@overload -def nulls( - size: int, type: types.UInt16Type, memory_pool: MemoryPool | None = None -) -> UInt16Array: ... -@overload -def nulls( - size: int, type: types.Uint32Type, memory_pool: MemoryPool | None = None -) -> UInt32Array: ... -@overload -def nulls( - size: int, type: types.UInt64Type, memory_pool: MemoryPool | None = None -) -> UInt64Array: ... -@overload -def nulls( - size: int, type: types.Float16Type, memory_pool: MemoryPool | None = None -) -> HalfFloatArray: ... -@overload -def nulls( - size: int, type: types.Float32Type, memory_pool: MemoryPool | None = None -) -> FloatArray: ... -@overload -def nulls( - size: int, type: types.Float64Type, memory_pool: MemoryPool | None = None -) -> DoubleArray: ... -@overload -def nulls( - size: int, type: types.Decimal32Type, memory_pool: MemoryPool | None = None -) -> Decimal128Array: ... -@overload -def nulls( - size: int, type: types.Decimal64Type, memory_pool: MemoryPool | None = None -) -> Decimal128Array: ... -@overload -def nulls( - size: int, type: types.Decimal128Type, memory_pool: MemoryPool | None = None -) -> Decimal128Array: ... -@overload -def nulls( - size: int, type: types.Decimal256Type, memory_pool: MemoryPool | None = None -) -> Decimal256Array: ... 
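# Illustrative sketch, not part of this patch: asarray(), whose per-type
# overloads are consolidated in this hunk into a single signature further
# below, infers the Arrow type when none is given and attempts a cast when an
# explicit type is passed.
import pyarrow as pa

a = pa.asarray([1, 2, 3])                     # type inferred as int64
b = pa.asarray([1, 2, 3], type=pa.float32())  # cast to the requested float32
assert a.type == pa.int64()
assert b.type == pa.float32()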
-@overload -def nulls( - size: int, type: types.Date32Type, memory_pool: MemoryPool | None = None -) -> Date32Array: ... -@overload -def nulls( - size: int, type: types.Date64Type, memory_pool: MemoryPool | None = None -) -> Date64Array: ... -@overload -def nulls( - size: int, type: types.Time32Type[types._Time32Unit], memory_pool: MemoryPool | None = None -) -> Time32Array[types._Time32Unit]: ... -@overload -def nulls( - size: int, type: types.Time64Type[types._Time64Unit], memory_pool: MemoryPool | None = None -) -> Time64Array[types._Time64Unit]: ... -@overload -def nulls( - size: int, - type: types.TimestampType[types._Unit, types._Tz], - memory_pool: MemoryPool | None = None, -) -> TimestampArray[types._Unit, types._Tz]: ... -@overload -def nulls( - size: int, type: types.DurationType[types._Unit], memory_pool: MemoryPool | None = None -) -> DurationArray[types._Unit]: ... -@overload -def nulls( - size: int, type: types.MonthDayNanoIntervalType, memory_pool: MemoryPool | None = None -) -> MonthDayNanoIntervalArray: ... -@overload -def nulls( - size: int, - type: types.BinaryType, - memory_pool: MemoryPool | None = None, -) -> BinaryArray: ... -@overload -def nulls( - size: int, - type: types.LargeBinaryType, - memory_pool: MemoryPool | None = None, -) -> LargeBinaryArray: ... -@overload -def nulls( - size: int, - type: types.FixedSizeBinaryType, - memory_pool: MemoryPool | None = None, -) -> FixedSizeBinaryArray: ... -@overload -def nulls( - size: int, - type: types.StringType, - memory_pool: MemoryPool | None = None, -) -> StringArray: ... -@overload -def nulls( - size: int, - type: types.LargeStringType, - memory_pool: MemoryPool | None = None, -) -> LargeStringArray: ... -@overload -def nulls( - size: int, - type: types.BinaryViewType, - memory_pool: MemoryPool | None = None, -) -> BinaryViewArray: ... -@overload -def nulls( - size: int, - type: types.StringViewType, - memory_pool: MemoryPool | None = None, -) -> StringViewArray: ... -@overload -def nulls( - size: int, - type: types.LargeListType[_DataTypeT], - memory_pool: MemoryPool | None = None, -) -> LargeListArray[_DataTypeT]: ... -@overload -def nulls( - size: int, - type: types.ListViewType[_DataTypeT], - memory_pool: MemoryPool | None = None, -) -> ListViewArray[_DataTypeT]: ... -@overload -def nulls( - size: int, - type: types.LargeListViewType[_DataTypeT], - memory_pool: MemoryPool | None = None, -) -> LargeListViewArray[_DataTypeT]: ... -@overload -def nulls( - size: int, - type: types.FixedSizeListType[_DataTypeT, _Size], - memory_pool: MemoryPool | None = None, -) -> FixedSizeListArray[_DataTypeT, _Size]: ... -@overload -def nulls( - size: int, - type: types.ListType[_DataTypeT], - memory_pool: MemoryPool | None = None, -) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... -@overload -def nulls( - size: int, - type: types.StructType, - memory_pool: MemoryPool | None = None, -) -> StructArray: ... -@overload -def nulls( - size: int, - type: types.MapType[_MapKeyT, _MapItemT], - memory_pool: MemoryPool | None = None, -) -> MapArray[_MapKeyT, _MapItemT]: ... -@overload -def nulls( - size: int, - type: types.DictionaryType[_IndexT, _BasicValueT], - memory_pool: MemoryPool | None = None, -) -> DictionaryArray[_IndexT, _BasicValueT]: ... -@overload -def nulls( - size: int, - type: types.RunEndEncodedType[_RunEndType, _BasicValueT], - memory_pool: MemoryPool | None = None, -) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... 
-@overload -def nulls( - size: int, - type: types.UnionType, - memory_pool: MemoryPool | None = None, -) -> UnionArray: ... -@overload -def nulls( - size: int, - type: types.FixedShapeTensorType[types._ValueT], - memory_pool: MemoryPool | None = None, -) -> FixedShapeTensorArray[Any]: ... -@overload -def nulls( - size: int, - type: types.Bool8Type, - memory_pool: MemoryPool | None = None, -) -> Bool8Array: ... -@overload -def nulls( - size: int, - type: types.UuidType, - memory_pool: MemoryPool | None = None, -) -> UuidArray[Any]: ... -@overload -def nulls( - size: int, - type: types.JsonType, - memory_pool: MemoryPool | None = None, -) -> JsonArray[Any]: ... -@overload -def nulls( - size: int, - type: types.OpaqueType, - memory_pool: MemoryPool | None = None, -) -> OpaqueArray[Any]: ... -@overload -def nulls( - size: int, - type: types.ExtensionType, - memory_pool: MemoryPool | None = None, -) -> ExtensionArray[Any]: ... -def nulls(*args, **kwargs): +) -> ArrayLike: """ - Create a strongly-typed Array instance with all elements null. + Create pyarrow.Array instance from a Python object. Parameters ---------- - size : int - Array length. - type : pyarrow.DataType, default None - Explicit type for the array. By default use NullType. - memory_pool : MemoryPool, default None - Arrow MemoryPool to use for allocations. Uses the default memory - pool if not passed. + obj : sequence, iterable, ndarray, pandas.Series, Arrow-compatible array + If both type and size are specified may be a single use iterable. If + not strongly-typed, Arrow type will be inferred for resulting array. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_device_array__`` method) + can be passed as well. + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred from + the data. + mask : array[bool], optional + Indicate which values are null (True) or not null (False). + size : int64, optional + Size of the elements. If the input is larger than size bail at this + length. For iterators, if size is larger than the input iterator this + will be treated as a "max size", but will involve an initial allocation + of size followed by a resize to the actual size (so if you know the + exact size specifying it correctly will give you better performance). + from_pandas : bool, default None + Use pandas's semantics for inferring nulls from values in + ndarray-like data. If passed, the mask tasks precedence, but + if a value is unmasked (not-null), but still null according to + pandas semantics, then it is null. Defaults to False if not + passed explicitly by user, or True if a pandas object is + passed in. + safe : bool, default True + Check for overflows or other unsafe conversions. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. Returns ------- - arr : Array + array : pyarrow.Array or pyarrow.ChunkedArray + A ChunkedArray instead of an Array is returned if: + + - the object data overflowed binary storage. + - the object's ``__arrow_array__`` protocol method returned a chunked + array. + + Notes + ----- + Timezone will be preserved in the returned array for timezone-aware data, + else no timezone will be returned for naive timestamps. + Internally, UTC values are stored for timezone-aware data with the + timezone set in the data type. 
+ + Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by + default converted as MonthDayNanoIntervalArray. relativedelta leapdays + are ignored as are all absolute fields on both objects. datetime.timedelta + can also be converted to MonthDayNanoIntervalArray but this requires + passing MonthDayNanoIntervalType explicitly. + + Converting to dictionary array will promote to a wider integer type for + indices if the number of distinct values cannot be represented, even if + the index type was explicitly set. This means that if there are more than + 127 values the returned dictionary array's index type will be at least + pa.int16() even if pa.int8() was passed to the function. Note that an + explicit index type will not be demoted even if it is wider than required. Examples -------- + >>> import pandas as pd >>> import pyarrow as pa - >>> pa.nulls(10) - - 10 nulls - - >>> pa.nulls(3, pa.uint32()) - + >>> pa.array(pd.Series([1, 2])) + [ - null, - null, - null - ] - """ - -@overload -def repeat( - value: None | scalar.NullScalar, size: int, memory_pool: MemoryPool | None = None -) -> NullArray: ... -@overload -def repeat( # type: ignore[overload-overlap] - value: bool | scalar.BooleanScalar, size: int, memory_pool: MemoryPool | None = None -) -> BooleanArray: ... -@overload -def repeat( - value: scalar.Int8Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Int8Array: ... -@overload -def repeat( - value: scalar.Int16Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Int16Array: ... -@overload -def repeat( - value: scalar.Int32Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Int32Array: ... -@overload -def repeat( - value: int | scalar.Int64Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Int64Array: ... -@overload -def repeat( - value: scalar.UInt8Scalar, size: int, memory_pool: MemoryPool | None = None -) -> UInt8Array: ... -@overload -def repeat( - value: scalar.UInt16Scalar, size: int, memory_pool: MemoryPool | None = None -) -> UInt16Array: ... -@overload -def repeat( - value: scalar.UInt32Scalar, size: int, memory_pool: MemoryPool | None = None -) -> UInt32Array: ... -@overload -def repeat( - value: scalar.UInt64Scalar, size: int, memory_pool: MemoryPool | None = None -) -> UInt64Array: ... -@overload -def repeat( - value: scalar.HalfFloatScalar, size: int, memory_pool: MemoryPool | None = None -) -> HalfFloatArray: ... -@overload -def repeat( - value: scalar.FloatScalar, size: int, memory_pool: MemoryPool | None = None -) -> FloatArray: ... -@overload -def repeat( - value: float | scalar.DoubleScalar, size: int, memory_pool: MemoryPool | None = None -) -> DoubleArray: ... -@overload -def repeat( - value: Decimal | scalar.Decimal32Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Decimal32Array: ... -@overload -def repeat( - value: scalar.Decimal64Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Decimal64Array: ... -@overload -def repeat( - value: scalar.Decimal128Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Decimal128Array: ... -@overload -def repeat( - value: scalar.Decimal256Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Decimal256Array: ... -@overload -def repeat( - value: dt.date | scalar.Date32Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Date32Array: ... -@overload -def repeat( - value: scalar.Date64Scalar, size: int, memory_pool: MemoryPool | None = None -) -> Date64Array: ... 
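# Illustrative sketch, not part of this patch: with the collapsed array()
# overloads above, the stub declares a broad ArrayLike return type, so callers
# that need a concrete array class narrow it themselves. Runtime behaviour of
# pyarrow is unchanged; only the static return type is wider.
import pyarrow as pa

arr = pa.array([1, 2, 3])  # runtime type is Int64Array; declared type is ArrayLike
assert isinstance(arr, pa.Int64Array)
assert arr.type == pa.int64()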
-@overload -def repeat( - value: scalar.Time32Scalar[types._Time32Unit], size: int, memory_pool: MemoryPool | None = None -) -> Time32Array[types._Time32Unit]: ... -@overload -def repeat( - value: dt.time | scalar.Time64Scalar[types._Time64Unit], - size: int, - memory_pool: MemoryPool | None = None, -) -> Time64Array[types._Time64Unit]: ... -@overload -def repeat( - value: scalar.TimestampScalar[types._Unit, types._Tz], - size: int, - memory_pool: MemoryPool | None = None, -) -> TimestampArray[types._Unit, types._Tz]: ... -@overload -def repeat( - value: dt.timedelta | scalar.DurationScalar[types._Unit], - size: int, - memory_pool: MemoryPool | None = None, -) -> DurationArray[types._Unit]: ... -@overload -def repeat( # pyright: ignore[reportOverlappingOverload] - value: MonthDayNano | scalar.MonthDayNanoIntervalScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> MonthDayNanoIntervalArray: ... -@overload -def repeat( - value: bytes | scalar.BinaryScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> BinaryArray: ... -@overload -def repeat( - value: scalar.LargeBinaryScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> LargeBinaryArray: ... -@overload -def repeat( - value: scalar.FixedSizeBinaryScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> FixedSizeBinaryArray: ... -@overload -def repeat( - value: str | scalar.StringScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> StringArray: ... -@overload -def repeat( - value: scalar.LargeStringScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> LargeStringArray: ... -@overload -def repeat( - value: scalar.BinaryViewScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> BinaryViewArray: ... -@overload -def repeat( - value: scalar.StringViewScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> StringViewArray: ... -@overload -def repeat( - value: list[Any] | tuple[Any] | scalar.ListScalar[_DataTypeT], - size: int, - memory_pool: MemoryPool | None = None, -) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... -@overload -def repeat( - value: scalar.FixedSizeListScalar[_DataTypeT, _Size], - size: int, - memory_pool: MemoryPool | None = None, -) -> FixedSizeListArray[_DataTypeT, _Size]: ... -@overload -def repeat( - value: scalar.LargeListScalar[_DataTypeT], - size: int, - memory_pool: MemoryPool | None = None, -) -> LargeListArray[_DataTypeT]: ... -@overload -def repeat( - value: scalar.ListViewScalar[_DataTypeT], - size: int, - memory_pool: MemoryPool | None = None, -) -> ListViewArray[_DataTypeT]: ... -@overload -def repeat( - value: scalar.LargeListViewScalar[_DataTypeT], - size: int, - memory_pool: MemoryPool | None = None, -) -> LargeListViewArray[_DataTypeT]: ... -@overload -def repeat( - value: dict[str, Any] | scalar.StructScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> StructArray: ... -@overload -def repeat( - value: scalar.MapScalar[_MapKeyT, _MapItemT], - size: int, - memory_pool: MemoryPool | None = None, -) -> MapArray[_MapKeyT, _MapItemT]: ... -@overload -def repeat( - value: scalar.DictionaryScalar[_IndexT, _BasicValueT], - size: int, - memory_pool: MemoryPool | None = None, -) -> DictionaryArray[_IndexT, _BasicValueT]: ... -@overload -def repeat( - value: scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT], - size: int, - memory_pool: MemoryPool | None = None, -) -> RunEndEncodedArray[_RunEndType, _BasicValueT]: ... 
-@overload -def repeat( - value: scalar.UnionScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> UnionArray: ... -@overload -def repeat( - value: scalar.FixedShapeTensorScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> FixedShapeTensorArray[Any]: ... -@overload -def repeat( - value: scalar.Bool8Scalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> Bool8Array: ... -@overload -def repeat( - value: scalar.UuidScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> UuidArray[Any]: ... -@overload -def repeat( - value: scalar.JsonScalar, - size: int, - memory_pool: MemoryPool | None = None, -) -> JsonArray[Any]: ... -@overload -def repeat( - value: scalar.OpaqueScalar, + 1, + 2 + ] + + >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) + + ... + -- dictionary: + [ + "a", + "b" + ] + -- indices: + [ + 0, + 1, + 0 + ] + + >>> import numpy as np + >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) + + [ + 1, + null + ] + + >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) + >>> arr.type.index_type + DataType(int16) + """ + +def asarray( + values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, + type: _DataTypeT | Any | None = None, +) -> Array[Scalar[_DataTypeT]] | ArrayLike: + """ + Convert to pyarrow.Array, inferring type if not provided. + + Parameters + ---------- + values : array-like + This can be a sequence, numpy.ndarray, pyarrow.Array or + pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be + a ChunkedArray, otherwise the output will be a Array. + type : string or DataType + Explicitly construct the array with this type. Attempt to cast if + indicated type is different. + + Returns + ------- + arr : Array or ChunkedArray + """ + +def nulls( size: int, + type: Any | None = None, memory_pool: MemoryPool | None = None, -) -> OpaqueArray[Any]: ... -@overload +) -> ArrayLike: + """ + Create a strongly-typed Array instance with all elements null. + + Parameters + ---------- + size : int + Array length. + type : pyarrow.DataType, default None + Explicit type for the array. By default use NullType. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool if not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.nulls(10) + + 10 nulls + + >>> pa.nulls(3, pa.uint32()) + + [ + null, + null, + null + ] + """ + def repeat( - value: scalar.ExtensionScalar, + value: Any, size: int, memory_pool: MemoryPool | None = None, -) -> ExtensionArray[Any]: ... -def repeat(*args, **kwargs): +) -> ArrayLike: """ Create an Array instance whose slots are the given scalar. @@ -1427,7 +298,7 @@ def repeat(*args, **kwargs): "string" ] - >>> pa.repeat(pa.scalar({"a": 1, "b": [1, 2]}), 2) + >>> pa.repeat(pa.scalar({'a': 1, 'b': [1, 2]}), 2) -- is_valid: all not null -- child 0 type: int64 @@ -1620,13 +491,10 @@ class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): Convert a Table to pandas DataFrame: - >>> table = pa.table( - ... [ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), - ... ], - ... names=["n_legs", "animals"], - ... ) + >>> table = pa.table([ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + ... 
], names=['n_legs', 'animals']) >>> table.to_pandas() n_legs animals 0 2 Flamingo @@ -1641,7 +509,8 @@ class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): >>> import pyarrow as pa >>> n_legs = pa.array([2, 4, 5, 100]) >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + >>> batch = pa.record_batch([n_legs, animals], + ... names=["n_legs", "animals"]) >>> batch pyarrow.RecordBatch n_legs: int64 @@ -1705,7 +574,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): >>> import pyarrow as pa >>> left = pa.array(["one", "two", "three"]) >>> right = pa.array(["two", None, "two-and-a-half", "three"]) - >>> print(left.diff(right)) # doctest: +SKIP + >>> print(left.diff(right)) # doctest: +SKIP @@ -0, +0 @@ -"one" @@ -1798,7 +667,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): encoded : DictionaryArray A dictionary-encoded version of this array. """ - def value_count(self) -> StructArray: + def value_counts(self) -> StructArray: """ Compute counts of unique elements in array. @@ -1807,27 +676,15 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): StructArray An array of structs """ - @overload - @staticmethod - def from_pandas( - obj: pd.Series | np.ndarray | ArrayLike, - *, - mask: Mask | None = None, - type: _DataTypeT, - safe: bool = True, - memory_pool: MemoryPool | None = None, - ) -> Array[Scalar[_DataTypeT]]: ... - @overload @staticmethod def from_pandas( obj: pd.Series | np.ndarray | ArrayLike, *, mask: Mask | None = None, + type: _DataTypeT | None = None, safe: bool = True, memory_pool: MemoryPool | None = None, - ) -> Array[Scalar]: ... - @staticmethod - def from_pandas(*args, **kwargs): + ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: """ Convert pandas.Series to an Arrow Array. @@ -1926,7 +783,10 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): only be counted once. """ def __sizeof__(self) -> int: ... - def __iter__(self) -> Iterator[_Scalar_co]: ... + def __iter__(self) -> Iterator[_Scalar_co]: + """ + Implement iter(self). + """ def to_string( self, *, @@ -1961,10 +821,24 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): skip_new_lines : bool If the array should be rendered as a single line of text or if each element should be on its own line. + element_size_limit : int, default 100 + Maximum number of characters of a single element before it is truncated. """ format = to_string - def equals(self, other: Self | Iterable[Any]) -> bool: ... - def __len__(self) -> int: ... + def equals(self, other: Self) -> bool: + """ + Parameters + ---------- + other : pyarrow.Array + + Returns + ------- + bool + """ + def __len__(self) -> int: + """ + Return len(self). + """ def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: """ Return BooleanArray indicating the null values. @@ -1991,7 +865,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): Return BooleanArray indicating the non-null values. """ def fill_null( - self: Array[Scalar[_BasicDataType[_AsPyType]]] | Array[Scalar[_DataTypeT]], fill_value: Scalar[_DataTypeT] | _AsPyType | str | None + self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: """ See :func:`pyarrow.compute.fill_null` for usage. @@ -2006,11 +880,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): result : Array A new array with nulls replaced by the given value. 
""" - @overload - def __getitem__(self, key: int) -> _Scalar_co: ... - @overload - def __getitem__(self, key: slice) -> Self: ... - def __getitem__(self, key): + def __getitem__(self, key: int | slice) -> _Scalar_co | Self: """ Slice or return value at given index @@ -2085,25 +955,15 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): An array of the same type, with only the elements selected by the boolean mask. """ - @overload - def index( - self: Array[_ScalarT], - value: _ScalarT, - start: int | None = None, - end: int | None = None, - *, - memory_pool: MemoryPool | None = None, - ) -> scalar.Int64Scalar: ... - @overload + def index( - self: Array[Scalar[_BasicDataType[_AsPyType]]], - value: _AsPyType | None, + self: Array[_ScalarT] | Array[Scalar[_BasicDataType[_AsPyType]]], + value: _ScalarT | _AsPyType, start: int | None = None, end: int | None = None, *, memory_pool: MemoryPool | None = None, - ) -> scalar.Int64Scalar: ... - def index(self, *args, **kwargs): + ) -> scalar.Int64Scalar | scalar.Int64Scalar: """ Find the first index of a value. @@ -2171,9 +1031,9 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): array : numpy.ndarray """ def to_pylist( - self: Array[Scalar[_BasicDataType[_AsPyType]]] | Array[Scalar[ListType[Any]]] | StructArray | DictionaryArray[Unknown, Unknown], + self: Array[Scalar[_BasicDataType[_AsPyType]]], *, - map_as_pydicts: Literal["lossy", "strict"] | None = None, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, ) -> list[_AsPyType | None]: """ Convert to a list of native Python objects. @@ -2363,7 +1223,8 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): @classmethod def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... def __dlpack__(self, stream: int | None = None) -> Any: - """Export a primitive array as a DLPack capsule. + """ + Export a primitive array as a DLPack capsule. Parameters ---------- @@ -2372,163 +1233,307 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): Stream is provided by the consumer to the producer to instruct the producer to ensure that operations can safely be performed on the array. - Returns - ------- - capsule : PyCapsule - A DLPack capsule for the array, pointing to a DLManagedTensor. + Returns + ------- + capsule : PyCapsule + A DLPack capsule for the array, pointing to a DLManagedTensor. + """ + def __dlpack_device__(self) -> tuple[int, int]: + """ + Return the DLPack device tuple this arrays resides on. + + Returns + ------- + tuple : Tuple[int, int] + Tuple with index specifying the type of the device (where + CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the + device which is 0 by default for CPU. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the array resides. + + Returns + ------- + DeviceAllocationType + """ + + @property + def is_cpu(self) -> bool: + """ + Whether the array is CPU-accessible. + """ + @property + def statistics(self) -> ArrayStatistics | None: + """ + Statistics of the array. + """ + +class NullArray(Array[scalar.NullScalar]): + """ + Concrete class for Arrow arrays of null data type. + """ + +class BooleanArray(Array[scalar.BooleanScalar]): + """ + Concrete class for Arrow arrays of boolean data type. + """ + @property + def false_count(self) -> int: ... + @property + def true_count(self) -> int: ... + +class NumericArray(Array[_ScalarT]): + """ + A base class for Arrow numeric arrays. 
+ """ +class IntegerArray(NumericArray[_ScalarT]): + """ + A base class for Arrow integer arrays. + """ +class FloatingPointArray(NumericArray[_ScalarT]): + """ + A base class for Arrow floating-point arrays. + """ +class Int8Array(IntegerArray[scalar.Int8Scalar]): + """ + Concrete class for Arrow arrays of int8 data type. + """ +class UInt8Array(IntegerArray[scalar.UInt8Scalar]): + """ + Concrete class for Arrow arrays of uint8 data type. + """ +class Int16Array(IntegerArray[scalar.Int16Scalar]): + """ + Concrete class for Arrow arrays of int16 data type. + """ +class UInt16Array(IntegerArray[scalar.UInt16Scalar]): + """ + Concrete class for Arrow arrays of uint16 data type. + """ +class Int32Array(IntegerArray[scalar.Int32Scalar]): + """ + Concrete class for Arrow arrays of int32 data type. + """ +class UInt32Array(IntegerArray[scalar.UInt32Scalar]): + """ + Concrete class for Arrow arrays of uint32 data type. + """ +class Int64Array(IntegerArray[scalar.Int64Scalar]): + """ + Concrete class for Arrow arrays of int64 data type. + """ +class UInt64Array(IntegerArray[scalar.UInt64Scalar]): + """ + Concrete class for Arrow arrays of uint64 data type. + """ +class Date32Array(NumericArray[scalar.Date32Scalar]): + """ + Concrete class for Arrow arrays of date32 data type. + """ +class Date64Array(NumericArray[scalar.Date64Scalar]): + """ + Concrete class for Arrow arrays of date64 data type. + """ +class TimestampArray(NumericArray[scalar.TimestampScalar[types._Unit, types._Tz]]): + """ + Concrete class for Arrow arrays of timestamp data type. + """ +class Time32Array(NumericArray[scalar.Time32Scalar[types._Time32Unit]]): + """ + Concrete class for Arrow arrays of time32 data type. + """ +class Time64Array(NumericArray[scalar.Time64Scalar[types._Time64Unit]]): + """ + Concrete class for Arrow arrays of time64 data type. + """ +class DurationArray(NumericArray[scalar.DurationScalar[types._Unit]]): + """ + Concrete class for Arrow arrays of duration data type. + """ +class MonthDayNanoIntervalArray(Array[scalar.MonthDayNanoIntervalScalar]): + """ + Concrete class for Arrow arrays of interval[MonthDayNano] type. + """ +class HalfFloatArray(FloatingPointArray[scalar.HalfFloatScalar]): + """ + Concrete class for Arrow arrays of float16 data type. + """ +class FloatArray(FloatingPointArray[scalar.FloatScalar]): + """ + Concrete class for Arrow arrays of float32 data type. + """ +class DoubleArray(FloatingPointArray[scalar.DoubleScalar]): + """ + Concrete class for Arrow arrays of float64 data type. + """ +class FixedSizeBinaryArray(Array[scalar.FixedSizeBinaryScalar]): + """ + Concrete class for Arrow arrays of a fixed-size binary data type. + """ +class Decimal32Array(FixedSizeBinaryArray): + """ + """ +class Decimal64Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal64 data type. + """ +class Decimal128Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal128 data type. + """ +class Decimal256Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal256 data type. + """ + +class BaseListArray(Array[_ScalarT]): + def flatten(self, recursive: bool = False) -> Array: + """ + Unnest this [Large]ListArray/[Large]ListViewArray/FixedSizeListArray + according to 'recursive'. + + Note that this method is different from ``self.values`` in that + it takes care of the slicing offset as well as null elements backed + by non-empty sub-lists. 
+ + Parameters + ---------- + recursive : bool, default False, optional + When True, flatten this logical list-array recursively until an + array of non-list values is formed. + + When False, flatten only the top level. + + Returns + ------- + result : Array + + Examples + -------- + + Basic logical list-array's flatten + >>> import pyarrow as pa + >>> values = [1, 2, 3, 4] + >>> offsets = [2, 1, 0] + >>> sizes = [2, 2, 2] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 3, + 4 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ] + ] + >>> array.flatten() + + [ + 3, + 4, + 2, + 3, + 1, + 2 + ] + + When recursive=True, nested list arrays are flattened recursively + until an array of non-list values is formed. + + >>> array = pa.array([ + ... None, + ... [ + ... [1, None, 2], + ... None, + ... [3, 4] + ... ], + ... [], + ... [ + ... [], + ... [5, 6], + ... None + ... ], + ... [ + ... [7, 8] + ... ] + ... ], type=pa.list_(pa.list_(pa.int64()))) + >>> array.flatten(True) + + [ + 1, + null, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ] """ - def __dlpack_device__(self) -> tuple[int, int]: + def value_parent_indices(self) -> Int64Array: """ - Return the DLPack device tuple this arrays resides on. + Return array of same length as list child values array where each + output value is the index of the parent list array slot containing each + child value. - Returns - ------- - tuple : Tuple[int, int] - Tuple with index specifying the type of the device (where - CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the - device which is 0 by default for CPU. - """ - @property - def device_type(self) -> DeviceAllocationType: + Examples + -------- + >>> import pyarrow as pa + >>> arr = pa.array([[1, 2, 3], [], None, [4]], + ... type=pa.list_(pa.int32())) + >>> arr.value_parent_indices() + + [ + 0, + 0, + 0, + 3 + ] """ - The device type where the array resides. - - Returns - ------- - DeviceAllocationType + def value_lengths(self) -> Int32Array: """ + Return integers array with values equal to the respective length of + each list element. Null list values are null in the output. - @property - def is_cpu(self) -> bool: - """ - Whether the array is CPU-accessible. - """ - @property - def statistics(self) -> ArrayStatistics | None: - """ - Statistics of the array. + Examples + -------- + >>> import pyarrow as pa + >>> arr = pa.array([[1, 2, 3], [], None, [4]], + ... type=pa.list_(pa.int32())) + >>> arr.value_lengths() + + [ + 3, + 0, + null, + 1 + ] """ -class NullArray(Array[scalar.NullScalar]): ... - -class BooleanArray(Array[scalar.BooleanScalar]): - @property - def false_count(self) -> int: ... - @property - def true_count(self) -> int: ... - -class NumericArray(Array[_ScalarT]): ... -class IntegerArray(NumericArray[_ScalarT]): ... -class FloatingPointArray(NumericArray[_ScalarT]): ... -class Int8Array(IntegerArray[scalar.Int8Scalar]): ... -class UInt8Array(IntegerArray[scalar.UInt8Scalar]): ... -class Int16Array(IntegerArray[scalar.Int16Scalar]): ... -class UInt16Array(IntegerArray[scalar.UInt16Scalar]): ... -class Int32Array(IntegerArray[scalar.Int32Scalar]): ... -class UInt32Array(IntegerArray[scalar.UInt32Scalar]): ... -class Int64Array(IntegerArray[scalar.Int64Scalar]): ... -class UInt64Array(IntegerArray[scalar.UInt64Scalar]): ... -class Date32Array(NumericArray[scalar.Date32Scalar]): ... -class Date64Array(NumericArray[scalar.Date64Scalar]): ... -class TimestampArray(NumericArray[scalar.TimestampScalar[types._Unit, types._Tz]]): ... 
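# A short sketch (assuming standard pyarrow type inference) of how the concrete
# array classes annotated above surface at runtime; the literal values are
# illustrative only.
import pyarrow as pa

ints = pa.array([1, 2, None])          # -> Int64Array, elements are Int64Scalar
floats = pa.array([1.5, 2.5])          # -> DoubleArray
flags = pa.array([True, False, None])  # -> BooleanArray
flags.true_count                       # 1, per the BooleanArray properties above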
-class Time32Array(NumericArray[scalar.Time32Scalar[types._Time32Unit]]): ... -class Time64Array(NumericArray[scalar.Time64Scalar[types._Time64Unit]]): ... -class DurationArray(NumericArray[scalar.DurationScalar[types._Unit]]): ... -class MonthDayNanoIntervalArray(Array[scalar.MonthDayNanoIntervalScalar]): ... -class HalfFloatArray(FloatingPointArray[scalar.HalfFloatScalar]): ... -class FloatArray(FloatingPointArray[scalar.FloatScalar]): ... -class DoubleArray(FloatingPointArray[scalar.DoubleScalar]): ... -class FixedSizeBinaryArray(Array[scalar.FixedSizeBinaryScalar]): ... -class Decimal32Array(FixedSizeBinaryArray): ... -class Decimal64Array(FixedSizeBinaryArray): ... -class Decimal128Array(FixedSizeBinaryArray): ... -class Decimal256Array(FixedSizeBinaryArray): ... - -class BaseListArray(Array[_ScalarT]): - def flatten(self, recursive: bool = False) -> Array: ... - def value_parent_indices(self) -> Int64Array: ... - def value_lengths(self) -> Int32Array: ... - class ListArray(BaseListArray[_ScalarT]): - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: Array[Scalar[_DataTypeT]], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: list[int], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[types.Int64Type]]: ... - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: list[float], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[types.Float64Type]]: ... - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: list[str], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[types.StringType]]: ... - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: list[bytes], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[types.BinaryType]]: ... - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array | list[int], - values: list, - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListArray: ... - @overload + """ + Concrete class for Arrow arrays of a list data type. + """ @classmethod def from_arrays( cls, offsets: Int32Array | list[int], - values: Array | list, + values: Array[Scalar[_DataTypeT]] | list[int] | list[float] | list[str] | list[bytes] | list, *, - type: _DataTypeT, + type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[_DataTypeT]]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): + ) -> ListArray[scalar.ListScalar[_DataTypeT | types.Int64Type | types.Float64Type | types.StringType | types.BinaryType]] | ListArray: """ Construct ListArray from arrays of int32 offsets and values. 
@@ -2646,7 +1651,6 @@ class ListArray(BaseListArray[_ScalarT]): null, 6 ] - """ @property def offsets(self) -> Int32Array: @@ -2676,30 +1680,21 @@ class ListArray(BaseListArray[_ScalarT]): """ class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): - @overload - @classmethod - def from_arrays( - cls, - offsets: Int64Array, - values: Array[Scalar[_DataTypeT]], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> LargeListArray[_DataTypeT]: ... - @overload + """ + Concrete class for Arrow arrays of a large list data type. + + Identical to ListArray, but 64-bit offsets. + """ @classmethod def from_arrays( cls, offsets: Int64Array, - values: Array, + values: Array[Scalar[_DataTypeT]] | Array, *, - type: _DataTypeT, + type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> LargeListArray[_DataTypeT]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): + ) -> LargeListArray[_DataTypeT] | LargeListArray[_DataTypeT]: """ Construct LargeListArray from arrays of int64 offsets and values. @@ -2803,30 +1798,19 @@ class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): """ class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): - @overload - @classmethod - def from_arrays( - cls, - offsets: Int32Array, - values: Array[Scalar[_DataTypeT]], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> ListViewArray[_DataTypeT]: ... - @overload + """ + Concrete class for Arrow arrays of a list view data type. + """ @classmethod def from_arrays( cls, offsets: Int32Array, - values: Array, + values: Array[Scalar[_DataTypeT]] | Array, *, - type: _DataTypeT, + type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> ListViewArray[_DataTypeT]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): + ) -> ListViewArray[_DataTypeT] | ListViewArray[_DataTypeT]: """ Construct ListViewArray from arrays of int32 offsets, sizes, and values. @@ -3009,30 +1993,21 @@ class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): """ class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): - @overload - @classmethod - def from_arrays( - cls, - offsets: Int64Array, - values: Array[Scalar[_DataTypeT]], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> LargeListViewArray[_DataTypeT]: ... - @overload + """ + Concrete class for Arrow arrays of a large list view data type. + + Identical to ListViewArray, but with 64-bit offsets. + """ @classmethod def from_arrays( cls, offsets: Int64Array, - values: Array, + values: Array[Scalar[_DataTypeT]] | Array, *, - type: _DataTypeT, + type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> LargeListViewArray[_DataTypeT]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): + ) -> LargeListViewArray[_DataTypeT]: """ Construct LargeListViewArray from arrays of int64 offsets and values. @@ -3222,27 +2197,18 @@ class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): """ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _Size]]): - @overload - @classmethod - def from_arrays( - cls, - values: Array[Scalar[_DataTypeT]], - *, - type: types.FixedSizeListType[_DataTypeT, Literal[int]] | None = None, - mask: Mask | None = None, - ) -> FixedSizeListArray[_DataTypeT, None]: ... 
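# A minimal construction sketch for the from_arrays class methods typed above,
# assuming standard pyarrow semantics; the offsets and values are illustrative.
import pyarrow as pa

values = pa.array([1, 2, 3, 4, 5, 6], type=pa.int64())
lst = pa.ListArray.from_arrays([0, 2, 4, 6], values)       # list<int64>, 3 rows
big = pa.LargeListArray.from_arrays(pa.array([0, 3, 6], type=pa.int64()), values)
fixed = pa.FixedSizeListArray.from_arrays(values, 2)        # fixed_size_list<int64>[2]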
- @overload + """ + Concrete class for Arrow arrays of a fixed size list data type. + """ @classmethod def from_arrays( cls, values: Array[Scalar[_DataTypeT]], - limit_size: _Size, + limit_size: _Size | None = None, *, type: None = None, mask: Mask | None = None, - ) -> FixedSizeListArray[_DataTypeT, _Size]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): + ) -> FixedSizeListArray[_DataTypeT, _Size | None]: """ Construct FixedSizeListArray from array of values and a list length. @@ -3304,7 +2270,7 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S def values(self) -> BaseListArray[scalar.ListScalar[_DataTypeT]]: """ Return the underlying array of values which backs the - FixedSizeListArray. + FixedSizeListArray ignoring the array's offset. Note even null elements are included. @@ -3322,7 +2288,10 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S Examples -------- >>> import pyarrow as pa - >>> array = pa.array([[1, 2], None, [3, None]], type=pa.list_(pa.int32(), 2)) + >>> array = pa.array( + ... [[1, 2], None, [3, None]], + ... type=pa.list_(pa.int32(), 2) + ... ) >>> array.values [ @@ -3333,38 +2302,27 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S 3, null ] - """ _MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) _MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) -class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): - @overload +class MapArray(BaseListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): + """ + Concrete class for Arrow arrays of a map data type. + """ @classmethod def from_arrays( cls, - offsets: Int64Array, - keys: Array[Scalar[_MapKeyT]], - items: Array[Scalar[_MapItemT]], - *, - type: None = None, - pool: MemoryPool | None = None, - mask: Mask | None = None, - ) -> MapArray[_MapKeyT, _MapItemT]: ... - @overload - @classmethod - def from_arrays( # pyright: ignore[reportIncompatibleMethodOverride] - cls, - offsets: Int64Array, - values: Array, + offsets: Int64Array | list[int] | None, + keys: Array[Scalar[_MapKeyT]] | None = None, + items: Array[Scalar[_MapItemT]] | None = None, + values: Array | None = None, *, - type: MapType[_MapKeyT, _MapItemT], + type: MapType[_MapKeyT, _MapItemT] | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> MapArray[_MapKeyT, _MapItemT]: ... - @classmethod - def from_arrays(cls, *args, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride] + ) -> MapArray[_MapKeyT, _MapItemT]: """ Construct MapArray from arrays of int32 offsets and key, item arrays. @@ -3391,43 +2349,41 @@ class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): represents the null bitmask corresponding to the missing values in the integer array. >>> import pyarrow as pa - >>> movies_rectangular = np.ma.masked_array( - ... [[10, -1, -1], [8, 4, 5], [-1, 10, 3], [-1, -1, -1], [-1, -1, -1]], - ... [ - ... [False, True, True], - ... [False, False, False], - ... [True, False, False], - ... [True, True, True], - ... [True, True, True], - ... ], - ... ) + >>> movies_rectangular = np.ma.masked_array([ + ... [10, -1, -1], + ... [8, 4, 5], + ... [-1, 10, 3], + ... [-1, -1, -1], + ... [-1, -1, -1] + ... ], + ... [ + ... [False, True, True], + ... [False, False, False], + ... [True, False, False], + ... [True, True, True], + ... [True, True, True], + ... ]) To represent the same data with the MapArray and from_arrays, the data is formed like this: >>> offsets = [ - ... 0, # -- row 1 start - ... 
1, # -- row 2 start - ... 4, # -- row 3 start - ... 6, # -- row 4 start - ... 6, # -- row 5 start - ... 6, # -- row 5 end + ... 0, # -- row 1 start + ... 1, # -- row 2 start + ... 4, # -- row 3 start + ... 6, # -- row 4 start + ... 6, # -- row 5 start + ... 6, # -- row 5 end ... ] >>> movies = [ - ... "Dark Knight", # ---------------------------------- row 1 - ... "Dark Knight", - ... "Meet the Parents", - ... "Superman", # -- row 2 - ... "Meet the Parents", - ... "Superman", # ----------------- row 3 + ... "Dark Knight", # ---------------------------------- row 1 + ... "Dark Knight", "Meet the Parents", "Superman", # -- row 2 + ... "Meet the Parents", "Superman", # ----------------- row 3 ... ] >>> likings = [ - ... 10, # -------- row 1 - ... 8, - ... 4, - ... 5, # --- row 2 - ... 10, - ... 3, # ------ row 3 + ... 10, # -------- row 1 + ... 8, 4, 5, # --- row 2 + ... 10, 3 # ------ row 3 ... ] >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() 0 [(Dark Knight, 10)] @@ -3443,12 +2399,12 @@ class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): offset still has to refer to the existing value from keys (and values): >>> offsets = [ - ... 0, # ----- row 1 start - ... 1, # ----- row 2 start - ... 4, # ----- row 3 start - ... None, # -- row 4 start - ... None, # -- row 5 start - ... 6, # ----- row 5 end + ... 0, # ----- row 1 start + ... 1, # ----- row 2 start + ... 4, # ----- row 3 start + ... None, # -- row 4 start + ... None, # -- row 5 start + ... 6, # ----- row 5 end ... ] >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() 0 [(Dark Knight, 10)] @@ -3460,12 +2416,19 @@ class MapArray(ListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): """ @property def keys(self) -> Array: - """Flattened array of keys across all maps in array""" + """ + Flattened array of keys across all maps in array + """ @property def items(self) -> Array: - """Flattened array of items across all maps in array""" + """ + Flattened array of items across all maps in array + """ class UnionArray(Array[scalar.UnionScalar]): + """ + Concrete class for Arrow arrays of a Union data type. + """ @deprecated("Use fields() instead") def child(self, pos: int) -> Field: """ @@ -3502,7 +2465,9 @@ class UnionArray(Array[scalar.UnionScalar]): """ @property def type_codes(self) -> Int8Array: - """Get the type codes array.""" + """ + Get the type codes array. + """ @property def offsets(self) -> Int32Array: """ @@ -3558,6 +2523,9 @@ class UnionArray(Array[scalar.UnionScalar]): """ class StringArray(Array[scalar.StringScalar]): + """ + Concrete class for Arrow arrays of string (or utf8) data type. + """ @staticmethod def from_buffers( # type: ignore[override] length: int, @@ -3587,6 +2555,9 @@ class StringArray(Array[scalar.StringScalar]): """ class LargeStringArray(Array[scalar.LargeStringScalar]): + """ + Concrete class for Arrow arrays of large string (or utf8) data type. + """ @staticmethod def from_buffers( # type: ignore[override] length: int, @@ -3615,9 +2586,15 @@ class LargeStringArray(Array[scalar.LargeStringScalar]): string_array : StringArray """ -class StringViewArray(Array[scalar.StringViewScalar]): ... +class StringViewArray(Array[scalar.StringViewScalar]): + """ + Concrete class for Arrow arrays of string (or utf8) view data type. + """ class BinaryArray(Array[scalar.BinaryScalar]): + """ + Concrete class for Arrow arrays of variable-sized binary data type. 
+ """ @property def total_values_length(self) -> int: """ @@ -3626,6 +2603,9 @@ class BinaryArray(Array[scalar.BinaryScalar]): """ class LargeBinaryArray(Array[scalar.LargeBinaryScalar]): + """ + Concrete class for Arrow arrays of large variable-sized binary data type. + """ @property def total_values_length(self) -> int: """ @@ -3633,9 +2613,15 @@ class LargeBinaryArray(Array[scalar.LargeBinaryScalar]): by the offsets of this LargeBinaryArray. """ -class BinaryViewArray(Array[scalar.BinaryViewScalar]): ... +class BinaryViewArray(Array[scalar.BinaryViewScalar]): + """ + Concrete class for Arrow arrays of variable-sized binary view data type. + """ class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): + """ + Concrete class for dictionary-encoded Arrow arrays. + """ def dictionary_encode(self) -> Self: ... # type: ignore[override] def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: """ @@ -3680,7 +2666,7 @@ class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): @staticmethod def from_arrays( indices: Indices, - dictionary: Array | np.ndarray | pd.Series | list[Any], + dictionary: Array | np.ndarray | pd.Series, mask: np.ndarray | pd.Series | BooleanArray | None = None, ordered: bool = False, from_pandas: bool = False, @@ -3715,6 +2701,9 @@ class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): """ class StructArray(Array[scalar.StructScalar]): + """ + Concrete class for Arrow arrays of a struct data type. + """ def field(self, index: int | str) -> Array: """ Retrieves the child array belonging to field. @@ -3743,8 +2732,8 @@ class StructArray(Array[scalar.StructScalar]): """ @staticmethod def from_arrays( - arrays: Iterable[Array] | list[list[Any]], - names: list[str] | list[LiteralString] | None = None, + arrays: Iterable[Array], + names: list[str] | None = None, fields: list[Field] | None = None, mask=None, memory_pool: MemoryPool | None = None, @@ -3796,29 +2785,15 @@ class StructArray(Array[scalar.StructScalar]): """ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT]]): - @overload - @staticmethod - def from_arrays( - run_ends: Int16Array, - values: Array, - type: DataType | None = None, - ) -> RunEndEncodedArray[types.Int16Type, _BasicValueT]: ... - @overload - @staticmethod - def from_arrays( - run_ends: Int32Array, - values: Array, - type: DataType | None = None, - ) -> RunEndEncodedArray[types.Int32Type, _BasicValueT]: ... - @overload + """ + Concrete class for Arrow run-end encoded arrays. + """ @staticmethod def from_arrays( - run_ends: Int64Array, + run_ends: Int16Array | Int32Array | Int64Array, values: Array, type: DataType | None = None, - ) -> RunEndEncodedArray[types.Int64Type, _BasicValueT]: ... - @staticmethod - def from_arrays(*args, **kwargs): + ) -> RunEndEncodedArray[types.Int16Type | types.Int32Type | types.Int64Type, _BasicValueT]: # type: ignore[type-var] """ Construct RunEndEncodedArray from run_ends and values arrays. 
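# A usage sketch (assuming standard pyarrow behavior) for the from_arrays
# constructors whose signatures are collapsed above; the sample run ends,
# indices, and values are illustrative.
import pyarrow as pa

ree = pa.RunEndEncodedArray.from_arrays(pa.array([2, 5], type=pa.int32()),
                                        pa.array(["a", "b"]))
struct = pa.StructArray.from_arrays([pa.array([1, 2]), pa.array(["x", "y"])],
                                    names=["id", "label"])
dictionary = pa.DictionaryArray.from_arrays(pa.array([0, 1, 0], type=pa.int8()),
                                            pa.array(["low", "high"]))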
@@ -3836,7 +2811,7 @@ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicVal RunEndEncodedArray """ @staticmethod - def from_buffers( # pyright: ignore[reportIncompatibleMethodOverride] + def from_buffers( # type: ignore[override] type: DataType, length: int, buffers: list[Buffer], @@ -3910,6 +2885,9 @@ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicVal _ArrayT = TypeVar("_ArrayT", bound=Array) class ExtensionArray(Array[scalar.ExtensionScalar], Generic[_ArrayT]): + """ + Concrete class for Arrow extension arrays. + """ @property def storage(self) -> Any: ... @staticmethod @@ -3954,8 +2932,35 @@ class JsonArray(ExtensionArray[_ArrayT]): "{ "id":30, "values":["a", "b"] }" ] """ + """ + Concrete class for Arrow arrays of JSON data type. + + This does not guarantee that the JSON data actually + is valid JSON. + + Examples + -------- + Define the extension type for JSON array + + >>> import pyarrow as pa + >>> json_type = pa.json_(pa.large_utf8()) + + Create an extension array + + >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] + >>> storage = pa.array(arr, pa.large_utf8()) + >>> pa.ExtensionArray.from_storage(json_type, storage) + + [ + null, + "{ "id":30, "values":["a", "b"] }" + ] + """ -class UuidArray(ExtensionArray[_ArrayT]): ... +class UuidArray(ExtensionArray[_ArrayT]): + """ + Concrete class for Arrow arrays of UUID data type. + """ class FixedShapeTensorArray(ExtensionArray[_ArrayT]): """ @@ -4042,12 +3047,16 @@ class FixedShapeTensorArray(ExtensionArray[_ArrayT]): Parameters ---------- obj : numpy.ndarray + dim_names : tuple or list of strings, default None + Explicit names to tensor dimensions. Examples -------- >>> import pyarrow as pa >>> import numpy as np - >>> arr = np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], dtype=np.float32) + >>> arr = np.array( + ... [[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], + ... dtype=np.float32) >>> pa.FixedShapeTensorArray.from_numpy_ndarray(arr) [ @@ -4221,7 +3230,6 @@ def concat_arrays(arrays: Iterable[_ArrayT], memory_pool: MemoryPool | None = No 2, 4 ] - """ def _empty_array(type: _DataTypeT) -> Array[scalar.Scalar[_DataTypeT]]: @@ -4236,7 +3244,6 @@ __all__ = [ "repeat", "infer_type", "_PandasConvertible", - "_CastAs", "Array", "NullArray", "BooleanArray", diff --git a/python/pyarrow/__lib_pxi/io.pyi b/python/pyarrow/__lib_pxi/io.pyi index dca26a52940..ebcfa8c470b 100644 --- a/python/pyarrow/__lib_pxi/io.pyi +++ b/python/pyarrow/__lib_pxi/io.pyi @@ -31,12 +31,12 @@ if sys.version_info >= (3, 10): else: from typing_extensions import TypeAlias -from typing import Any, Literal, SupportsIndex, overload +from typing import Any, Literal, SupportsIndex from pyarrow._stubs_typing import Compression, SupportPyBuffer from pyarrow.lib import MemoryPool, _Weakrefable -from .device import Device, DeviceAllocationType, MemoryManager +# from .device import Device, DeviceAllocationType, MemoryManager from .types import KeyValueMetadata def have_libhdfs() -> bool: @@ -113,7 +113,10 @@ class NativeFile(_Weakrefable): def readable(self) -> bool: ... def seekable(self) -> bool: ... def isatty(self) -> bool: ... - def fileno(self) -> int: ... + def fileno(self) -> int: + """ + NOT IMPLEMENTED + """ @property def closed(self) -> bool: ... def close(self) -> None: ... @@ -216,7 +219,8 @@ class NativeFile(_Weakrefable): data : bytes """ def read1(self) -> bytes: - """Read and return up to n bytes. + """ + Read and return up to n bytes. 
Unlike read(), if *nbytes* is None then a chunk is read, not the entire file. @@ -259,14 +263,18 @@ class NativeFile(_Weakrefable): maximum number of bytes read """ def readlines(self, hint: int | None = None) -> list[bytes]: - """Read lines of the file + """ + NOT IMPLEMENTED. Read lines of the file Parameters ---------- hint : int maximum number of bytes read until we stop """ - def __iter__(self) -> Self: ... + def __iter__(self) -> Self: + """ + Implement iter(self). + """ def __next__(self) -> bytes: ... def read_buffer(self, nbytes: int | None = None) -> Buffer: """ @@ -277,7 +285,10 @@ class NativeFile(_Weakrefable): nbytes : int, optional maximum number of bytes read """ - def truncate(self) -> None: ... + def truncate(self) -> None: + """ + NOT IMPLEMENTED + """ def writelines(self, lines: list[bytes]): """ Write lines to the file. @@ -337,10 +348,10 @@ class PythonFile(NativeFile): Create a stream for writing: >>> buf = io.BytesIO() - >>> f = pa.PythonFile(buf, mode="w") + >>> f = pa.PythonFile(buf, mode = 'w') >>> f.writable() True - >>> f.write(b"PythonFile") + >>> f.write(b'PythonFile') 10 >>> buf.getvalue() b'PythonFile' @@ -350,8 +361,8 @@ class PythonFile(NativeFile): Create a stream for reading: - >>> buf = io.BytesIO(b"PythonFile") - >>> f = pa.PythonFile(buf, mode="r") + >>> buf = io.BytesIO(b'PythonFile') + >>> f = pa.PythonFile(buf, mode = 'r') >>> f.mode 'rb' >>> f.read() @@ -381,15 +392,16 @@ class MemoryMappedFile(NativeFile): Create a new file with memory map: >>> import pyarrow as pa - >>> mmap = pa.create_memory_map("example_mmap.dat", 10) + >>> mmap = pa.create_memory_map('example_mmap.dat', 10) >>> mmap >>> mmap.close() Open an existing file with memory map: - >>> with pa.memory_map("example_mmap.dat") as mmap: + >>> with pa.memory_map('example_mmap.dat') as mmap: ... mmap + ... """ @classmethod @@ -436,11 +448,13 @@ def memory_map( Reading from a memory map without any memory allocation or copying: >>> import pyarrow as pa - >>> with pa.output_stream("example_mmap.txt") as stream: - ... stream.write(b"Constructing a buffer referencing the mapped memory") + >>> with pa.output_stream('example_mmap.txt') as stream: + ... stream.write(b'Constructing a buffer referencing the mapped memory') + ... 51 - >>> with pa.memory_map("example_mmap.txt") as mmap: - ... mmap.read_at(6, 45) + >>> with pa.memory_map('example_mmap.txt') as mmap: + ... mmap.read_at(6,45) + ... b'memory' """ @@ -455,36 +469,40 @@ class OSFile(NativeFile): Create a new file to write to: >>> import pyarrow as pa - >>> with pa.OSFile("example_osfile.arrow", mode="w") as f: + >>> with pa.OSFile('example_osfile.arrow', mode='w') as f: ... f.writable() - ... f.write(b"OSFile") + ... f.write(b'OSFile') ... f.seekable() + ... True 6 False Open the file to read: - >>> with pa.OSFile("example_osfile.arrow", mode="r") as f: + >>> with pa.OSFile('example_osfile.arrow', mode='r') as f: ... f.mode ... f.read() + ... 'rb' b'OSFile' Open the file to append: - >>> with pa.OSFile("example_osfile.arrow", mode="ab") as f: + >>> with pa.OSFile('example_osfile.arrow', mode='ab') as f: ... f.mode - ... f.write(b" is super!") + ... f.write(b' is super!') + ... 'ab' 10 - >>> with pa.OSFile("example_osfile.arrow") as f: + >>> with pa.OSFile('example_osfile.arrow') as f: ... f.read() + ... b'OSFile is super!' 
Inspect created OSFile: - >>> pa.OSFile("example_osfile.arrow") + >>> pa.OSFile('example_osfile.arrow') """ def __init__( @@ -505,8 +523,9 @@ class FixedSizeBufferWriter(NativeFile): >>> import pyarrow as pa >>> buf = pa.allocate_buffer(5) >>> with pa.output_stream(buf) as stream: - ... stream.write(b"abcde") + ... stream.write(b'abcde') ... stream + ... 5 @@ -518,9 +537,24 @@ class FixedSizeBufferWriter(NativeFile): """ def __init__(self, buffer: Buffer) -> None: ... - def set_memcopy_threads(self, num_threads: int) -> None: ... - def set_memcopy_blocksize(self, blocksize: int) -> None: ... - def set_memcopy_threshold(self, threshold: int) -> None: ... + def set_memcopy_threads(self, num_threads: int) -> None: + """ + Parameters + ---------- + num_threads : int + """ + def set_memcopy_blocksize(self, blocksize: int) -> None: + """ + Parameters + ---------- + blocksize : int64 + """ + def set_memcopy_threshold(self, threshold: int) -> None: + """ + Parameters + ---------- + threshold : int64 + """ # ---------------------------------------------------------------------- # Arrow buffers @@ -532,7 +566,10 @@ class Buffer(_Weakrefable): A buffer represents a contiguous memory area. Many buffers will own their memory, though not all of them do. """ - def __len__(self) -> int: ... + def __len__(self) -> int: + """ + Return len(self). + """ def _assert_cpu(self) -> None: ... @property def size(self) -> int: @@ -565,39 +602,40 @@ class Buffer(_Weakrefable): """ Whether the buffer is CPU-accessible. """ + # TODO + # @property + # def device(self) -> Device: + # """ + # The device where the buffer resides. + # + # Returns + # ------- + # Device + # """ + # @property + # def memory_manager(self) -> MemoryManager: + # """ + # The memory manager associated with the buffer. + # + # Returns + # ------- + # MemoryManager + # """ + # @property + # def device_type(self) -> DeviceAllocationType: + # """ + # The device type where the buffer resides. + # + # Returns + # ------- + # DeviceAllocationType + # """ @property - def device(self) -> Device: - """ - The device where the buffer resides. - - Returns - ------- - Device - """ - @property - def memory_manager(self) -> MemoryManager: - """ - The memory manager associated with the buffer. - - Returns - ------- - MemoryManager - """ - @property - def device_type(self) -> DeviceAllocationType: + def parent(self) -> Buffer | None: ... + def __getitem__(self, key: slice | int) -> Self | int: """ - The device type where the buffer resides. - - Returns - ------- - DeviceAllocationType + Return self[key]. """ - @property - def parent(self) -> Buffer | None: ... - @overload - def __getitem__(self, key: slice) -> Self: ... - @overload - def __getitem__(self, key: int) -> int: ... def slice(self, offset: int = 0, length: int | None = None) -> Self: """ Slice this buffer. Memory is not copied. @@ -635,7 +673,6 @@ class Buffer(_Weakrefable): """ Return this buffer as a Python bytes object. Memory is copied. """ - def __buffer__(self, flags: int, /) -> memoryview: ... class ResizableBuffer(Buffer): """ @@ -656,17 +693,9 @@ class ResizableBuffer(Buffer): If this is false, the buffer is never shrunk. """ -@overload -def allocate_buffer(size: int, memory_pool: MemoryPool | None = None) -> Buffer: ... -@overload -def allocate_buffer( - size: int, memory_pool: MemoryPool | None, resizable: Literal[False] -) -> Buffer: ... -@overload def allocate_buffer( - size: int, memory_pool: MemoryPool | None, resizable: Literal[True] -) -> ResizableBuffer: ... 
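# A sketch of allocate_buffer under the merged signature above (assuming
# standard pyarrow behavior): with the overloads folded together, a checker
# now sees Buffer | ResizableBuffer for both calls; the sizes are illustrative.
import pyarrow as pa

buf = pa.allocate_buffer(64)                    # fixed-size, mutable Buffer
rbuf = pa.allocate_buffer(64, resizable=True)   # ResizableBuffer at runtime
rbuf.resize(128)                                # only valid on ResizableBuffer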
-def allocate_buffer(*args, **kwargs): + size: int, memory_pool: MemoryPool | None = None, resizable: Literal[False] | Literal[True] | None = None +) -> Buffer | ResizableBuffer: """ Allocate a mutable buffer. @@ -700,7 +729,7 @@ class BufferOutputStream(NativeFile): >>> import pyarrow as pa >>> f = pa.BufferOutputStream() - >>> f.write(b"pyarrow.Buffer") + >>> f.write(b'pyarrow.Buffer') 14 >>> f.closed False @@ -734,19 +763,23 @@ class BufferReader(NativeFile): Create an Arrow input stream and inspect it: >>> import pyarrow as pa - >>> data = b"reader data" + >>> data = b'reader data' >>> buf = memoryview(data) >>> with pa.input_stream(buf) as stream: ... stream.size() ... stream.read(6) ... stream.seek(7) ... stream.read(15) + ... 11 b'reader' 7 b'data' """ - def __init__(self, obj) -> None: ... + def __init__(self, obj) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ class CompressedInputStream(NativeFile): """ @@ -768,6 +801,7 @@ class CompressedInputStream(NativeFile): >>> raw = pa.BufferOutputStream() >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: ... compressed.write(data) + ... 17 Create an input stream with decompression referencing the @@ -776,6 +810,7 @@ class CompressedInputStream(NativeFile): >>> cdata = raw.getvalue() >>> with pa.input_stream(cdata, compression="gzip") as compressed: ... compressed.read() + ... b'Compressed stream' which actually translates to the use of ``BufferReader``and @@ -784,6 +819,7 @@ class CompressedInputStream(NativeFile): >>> raw = pa.BufferReader(cdata) >>> with pa.CompressedInputStream(raw, "gzip") as compressed: ... compressed.read() + ... b'Compressed stream' """ @@ -791,7 +827,10 @@ class CompressedInputStream(NativeFile): self, stream: StrPath | NativeFile | IOBase, compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], - ) -> None: ... + ) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ class CompressedOutputStream(NativeFile): """ @@ -813,13 +852,17 @@ class CompressedOutputStream(NativeFile): >>> raw = pa.BufferOutputStream() >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: ... compressed.write(data) + ... 17 """ def __init__( self, stream: StrPath | NativeFile | IOBase, compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], - ) -> None: ... + ) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ class BufferedInputStream(NativeFile): """ @@ -838,7 +881,10 @@ class BufferedInputStream(NativeFile): """ def __init__( self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None - ) -> None: ... + ) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ def detach(self) -> NativeFile: """ Release the raw InputStream. @@ -867,7 +913,10 @@ class BufferedOutputStream(NativeFile): """ def __init__( self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None - ) -> None: ... + ) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ def detach(self) -> NativeFile: """ Flush any buffered writes and release the raw OutputStream. @@ -890,7 +939,10 @@ class TransformInputStream(NativeFile): transform_func : callable The transformation to apply. """ - def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: ... + def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. 
+ """ class Transcoder: def __init__(self, decoder, encoder) -> None: ... @@ -986,7 +1038,10 @@ class CacheOptions(_Weakrefable): range_size_limit: int | None = None, lazy: bool = True, prefetch_limit: int = 0, - ) -> None: ... + ) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ @classmethod def from_network_metrics( cls, @@ -1073,15 +1128,18 @@ class Codec(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> pa.Codec.is_available("gzip") + >>> pa.Codec.is_available('gzip') True - >>> codec = pa.Codec("gzip") + >>> codec = pa.Codec('gzip') >>> codec.name 'gzip' >>> codec.compression_level 9 """ - def __init__(self, compression: Compression, compression_level: int | None = None) -> None: ... + def __init__(self, compression: Compression, compression_level: int | None = None) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ @classmethod def detect(cls, path: StrPath) -> Self: """ @@ -1166,34 +1224,21 @@ class Codec(_Weakrefable): """ @property def name(self) -> Compression: - """Returns the name of the codec""" + """ + Returns the name of the codec + """ @property def compression_level(self) -> int: - """Returns the compression level parameter of the codec""" - @overload - def compress( - self, - buf: Buffer | bytes | SupportPyBuffer, - *, - memory_pool: MemoryPool | None = None, - ) -> Buffer: ... - @overload - def compress( - self, - buf: Buffer | bytes | SupportPyBuffer, - *, - asbytes: Literal[False], - memory_pool: MemoryPool | None = None, - ) -> Buffer: ... - @overload + """ + Returns the compression level parameter of the codec + """ def compress( self, buf: Buffer | bytes | SupportPyBuffer, *, - asbytes: Literal[True], + asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, - ) -> bytes: ... - def compress(self, *args, **kwargs): + ) -> Buffer | bytes: """ Compress data from buffer-like object. @@ -1209,33 +1254,14 @@ class Codec(_Weakrefable): ------- compressed : pyarrow.Buffer or bytes (if asbytes=True) """ - @overload def decompress( self, buf: Buffer | bytes | SupportPyBuffer, decompressed_size: int | None = None, *, + asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, - ) -> Buffer: ... - @overload - def decompress( - self, - buf: Buffer | bytes | SupportPyBuffer, - decompressed_size: int | None = None, - *, - asbytes: Literal[False], - memory_pool: MemoryPool | None = None, - ) -> Buffer: ... - @overload - def decompress( - self, - buf: Buffer | bytes | SupportPyBuffer, - decompressed_size: int | None = None, - *, - asbytes: Literal[True], - memory_pool: MemoryPool | None = None, - ) -> bytes: ... - def decompress(self, *args, **kwargs): + ) -> Buffer | bytes: """ Decompress data from buffer-like object. @@ -1254,30 +1280,13 @@ class Codec(_Weakrefable): uncompressed : pyarrow.Buffer or bytes (if asbytes=True) """ -@overload -def compress( - buf: Buffer | bytes | SupportPyBuffer, - codec: Compression = "lz4", - *, - memory_pool: MemoryPool | None = None, -) -> Buffer: ... -@overload def compress( buf: Buffer | bytes | SupportPyBuffer, codec: Compression = "lz4", *, - asbytes: Literal[False], + asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, -) -> Buffer: ... -@overload -def compress( - buf: Buffer | bytes | SupportPyBuffer, - codec: Compression = "lz4", - *, - asbytes: Literal[True], - memory_pool: MemoryPool | None = None, -) -> bytes: ... 
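# A round-trip sketch for the Codec compress/decompress methods typed above,
# assuming standard pyarrow codecs; with asbytes collapsed into one parameter
# the result is typed Buffer | bytes. The payload is illustrative.
import pyarrow as pa

data = b"some bytes worth compressing, repeated " * 8
codec = pa.Codec("gzip")
packed = codec.compress(data, asbytes=True)
restored = codec.decompress(packed, decompressed_size=len(data), asbytes=True)
assert restored == data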
-def compress(*args, **kwargs): +) -> Buffer | bytes: """ Compress data from buffer-like object. @@ -1297,33 +1306,14 @@ def compress(*args, **kwargs): compressed : pyarrow.Buffer or bytes (if asbytes=True) """ -@overload -def decompress( - buf: Buffer | bytes | SupportPyBuffer, - decompressed_size: int | None = None, - codec: Compression = "lz4", - *, - memory_pool: MemoryPool | None = None, -) -> Buffer: ... -@overload -def decompress( - buf: Buffer | bytes | SupportPyBuffer, - decompressed_size: int | None = None, - codec: Compression = "lz4", - *, - asbytes: Literal[False], - memory_pool: MemoryPool | None = None, -) -> Buffer: ... -@overload def decompress( buf: Buffer | bytes | SupportPyBuffer, decompressed_size: int | None = None, codec: Compression = "lz4", *, - asbytes: Literal[True], + asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, -) -> bytes: ... -def decompress(*args, **kwargs): +) -> Buffer | bytes: """ Decompress data from buffer-like object. @@ -1376,25 +1366,30 @@ def input_stream( >>> buf = memoryview(b"some data") >>> with pa.input_stream(buf) as stream: ... stream.read(4) + ... b'some' Create a readable OSFile (NativeFile) from a string or file path: >>> import gzip - >>> with gzip.open("example.gz", "wb") as f: - ... f.write(b"some data") + >>> with gzip.open('example.gz', 'wb') as f: + ... f.write(b'some data') + ... 9 - >>> with pa.input_stream("example.gz") as stream: + >>> with pa.input_stream('example.gz') as stream: ... stream.read() + ... b'some data' Create a readable PythonFile (NativeFile) from a a Python file object: - >>> with open("example.txt", mode="w") as f: - ... f.write("some text") + >>> with open('example.txt', mode='w') as f: + ... f.write('some text') + ... 9 - >>> with pa.input_stream("example.txt") as stream: + >>> with pa.input_stream('example.txt') as stream: ... stream.read(6) + ... b'some t' """ @@ -1430,9 +1425,11 @@ def output_stream( >>> buf = pa.py_buffer(empty_obj) >>> with pa.output_stream(buf) as stream: ... stream.write(data) + ... 11 >>> with pa.input_stream(buf) as stream: ... stream.read(6) + ... b'buffer' or from a memoryview object: @@ -1440,18 +1437,22 @@ def output_stream( >>> buf = memoryview(empty_obj) >>> with pa.output_stream(buf) as stream: ... stream.write(data) + ... 11 >>> with pa.input_stream(buf) as stream: ... stream.read() + ... b'buffer data' Create a writable NativeFile from a string or file path: - >>> with pa.output_stream("example_second.txt") as stream: - ... stream.write(b"Write some data") + >>> with pa.output_stream('example_second.txt') as stream: + ... stream.write(b'Write some data') + ... 15 - >>> with pa.input_stream("example_second.txt") as stream: + >>> with pa.input_stream('example_second.txt') as stream: ... stream.read() + ... b'Write some data' """ diff --git a/python/pyarrow/__lib_pxi/memory.pyi b/python/pyarrow/__lib_pxi/memory.pyi index e969e3738b8..4fc723a1950 100644 --- a/python/pyarrow/__lib_pxi/memory.pyi +++ b/python/pyarrow/__lib_pxi/memory.pyi @@ -73,7 +73,12 @@ class MemoryPool(_Weakrefable): """ class LoggingMemoryPool(MemoryPool): ... -class ProxyMemoryPool(MemoryPool): ... +class ProxyMemoryPool(MemoryPool): + """ + Memory pool implementation that tracks the number of bytes and + maximum memory allocated through its direct calls, while redirecting + to another memory pool. 
+ """ def default_memory_pool() -> MemoryPool: """ diff --git a/python/pyarrow/__lib_pxi/scalar.pyi b/python/pyarrow/__lib_pxi/scalar.pyi index c6819f7e863..b979ec43a3a 100644 --- a/python/pyarrow/__lib_pxi/scalar.pyi +++ b/python/pyarrow/__lib_pxi/scalar.pyi @@ -19,8 +19,6 @@ import collections.abc import datetime as dt import sys -from decimal import Decimal - if sys.version_info >= (3, 11): from typing import Self else: @@ -29,17 +27,17 @@ if sys.version_info >= (3, 10): from typing import TypeAlias else: from typing_extensions import TypeAlias -from typing import Any, Generic, Iterator, Literal, Mapping, overload +from typing import Any, Generic, Iterator, Literal import numpy as np -from pyarrow._compute import CastOptions +from pyarrow._compute import CastOptions # type: ignore[import-not-found] from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable -from typing_extensions import Protocol, TypeVar +from typing_extensions import TypeVar from . import types from .types import ( - _AsPyType, + # _AsPyType, _DataTypeT, _Time32Unit, _Time64Unit, @@ -65,23 +63,13 @@ class Scalar(_Weakrefable, Generic[_DataType_co]): """ Holds a valid (non-null) value. """ - @overload - def cast( - self, - target_type: None, - safe: bool = True, - options: CastOptions | None = None, - memory_pool: MemoryPool | None = None, - ) -> Self: ... - @overload def cast( self, - target_type: _DataTypeT, + target_type: None | _DataTypeT, safe: bool = True, options: CastOptions | None = None, memory_pool: MemoryPool | None = None, - ) -> Scalar[_DataTypeT]: ... - def cast(self, *args, **kwargs): + ) -> Self | Scalar[_DataTypeT]: """ Cast scalar value to another data type. @@ -118,77 +106,21 @@ class Scalar(_Weakrefable, Generic[_DataType_co]): ------ ArrowInvalid """ - def equals(self, other: Scalar) -> bool: ... - def __hash__(self) -> int: ... - @overload - def as_py( - self: Scalar[types._BasicDataType[_AsPyType]], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> _AsPyType: ... - @overload - def as_py( - self: Scalar[types.ListType[types._BasicDataType[_AsPyType]]], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[_AsPyType]: ... - @overload - def as_py( - self: Scalar[ - types.ListType[ - types.DictionaryType[types._IndexT, types._BasicDataType[_AsPyTypeV], Any] - ] - ], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[dict[int, _AsPyTypeV]]: ... - @overload - def as_py( - self: Scalar[ - types.ListType[types.DictionaryType[Any, types._BasicDataType[_AsPyTypeV], Any]], - ], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[dict[Any, _AsPyTypeV]]: ... - @overload - def as_py( - self: Scalar[types.ListType[types.DictionaryType[types._IndexT, Any, Any]],], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[dict[int, Any]]: ... - @overload - def as_py( - self: Scalar[types.StructType], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[dict[str, Any]]: ... - @overload - def as_py( - self: Scalar[ - types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] - ], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[tuple[_AsPyTypeK, _AsPyTypeV]]: ... - @overload - def as_py( - self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]]], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[tuple[Any, _AsPyTypeV]]: ... 
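# A small sketch (assuming standard pyarrow behavior) of the Scalar methods
# retyped above; with the overloads collapsed, cast() is seen as
# Self | Scalar[...] and as_py() as Any. The values are illustrative.
import pyarrow as pa

s = pa.scalar(42)                 # Int64Scalar
s.cast(pa.float64()).as_py()      # 42.0
s.equals(pa.scalar(42))           # True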
- @overload - def as_py( - self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any]], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[tuple[_AsPyTypeK, Any]]: ... - @overload - def as_py( - self: Scalar[Any], - *, - maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> Any: ... - def as_py(self, *args, **kwargs): + def equals(self, other: Scalar) -> bool: + """ + Parameters + ---------- + other : pyarrow.Scalar + + Returns + ------- + bool + """ + def __hash__(self) -> int: + """ + Return hash(self). + """ + def as_py(self: Scalar[Any], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None) -> Any: """ Return this value as a Python representation. @@ -210,153 +142,413 @@ class Scalar(_Weakrefable, Generic[_DataType_co]): _NULL: TypeAlias = None NA = _NULL -class NullScalar(Scalar[types.NullType]): ... -class BooleanScalar(Scalar[types.BoolType]): ... -class UInt8Scalar(Scalar[types.UInt8Type]): ... -class Int8Scalar(Scalar[types.Int8Type]): ... -class UInt16Scalar(Scalar[types.UInt16Type]): ... -class Int16Scalar(Scalar[types.Int16Type]): ... -class UInt32Scalar(Scalar[types.Uint32Type]): ... -class Int32Scalar(Scalar[types.Int32Type]): ... -class UInt64Scalar(Scalar[types.UInt64Type]): ... -class Int64Scalar(Scalar[types.Int64Type]): ... -class HalfFloatScalar(Scalar[types.Float16Type]): ... -class FloatScalar(Scalar[types.Float32Type]): ... -class DoubleScalar(Scalar[types.Float64Type]): ... -class Decimal32Scalar(Scalar[types.Decimal32Type[types._Precision, types._Scale]]): ... -class Decimal64Scalar(Scalar[types.Decimal64Type[types._Precision, types._Scale]]): ... -class Decimal128Scalar(Scalar[types.Decimal128Type[types._Precision, types._Scale]]): ... -class Decimal256Scalar(Scalar[types.Decimal256Type[types._Precision, types._Scale]]): ... -class Date32Scalar(Scalar[types.Date32Type]): ... +class NullScalar(Scalar[types.NullType]): + """ + Concrete class for null scalars. + """ +class BooleanScalar(Scalar[types.BoolType]): + """ + Concrete class for boolean scalars. + """ +class UInt8Scalar(Scalar[types.UInt8Type]): + """ + Concrete class for uint8 scalars. + """ +class Int8Scalar(Scalar[types.Int8Type]): + """ + Concrete class for int8 scalars. + """ +class UInt16Scalar(Scalar[types.UInt16Type]): + """ + Concrete class for uint16 scalars. + """ +class Int16Scalar(Scalar[types.Int16Type]): + """ + Concrete class for int16 scalars. + """ +class UInt32Scalar(Scalar[types.Uint32Type]): + """ + Concrete class for uint32 scalars. + """ +class Int32Scalar(Scalar[types.Int32Type]): + """ + Concrete class for int32 scalars. + """ +class UInt64Scalar(Scalar[types.UInt64Type]): + """ + Concrete class for uint64 scalars. + """ +class Int64Scalar(Scalar[types.Int64Type]): + """ + Concrete class for int64 scalars. + """ +class HalfFloatScalar(Scalar[types.Float16Type]): + """ + Concrete class for float scalars. + """ +class FloatScalar(Scalar[types.Float32Type]): + """ + Concrete class for float scalars. + """ +class DoubleScalar(Scalar[types.Float64Type]): + """ + Concrete class for double scalars. + """ +class Decimal32Scalar(Scalar[types.Decimal32Type[types._Precision, types._Scale]]): + """ + Concrete class for decimal32 scalars. + """ +class Decimal64Scalar(Scalar[types.Decimal64Type[types._Precision, types._Scale]]): + """ + Concrete class for decimal64 scalars. + """ +class Decimal128Scalar(Scalar[types.Decimal128Type[types._Precision, types._Scale]]): + """ + Concrete class for decimal128 scalars. 
+ """ +class Decimal256Scalar(Scalar[types.Decimal256Type[types._Precision, types._Scale]]): + """ + Concrete class for decimal256 scalars. + """ +class Date32Scalar(Scalar[types.Date32Type]): + """ + Concrete class for date32 scalars. + """ class Date64Scalar(Scalar[types.Date64Type]): + """ + Concrete class for date64 scalars. + """ @property def value(self) -> dt.date | None: ... class Time32Scalar(Scalar[types.Time32Type[_Time32Unit]]): + """ + Concrete class for time32 scalars. + """ @property def value(self) -> dt.time | None: ... class Time64Scalar(Scalar[types.Time64Type[_Time64Unit]]): + """ + Concrete class for time64 scalars. + """ @property def value(self) -> dt.time | None: ... class TimestampScalar(Scalar[types.TimestampType[_Unit, _Tz]]): + """ + Concrete class for timestamp scalars. + """ @property def value(self) -> int | None: ... class DurationScalar(Scalar[types.DurationType[_Unit]]): + """ + Concrete class for duration scalars. + """ @property def value(self) -> dt.timedelta | None: ... class MonthDayNanoIntervalScalar(Scalar[types.MonthDayNanoIntervalType]): + """ + Concrete class for month, day, nanosecond interval scalars. + """ @property - def value(self) -> MonthDayNano | None: ... + def value(self) -> MonthDayNano | None: + """ + Same as self.as_py() + """ class BinaryScalar(Scalar[types.BinaryType]): - def as_buffer(self) -> Buffer: ... + """ + Concrete class for binary-like scalars. + """ + def as_buffer(self) -> Buffer: + """ + Return a view over this value as a Buffer object. + """ class LargeBinaryScalar(Scalar[types.LargeBinaryType]): - def as_buffer(self) -> Buffer: ... + """ + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class FixedSizeBinaryScalar(Scalar[types.FixedSizeBinaryType]): - def as_buffer(self) -> Buffer: ... + """ + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class StringScalar(Scalar[types.StringType]): - def as_buffer(self) -> Buffer: ... + """ + Concrete class for string-like (utf8) scalars. + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class LargeStringScalar(Scalar[types.LargeStringType]): - def as_buffer(self) -> Buffer: ... + """ + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class BinaryViewScalar(Scalar[types.BinaryViewType]): - def as_buffer(self) -> Buffer: ... + """ + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class StringViewScalar(Scalar[types.StringViewType]): - def as_buffer(self) -> Buffer: ... + """ + """ + def as_buffer(self) -> Buffer: + """ + BinaryScalar.as_buffer(self) + + Return a view over this value as a Buffer object. + """ class ListScalar(Scalar[types.ListType[_DataTypeT]]): + """ + Concrete class for list-like scalars. + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - def __iter__(self) -> Iterator[Array]: ... + def __len__(self) -> int: + """ + Return the number of values. + """ + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: + """ + Return the value at the given index. + """ + def __iter__(self) -> Iterator[Array]: + """ + Iterate over this element's values. 
+ """ class FixedSizeListScalar(Scalar[types.FixedSizeListType[_DataTypeT, types._Size]]): + """ + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - def __iter__(self) -> Iterator[Array]: ... + def __len__(self) -> int: + """ + ListScalar.__len__(self) + + Return the number of values. + """ + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: + """ + ListScalar.__getitem__(self, i) + + Return the value at the given index. + """ + def __iter__(self) -> Iterator[Array]: + """ + ListScalar.__iter__(self) + + Iterate over this element's values. + """ class LargeListScalar(Scalar[types.LargeListType[_DataTypeT]]): + """ + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - def __iter__(self) -> Iterator[Array]: ... + def __len__(self) -> int: + """ + ListScalar.__len__(self) + + Return the number of values. + """ + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: + """ + ListScalar.__getitem__(self, i) + + Return the value at the given index. + """ + def __iter__(self) -> Iterator[Array]: + """ + ListScalar.__iter__(self) + + Iterate over this element's values. + """ class ListViewScalar(Scalar[types.ListViewType[_DataTypeT]]): + """ + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - def __iter__(self) -> Iterator[Array]: ... + def __len__(self) -> int: + """ + ListScalar.__len__(self) + + Return the number of values. + """ + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: + """ + ListScalar.__getitem__(self, i) + + Return the value at the given index. + """ + def __iter__(self) -> Iterator[Array]: + """ + ListScalar.__iter__(self) + + Iterate over this element's values. + """ class LargeListViewScalar(Scalar[types.LargeListViewType[_DataTypeT]]): + """ + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - def __iter__(self) -> Iterator[Array]: ... + def __len__(self) -> int: + """ + ListScalar.__len__(self) + + Return the number of values. + """ + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: + """ + ListScalar.__getitem__(self, i) + + Return the value at the given index. + """ + def __iter__(self) -> Iterator[Array]: + """ + ListScalar.__iter__(self) + + Iterate over this element's values. + """ class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar]): - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[str]: ... - def __getitem__(self, __key: str) -> Scalar[Any]: ... # type: ignore[override] + """ + Concrete class for struct scalars. + """ + def __len__(self) -> int: + """ + Return len(self). + """ + def __iter__(self) -> Iterator[str]: + """ + Implement iter(self). + """ + def __getitem__(self, key: int | str) -> Scalar[Any]: + """ + Return the child value for the given field. + + Parameters + ---------- + key : Union[int, str] + Index / position or name of the field. + + Returns + ------- + result : Scalar + """ def _as_py_tuple(self) -> list[tuple[str, Any]]: ... - def tolist(self) -> list[Any]: ... class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): + """ + Concrete class for map scalars. + """ @property def values(self) -> Array | None: ... - def __len__(self) -> int: ... 
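# An access sketch for the struct and map scalars documented above, assuming
# standard pyarrow conversion rules; the sample data is illustrative.
import pyarrow as pa

maps = pa.array([[("a", 1), ("b", 2)]], type=pa.map_(pa.string(), pa.int64()))
m = maps[0]                              # MapScalar
m.as_py()                                # [('a', 1), ('b', 2)]
m.as_py(maps_as_pydicts="strict")        # {'a': 1, 'b': 2}

structs = pa.array([{"id": 1, "label": "x"}])
structs[0]["label"].as_py()              # 'x' via StructScalar.__getitem__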
- def __getitem__(self, i: int) -> tuple[Scalar[types._K], types._ValueT, Any]: ... - @overload + def __len__(self) -> int: + """ + ListScalar.__len__(self) + + Return the number of values. + """ + def __getitem__(self, i: int) -> tuple[Scalar[types._K], types._ValueT, Any]: + """ + Return the value at the given index or key. + """ def __iter__( self: Scalar[ - types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]] - ], - ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]]: ... - @overload - def __iter__( - self: Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]],], - ) -> Iterator[tuple[Any, _AsPyTypeV]]: ... - @overload - def __iter__( - self: Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any],], - ) -> Iterator[tuple[_AsPyTypeK, Any]]: ... + types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]],] + | Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]]] + | Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any]] + ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]] | Iterator[tuple[Any, _AsPyTypeV]] | Iterator[tuple[_AsPyTypeK, Any]]: + """ + Iterate over this element's values. + """ class DictionaryScalar(Scalar[types.DictionaryType[types._IndexT, types._BasicValueT]]): + """ + Concrete class for dictionary-encoded scalars. + """ @property - def index(self) -> Scalar[types._IndexT]: ... + def index(self) -> Scalar[types._IndexT]: + """ + Return this value's underlying index as a scalar. + """ @property - def value(self) -> Scalar[types._BasicValueT]: ... + def value(self) -> Scalar[types._BasicValueT]: + """ + Return the encoded value as a scalar. + """ @property def dictionary(self) -> Array: ... class RunEndEncodedScalar(Scalar[types.RunEndEncodedType[types._RunEndType, types._BasicValueT]]): + """ + Concrete class for RunEndEncoded scalars. + """ @property - def value(self) -> tuple[int, types._BasicValueT] | None: ... + def value(self) -> tuple[int, types._BasicValueT] | None: + """ + Return underlying value as a scalar. + """ class UnionScalar(Scalar[types.UnionType]): + """ + Concrete class for Union scalars. + """ @property - def value(self) -> Any | None: ... + def value(self) -> Any | None: + """ + Return underlying value as a scalar. + """ @property - def type_code(self) -> str: ... + def type_code(self) -> str: + """ + Return the union type code for this scalar. + """ class ExtensionScalar(Scalar[types.ExtensionType]): + """ + Concrete class for Extension scalars. + """ @property - def value(self) -> Any | None: ... + def value(self) -> Any | None: + """ + Return storage value as a scalar. + """ @staticmethod def from_storage(typ: types.BaseExtensionType, value) -> ExtensionScalar: """ @@ -374,12 +566,27 @@ class ExtensionScalar(Scalar[types.ExtensionType]): ext_scalar : ExtensionScalar """ -class Bool8Scalar(Scalar[types.Bool8Type]): ... -class UuidScalar(Scalar[types.UuidType]): ... -class JsonScalar(Scalar[types.JsonType]): ... -class OpaqueScalar(Scalar[types.OpaqueType]): ... +class Bool8Scalar(Scalar[types.Bool8Type]): + """ + Concrete class for bool8 extension scalar. + """ +class UuidScalar(Scalar[types.UuidType]): + """ + Concrete class for Uuid extension scalar. + """ +class JsonScalar(Scalar[types.JsonType]): + """ + Concrete class for JSON extension scalar. + """ +class OpaqueScalar(Scalar[types.OpaqueType]): + """ + Concrete class for opaque extension scalar. + """ class FixedShapeTensorScalar(ExtensionScalar): + """ + Concrete class for fixed shape tensor extension scalar. 
+ """ def to_numpy(self) -> np.ndarray: """ Convert fixed shape tensor scalar to a numpy.ndarray. @@ -405,542 +612,13 @@ class FixedShapeTensorScalar(ExtensionScalar): Tensor represented stored in FixedShapeTensorScalar. """ -_V = TypeVar("_V") - -class NullableCollection(Protocol[_V]): # pyright: ignore[reportInvalidTypeVarUse] - def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... - def __len__(self) -> int: ... - def __contains__(self, item: Any, /) -> bool: ... - -@overload -def scalar( - value: str, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> StringScalar: ... -@overload -def scalar( - value: bytes, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> BinaryScalar: ... -@overload -def scalar( # pyright: ignore[reportOverlappingOverload] - value: bool, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> BooleanScalar: ... -@overload -def scalar( - value: int, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Int64Scalar: ... -@overload -def scalar( - value: float, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> DoubleScalar: ... -@overload -def scalar( - value: Decimal, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Decimal128Scalar: ... -@overload -def scalar( # pyright: ignore[reportOverlappingOverload] - value: dt.datetime, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> TimestampScalar[Literal["us"]]: ... -@overload -def scalar( - value: dt.date, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Date32Scalar: ... -@overload -def scalar( - value: dt.time, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Time64Scalar[Literal["us"]]: ... -@overload -def scalar( - value: dt.timedelta, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> DurationScalar[Literal["us"]]: ... -@overload -def scalar( # pyright: ignore[reportOverlappingOverload] - value: MonthDayNano, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> MonthDayNanoIntervalScalar: ... -@overload -def scalar( - value: Mapping[str, Any], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> StructScalar: ... -@overload -def scalar( - value: NullableCollection[str], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.StringType]]: ... -@overload -def scalar( - value: NullableCollection[bytes], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.BinaryType]]: ... -@overload -def scalar( - value: NullableCollection[bool], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.BoolType]]: ... -@overload -def scalar( - value: NullableCollection[int], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.Int64Type]]: ... -@overload -def scalar( - value: NullableCollection[float], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.Float64Type]]: ... 
-@overload -def scalar( - value: NullableCollection[Decimal], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.Decimal32Type]]: ... -@overload -def scalar( - value: NullableCollection[dt.datetime], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.TimestampType[Literal["us"]]]]: ... -@overload -def scalar( - value: NullableCollection[dt.date], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.Date32Type]]: ... -@overload -def scalar( - value: NullableCollection[dt.time], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.Time64Type[Literal["us"]]]]: ... -@overload -def scalar( - value: NullableCollection[dt.timedelta], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.DurationType[Literal["us"]]]]: ... -@overload -def scalar( - value: NullableCollection[MonthDayNano], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[types.ListType[types.MonthDayNanoIntervalType]]: ... -@overload -def scalar( - value: NullableCollection[Any], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[Any]: ... -@overload -def scalar( - value: Any, - type: types.NullType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> NullScalar: ... -@overload -def scalar( - value: Any, - type: types.BoolType | Literal["bool"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> BooleanScalar: ... -@overload -def scalar( - value: Any, - type: types.UInt8Type | Literal["uint8"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UInt8Scalar: ... -@overload -def scalar( - value: Any, - type: types.Int8Type | Literal["int8"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Int8Scalar: ... -@overload -def scalar( - value: Any, - type: types.UInt16Type | Literal["uint16"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UInt16Scalar: ... -@overload -def scalar( - value: Any, - type: types.Int16Type | Literal["int16"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Int16Scalar: ... -@overload -def scalar( - value: Any, - type: types.Uint32Type | Literal["uint32"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UInt32Scalar: ... -@overload -def scalar( - value: Any, - type: types.Int32Type | Literal["int32"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Int32Scalar: ... -@overload -def scalar( - value: Any, - type: types.UInt64Type | Literal["uint64"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UInt64Scalar: ... -@overload -def scalar( - value: Any, - type: types.Int64Type | Literal["int64"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Int64Scalar: ... -@overload -def scalar( - value: Any, - type: types.Float16Type | Literal["f16"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> HalfFloatScalar: ... 
-@overload -def scalar( - value: Any, - type: types.Float32Type | Literal["f32"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> FloatScalar: ... -@overload -def scalar( - value: Any, - type: types.Float64Type | Literal["f64"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> DoubleScalar: ... -@overload -def scalar( - value: Any, - type: types.Date32Type, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Date32Scalar: ... -@overload -def scalar( - value: Any, - type: types.Date64Type, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Date64Scalar: ... -@overload -def scalar( - value: Any, - type: types.MonthDayNanoIntervalType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> MonthDayNanoIntervalScalar: ... -@overload -def scalar( - value: Any, - type: types.StringType | Literal["string"], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> StringScalar: ... -@overload -def scalar( - value: Any, - type: types.LargeStringType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> LargeStringScalar: ... -@overload -def scalar( - value: Any, - type: types.StringViewType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> StringViewScalar: ... -@overload -def scalar( - value: Any, - type: types.BinaryType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> BinaryScalar: ... -@overload -def scalar( - value: Any, - type: types.LargeBinaryType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> LargeBinaryScalar: ... -@overload -def scalar( - value: Any, - type: types.BinaryViewType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> BinaryViewScalar: ... -@overload -def scalar( - value: Any, - type: types.TimestampType[types._Unit, types._Tz], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> TimestampScalar[types._Unit, types._Tz]: ... -@overload -def scalar( - value: Any, - type: types.Time32Type[types._Time32Unit], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Time32Scalar[types._Time32Unit]: ... -@overload -def scalar( - value: Any, - type: types.Time64Type[types._Time64Unit], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Time64Scalar[types._Time64Unit]: ... -@overload -def scalar( - value: Any, - type: types.DurationType[types._Unit], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> DurationScalar[types._Unit]: ... -@overload -def scalar( - value: Any, - type: types.Decimal32Type[types._Precision, types._Scale], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Decimal32Scalar[types._Precision, types._Scale]: ... -@overload -def scalar( - value: Any, - type: types.Decimal64Type[types._Precision, types._Scale], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Decimal64Scalar[types._Precision, types._Scale]: ... -@overload -def scalar( - value: Any, - type: types.Decimal128Type[types._Precision, types._Scale], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Decimal128Scalar[types._Precision, types._Scale]: ... 
-@overload -def scalar( - value: Any, - type: types.Decimal256Type[types._Precision, types._Scale], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Decimal256Scalar[types._Precision, types._Scale]: ... -@overload -def scalar( - value: Any, - type: types.ListType[_DataTypeT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListScalar[_DataTypeT]: ... -@overload -def scalar( - value: Any, - type: types.LargeListType[_DataTypeT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> LargeListScalar[_DataTypeT]: ... -@overload -def scalar( - value: Any, - type: types.ListViewType[_DataTypeT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> ListViewScalar[_DataTypeT]: ... -@overload -def scalar( - value: Any, - type: types.LargeListViewType[_DataTypeT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> LargeListViewScalar[_DataTypeT]: ... -@overload -def scalar( - value: Any, - type: types.FixedSizeListType[_DataTypeT, types._Size], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> FixedSizeListScalar[_DataTypeT, types._Size]: ... -@overload -def scalar( - value: Any, - type: types.DictionaryType[types._IndexT, types._BasicValueT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> DictionaryScalar[types._IndexT, types._BasicValueT]: ... -@overload -def scalar( - value: Any, - type: types.MapType[types._K, types._ValueT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> MapScalar[types._K, types._ValueT]: ... -@overload -def scalar( - value: Any, - type: types.StructType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> StructScalar: ... -@overload -def scalar( - value: Any, - type: types.UnionType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UnionScalar: ... -@overload -def scalar( - value: Any, - type: types.RunEndEncodedType[types._RunEndType, types._BasicValueT], - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> RunEndEncodedScalar[types._RunEndType, types._BasicValueT]: ... -@overload -def scalar( - value: Any, - type: types.Bool8Type, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> Bool8Scalar: ... -@overload -def scalar( - value: Any, - type: types.UuidType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> UuidScalar: ... -@overload -def scalar( - value: Any, - type: types.JsonType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> JsonScalar: ... -@overload -def scalar( - value: Any, - type: types.OpaqueType, - *, - from_pandas: bool | None = None, - memory_pool: MemoryPool | None = None, -) -> OpaqueScalar: ... -@overload def scalar( value: Any, type: _DataTypeT, *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, -) -> Scalar[_DataTypeT]: ... -def scalar(*args, **kwargs): +) -> Scalar[_DataTypeT]: """ Create a pyarrow.Scalar instance from a Python object. 
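Since the per-type scalar() overloads above collapse into a single generic signature, here is a small runtime sketch (not part of the stubs) of the mapping that signature is meant to annotate, using only public pyarrow calls:

import pyarrow as pa

# The concrete Scalar subclass follows the Arrow type passed (or inferred).
s = pa.scalar(1, type=pa.int32())
assert isinstance(s, pa.Int32Scalar) and s.as_py() == 1

lst = pa.scalar(["a", None, "c"], type=pa.list_(pa.string()))
assert isinstance(lst, pa.ListScalar)
assert len(lst) == 3           # ListScalar.__len__
assert lst[0].as_py() == "a"   # ListScalar.__getitem__ returns a StringScalar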
@@ -1032,5 +710,4 @@ __all__ = [ "JsonScalar", "OpaqueScalar", "scalar", - "NullableCollection", ] diff --git a/python/pyarrow/__lib_pxi/tensor.pyi b/python/pyarrow/__lib_pxi/tensor.pyi index 5ad950c84d0..ac34fa08ffc 100644 --- a/python/pyarrow/__lib_pxi/tensor.pyi +++ b/python/pyarrow/__lib_pxi/tensor.pyi @@ -26,7 +26,7 @@ import numpy as np from pyarrow.lib import _Weakrefable from scipy.sparse import coo_matrix, csr_matrix -from sparse import COO +from sparse import COO # type: ignore class Tensor(_Weakrefable): """ @@ -37,7 +37,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) type: int32 shape: (2, 3) @@ -61,7 +61,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) type: int32 shape: (2, 3) @@ -76,7 +76,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.to_numpy() array([[ 2, 2, 4], [ 4, 5, 100]], dtype=int32) @@ -95,9 +95,9 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32) - >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a", "b"]) + >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a","b"]) >>> tensor.equals(tensor) True >>> tensor.equals(tensor2) @@ -117,7 +117,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.dim_name(0) 'dim1' >>> tensor.dim_name(1) @@ -133,7 +133,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.dim_names ['dim1', 'dim2'] """ @@ -147,7 +147,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.is_mutable True """ @@ -161,7 +161,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.is_contiguous True """ @@ -175,7 +175,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.ndim 2 """ 
@@ -189,7 +189,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.size 6 """ @@ -203,7 +203,7 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.shape (2, 3) """ @@ -217,12 +217,15 @@ class Tensor(_Weakrefable): >>> import pyarrow as pa >>> import numpy as np >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1", "dim2"]) + >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) >>> tensor.strides (12, 4) """ class SparseCOOTensor(_Weakrefable): + """ + A sparse COO tensor. + """ @classmethod def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: """ @@ -265,12 +268,12 @@ class SparseCOOTensor(_Weakrefable): @classmethod def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: """ - Convert scipy.sparse.coo_matrix to arrow::SparseCOOTensor + Convert scipy.sparse.coo_array or scipy.sparse.coo_matrix to arrow::SparseCOOTensor Parameters ---------- - obj : scipy.sparse.csr_matrix - The scipy matrix that should be converted. + obj : scipy.sparse.coo_array or scipy.sparse.coo_matrix + The scipy array or matrix that should be converted. dim_names : list, optional Names of the dimensions. """ @@ -302,7 +305,7 @@ class SparseCOOTensor(_Weakrefable): """ def to_scipy(self) -> coo_matrix: """ - Convert arrow::SparseCOOTensor to scipy.sparse.coo_matrix. + Convert arrow::SparseCOOTensor to scipy.sparse.coo_array. """ def to_pydata_sparse(self) -> COO: """ @@ -399,11 +402,11 @@ class SparseCSRMatrix(_Weakrefable): @classmethod def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: """ - Convert scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. + Convert scipy.sparse.csr_array or scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. Parameters ---------- - obj : scipy.sparse.csr_matrix + obj : scipy.sparse.csr_array or scipy.sparse.csr_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. @@ -424,7 +427,7 @@ class SparseCSRMatrix(_Weakrefable): """ def to_scipy(self) -> csr_matrix: """ - Convert arrow::SparseCSRMatrix to scipy.sparse.csr_matrix. + Convert arrow::SparseCSRMatrix to scipy.sparse.csr_array. """ def to_tensor(self) -> Tensor: """ @@ -515,11 +518,11 @@ class SparseCSCMatrix(_Weakrefable): @classmethod def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: """ - Convert scipy.sparse.csc_matrix to arrow::SparseCSCMatrix + Convert scipy.sparse.csc_array or scipy.sparse.csc_matrix to arrow::SparseCSCMatrix Parameters ---------- - obj : scipy.sparse.csc_matrix + obj : scipy.sparse.csc_array or scipy.sparse.csc_matrix The scipy matrix that should be converted. dim_names : list, optional Names of the dimensions. 
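The from_scipy/to_scipy docstring updates in these hunks (mentioning scipy.sparse.*_array alongside *_matrix) describe conversions along these lines; a minimal round-trip sketch, assuming scipy is installed:

import numpy as np
import pyarrow as pa
from scipy.sparse import coo_matrix

dense = np.array([[0, 1, 0], [2, 0, 3]], dtype=np.int64)
sparse = coo_matrix(dense)
tensor = pa.SparseCOOTensor.from_scipy(sparse, dim_names=["row", "col"])
assert tensor.shape == (2, 3)
assert (tensor.to_scipy().toarray() == dense).all()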
@@ -540,7 +543,7 @@ class SparseCSCMatrix(_Weakrefable): """ def to_scipy(self) -> csr_matrix: """ - Convert arrow::SparseCSCMatrix to scipy.sparse.csc_matrix + Convert arrow::SparseCSCMatrix to scipy.sparse.csc_array """ def to_tensor(self) -> Tensor: """ diff --git a/python/pyarrow/__lib_pxi/types.pyi b/python/pyarrow/__lib_pxi/types.pyi index aa965e3506c..27a2c75d68d 100644 --- a/python/pyarrow/__lib_pxi/types.pyi +++ b/python/pyarrow/__lib_pxi/types.pyi @@ -26,19 +26,20 @@ if sys.version_info >= (3, 11): else: from typing_extensions import Self -from typing import Any, Generic, Iterable, Iterator, Literal, overload +from typing import Any, Generic, Iterable, Iterator, Literal import numpy as np import pandas as pd from pyarrow._stubs_typing import SupportArrowSchema +# TODO from pyarrow.lib import ( Array, - ChunkedArray, + # ChunkedArray, ExtensionArray, MemoryPool, MonthDayNano, - Table, + # Table, ) from typing_extensions import TypeVar, deprecated @@ -119,7 +120,7 @@ class DataType(_Weakrefable): ListType(list) >>> pa.list_(pa.string()).num_fields 1 - >>> struct = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct = pa.struct({'x': pa.int32(), 'y': pa.string()}) >>> struct.num_fields 2 """ @@ -137,7 +138,10 @@ class DataType(_Weakrefable): >>> pa.string().num_buffers 3 """ - def __hash__(self) -> int: ... + def __hash__(self) -> int: + """ + Return hash(self). + """ def equals(self, other: DataType | str, *, check_metadata: bool = False) -> bool: """ Return true if type is equivalent to passed value. @@ -240,12 +244,12 @@ class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): Create an instance of timestamp type: - >>> pa.timestamp("us") + >>> pa.timestamp('us') TimestampType(timestamp[us]) Create an instance of timestamp type with timezone: - >>> pa.timestamp("s", tz="UTC") + >>> pa.timestamp('s', tz='UTC') TimestampType(timestamp[s, tz=UTC]) """ @property @@ -256,7 +260,7 @@ class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): Examples -------- >>> import pyarrow as pa - >>> t = pa.timestamp("us") + >>> t = pa.timestamp('us') >>> t.unit 'us' """ @@ -268,7 +272,7 @@ class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): Examples -------- >>> import pyarrow as pa - >>> t = pa.timestamp("s", tz="UTC") + >>> t = pa.timestamp('s', tz='UTC') >>> t.tz 'UTC' """ @@ -287,7 +291,7 @@ class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): Create an instance of time32 type: >>> import pyarrow as pa - >>> pa.time32("ms") + >>> pa.time32('ms') Time32Type(time32[ms]) """ @property @@ -298,7 +302,7 @@ class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): Examples -------- >>> import pyarrow as pa - >>> t = pa.time32("ms") + >>> t = pa.time32('ms') >>> t.unit 'ms' """ @@ -317,7 +321,7 @@ class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): Create an instance of time64 type: >>> import pyarrow as pa - >>> pa.time64("us") + >>> pa.time64('us') Time64Type(time64[us]) """ @property @@ -328,7 +332,7 @@ class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): Examples -------- >>> import pyarrow as pa - >>> t = pa.time64("us") + >>> t = pa.time64('us') >>> t.unit 'us' """ @@ -342,7 +346,7 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): Create an instance of duration type: >>> import pyarrow as pa - >>> pa.duration("s") + >>> pa.duration('s') DurationType(duration[s]) """ @property @@ -353,7 +357,7 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): Examples -------- >>> import pyarrow as pa 
- >>> t = pa.duration("s") + >>> t = pa.duration('s') >>> t.unit 's' """ @@ -860,17 +864,17 @@ class StructType(DataType): Accessing fields using direct indexing: - >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) >>> struct_type[0] pyarrow.Field - >>> struct_type["y"] + >>> struct_type['y'] pyarrow.Field Accessing fields using ``field()``: >>> struct_type.field(1) pyarrow.Field - >>> struct_type.field("x") + >>> struct_type.field('x') pyarrow.Field # Creating a schema from the struct type's fields: @@ -897,16 +901,16 @@ class StructType(DataType): Examples -------- >>> import pyarrow as pa - >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) Index of the field with a name 'y': - >>> struct_type.get_field_index("y") + >>> struct_type.get_field_index('y') 1 Index of the field that does not exist: - >>> struct_type.get_field_index("z") + >>> struct_type.get_field_index('z') -1 """ def field(self, i: int | str) -> Field: @@ -925,7 +929,7 @@ class StructType(DataType): -------- >>> import pyarrow as pa - >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) Select the second field: @@ -934,7 +938,7 @@ class StructType(DataType): Select the field named 'x': - >>> struct_type.field("x") + >>> struct_type.field('x') pyarrow.Field """ def get_all_field_indices(self, name: str) -> list[int]: @@ -953,12 +957,18 @@ class StructType(DataType): Examples -------- >>> import pyarrow as pa - >>> struct_type = pa.struct({"x": pa.int32(), "y": pa.string()}) - >>> struct_type.get_all_field_indices("x") + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) + >>> struct_type.get_all_field_indices('x') [0] """ - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... + def __len__(self) -> int: + """ + Like num_fields(). + """ + def __iter__(self) -> Iterator[Field]: + """ + Iterate over struct fields, in order. + """ __getitem__ = field # pyright: ignore[reportUnknownVariableType] @property def names(self) -> list[str]: @@ -968,7 +978,7 @@ class StructType(DataType): Examples -------- >>> import pyarrow as pa - >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type = pa.struct([('a', pa.int64()), ('b', pa.float64()), ('c', pa.string())]) >>> struct_type.names ['a', 'b', 'c'] """ @@ -980,7 +990,7 @@ class StructType(DataType): Examples -------- >>> import pyarrow as pa - >>> struct_type = pa.struct([("a", pa.int64()), ("b", pa.float64()), ("c", pa.string())]) + >>> struct_type = pa.struct([('a', pa.int64()), ('b', pa.float64()), ('c', pa.string())]) >>> struct_type.fields [pyarrow.Field, pyarrow.Field, pyarrow.Field] """ @@ -994,32 +1004,24 @@ class UnionType(DataType): Create an instance of a dense UnionType using ``pa.union``: >>> import pyarrow as pa - >>> ( - ... pa.union( - ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], - ... mode=pa.lib.UnionMode_DENSE, - ... ), - ... ) + >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], + ... 
mode=pa.lib.UnionMode_DENSE), (DenseUnionType(dense_union),) Create an instance of a dense UnionType using ``pa.dense_union``: - >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> pa.dense_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) DenseUnionType(dense_union) Create an instance of a sparse UnionType using ``pa.union``: - >>> ( - ... pa.union( - ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], - ... mode=pa.lib.UnionMode_SPARSE, - ... ), - ... ) + >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], + ... mode=pa.lib.UnionMode_SPARSE), (SparseUnionType(sparse_union),) Create an instance of a sparse UnionType using ``pa.sparse_union``: - >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) SparseUnionType(sparse_union) """ @property @@ -1030,7 +1032,7 @@ class UnionType(DataType): Examples -------- >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) >>> union.mode 'sparse' """ @@ -1042,12 +1044,18 @@ class UnionType(DataType): Examples -------- >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) >>> union.type_codes [0, 1] """ - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... + def __len__(self) -> int: + """ + Like num_fields(). + """ + def __iter__(self) -> Iterator[Field]: + """ + Iterate over union members, in order. + """ def field(self, i: int) -> Field: """ Return a child field by its numeric index. @@ -1063,7 +1071,7 @@ class UnionType(DataType): Examples -------- >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) >>> union[0] pyarrow.Field """ @@ -1078,21 +1086,27 @@ class SparseUnionType(UnionType): Create an instance of a sparse UnionType using ``pa.union``: >>> import pyarrow as pa - >>> ( - ... pa.union( - ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], - ... mode=pa.lib.UnionMode_SPARSE, - ... ), - ... ) + >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], + ... mode=pa.lib.UnionMode_SPARSE), (SparseUnionType(sparse_union),) Create an instance of a sparse UnionType using ``pa.sparse_union``: - >>> pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) SparseUnionType(sparse_union) """ @property - def mode(self) -> Literal["sparse"]: ... + def mode(self) -> Literal["sparse"]: + """ + The mode of the union ("dense" or "sparse"). + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) + >>> union.mode + 'sparse' + """ class DenseUnionType(UnionType): """ @@ -1103,22 +1117,28 @@ class DenseUnionType(UnionType): Create an instance of a dense UnionType using ``pa.union``: >>> import pyarrow as pa - >>> ( - ... pa.union( - ... [pa.field("a", pa.binary(10)), pa.field("b", pa.string())], - ... mode=pa.lib.UnionMode_DENSE, - ... ), - ... 
) + >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], + ... mode=pa.lib.UnionMode_DENSE), (DenseUnionType(dense_union),) Create an instance of a dense UnionType using ``pa.dense_union``: - >>> pa.dense_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())]) + >>> pa.dense_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) DenseUnionType(dense_union) """ @property - def mode(self) -> Literal["dense"]: ... + def mode(self) -> Literal["dense"]: + """ + The mode of the union ("dense" or "sparse"). + + Examples + -------- + >>> import pyarrow as pa + >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) + >>> union.mode + 'sparse' + """ _RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) @@ -1131,7 +1151,9 @@ class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): @property def value_type(self) -> _BasicValueT: ... -_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) +# TODO: replace below with: +# _StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) +_StorageT = TypeVar("_StorageT", bound=Array | Any) class BaseExtensionType(DataType): """ @@ -1155,7 +1177,19 @@ class BaseExtensionType(DataType): """ The underlying storage type. """ - def wrap_array(self, storage: _StorageT) -> _StorageT: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: + """ + Wrap the given storage array as an extension array. + + Parameters + ---------- + storage : Array or ChunkedArray + + Returns + ------- + array : Array or ChunkedArray + Extension array wrapping the storage array + """ class ExtensionType(BaseExtensionType): """ @@ -1219,7 +1253,7 @@ class ExtensionType(BaseExtensionType): ... {"numer": 10, "denom": 17}, ... {"numer": 20, "denom": 13}, ... ], - ... type=rational_type.storage_type, + ... type=rational_type.storage_type ... ) >>> rational_array = rational_type.wrap_array(storage_array) >>> rational_array @@ -1264,7 +1298,13 @@ class ExtensionType(BaseExtensionType): ``__arrow_ext_deserialize__``. """ - def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __init__(self, storage_type: DataType, extension_name: str) -> None: + """ + Initialize an extension type instance. + + This should be called at the end of the subclass' + ``__init__`` method. + """ def __arrow_ext_serialize__(self) -> bytes: """ Serialized representation of metadata to reconstruct the type object. @@ -1301,7 +1341,8 @@ class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): Create an instance of fixed shape tensor extension type with permutation: - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), + ... permutation=[0, 2, 1]) >>> tensor_type.permutation [0, 2, 1] """ @@ -1397,37 +1438,37 @@ class OpaqueType(BaseExtensionType): The name of the external system. """ -@deprecated( - "This class is deprecated and its deserialization is disabled by default. " - ":class:`ExtensionType` is recommended instead." -) -class PyExtensionType(ExtensionType): - """ - Concrete base class for Python-defined extension types based on pickle - for (de)serialization. - - .. warning:: - This class is deprecated and its deserialization is disabled by default. - :class:`ExtensionType` is recommended instead. - - Parameters - ---------- - storage_type : DataType - The storage type for which the extension is built. - """ - def __init__(self, storage_type: DataType) -> None: ... 
- @classmethod - def set_auto_load(cls, value: bool) -> None: - """ - Enable or disable auto-loading of serialized PyExtensionType instances. - - Parameters - ---------- - value : bool - Whether to enable auto-loading. - """ +# @deprecated( +# "This class is deprecated and its deserialization is disabled by default. " +# ":class:`ExtensionType` is recommended instead." +# ) +# class PyExtensionType(ExtensionType): +# """ +# Concrete base class for Python-defined extension types based on pickle +# for (de)serialization. +# +# .. warning:: +# This class is deprecated and its deserialization is disabled by default. +# :class:`ExtensionType` is recommended instead. +# +# Parameters +# ---------- +# storage_type : DataType +# The storage type for which the extension is built. +# """ +# def __init__(self, storage_type: DataType) -> None: ... +# @classmethod +# def set_auto_load(cls, value: bool) -> None: +# """ +# Enable or disable auto-loading of serialized PyExtensionType instances. +# +# Parameters +# ---------- +# value : bool +# Whether to enable auto-loading. +# """ -class UnknownExtensionType(PyExtensionType): # type: ignore +class UnknownExtensionType(ExtensionType): # type: ignore """ A concrete class for Python-defined extension types that refer to an unknown Python implementation. @@ -1439,9 +1480,12 @@ class UnknownExtensionType(PyExtensionType): # type: ignore serialized : bytes The serialised output. """ - def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + def __init__(self, storage_type: DataType, serialized: bytes) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ -def register_extension_type(ext_type: PyExtensionType) -> None: # type: ignore +def register_extension_type(ext_type: ExtensionType) -> None: # type: ignore """ Register a Python extension type. @@ -1549,23 +1593,52 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): **kwargs : optional additional key-value metadata """ - def __init__(self, __arg0__: Mapping[bytes, bytes] | Mapping[str, str] | None = None, **kwargs) -> None: ... - def equals(self, other: KeyValueMetadata) -> bool: ... - def __len__(self) -> int: ... - def __contains__(self, __key: object) -> bool: ... - def __getitem__(self, __key: Any) -> Any: ... - def __iter__(self) -> Iterator[bytes]: ... - def get_all(self, key: str) -> list[bytes]: ... + def __init__(self, __arg0__: Mapping[bytes, bytes] | None = None, **kwargs) -> None: + """ + Initialize self. See help(type(self)) for accurate signature. + """ + def equals(self, other: KeyValueMetadata) -> bool: + """ + Parameters + ---------- + other : pyarrow.KeyValueMetadata + + Returns + ------- + bool + """ + def __len__(self) -> int: + """ + Return len(self). + """ + def __contains__(self, __key: object) -> bool: + """ + Return bool(key in self). + """ + def __getitem__(self, __key: Any) -> Any: + """ + Return self[key]. + """ + def __iter__(self) -> Iterator[bytes]: + """ + Implement iter(self). + """ + def get_all(self, key: str) -> list[bytes]: + """ + Parameters + ---------- + key : str + + Returns + ------- + list[byte] + """ def to_dict(self) -> dict[bytes, bytes]: """ Convert KeyValueMetadata to dict. If a key occurs twice, the value for the first one is returned """ -def ensure_metadata( - meta: Mapping[bytes | str, bytes | str] | KeyValueMetadata | None, allow_none: bool = False -) -> KeyValueMetadata | None: ... 
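register_extension_type() is now annotated against ExtensionType rather than the commented-out PyExtensionType. A minimal sketch of the subclass shape it expects (the LabelType class and the "example.label" name are made up for illustration):

import pyarrow as pa

class LabelType(pa.ExtensionType):
    """String storage tagged with an application-specific extension name."""

    def __init__(self):
        super().__init__(pa.string(), "example.label")

    def __arrow_ext_serialize__(self) -> bytes:
        return b""  # no parameters to persist

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        return cls()

label_type = LabelType()
pa.register_extension_type(label_type)
arr = label_type.wrap_array(pa.array(["a", "b"], type=pa.string()))
assert arr.type.extension_name == "example.label"
pa.unregister_extension_type("example.label")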
- class Field(_Weakrefable, Generic[_DataTypeT]): """ A named field, with a data type, nullability, and optional metadata. @@ -1579,11 +1652,12 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Create an instance of pyarrow.Field: >>> import pyarrow as pa - >>> pa.field("key", pa.int32()) + >>> pa.field('key', pa.int32()) pyarrow.Field - >>> pa.field("key", pa.int32(), nullable=False) + >>> pa.field('key', pa.int32(), nullable=False) pyarrow.Field - >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field = pa.field('key', pa.int32(), + ... metadata={"key": "Something important"}) >>> field pyarrow.Field >>> field.metadata @@ -1612,14 +1686,17 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> f1 = pa.field("key", pa.int32()) - >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1 = pa.field('key', pa.int32()) + >>> f2 = pa.field('key', pa.int32(), nullable=False) >>> f1.equals(f2) False >>> f1.equals(f1) True """ - def __hash__(self) -> int: ... + def __hash__(self) -> int: + """ + Return hash(self). + """ @property def nullable(self) -> bool: """ @@ -1628,8 +1705,8 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> f1 = pa.field("key", pa.int32()) - >>> f2 = pa.field("key", pa.int32(), nullable=False) + >>> f1 = pa.field('key', pa.int32()) + >>> f2 = pa.field('key', pa.int32(), nullable=False) >>> f1.nullable True >>> f2.nullable @@ -1643,7 +1720,7 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32()) + >>> field = pa.field('key', pa.int32()) >>> field.name 'key' """ @@ -1659,7 +1736,8 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field = pa.field('key', pa.int32(), + ... metadata={"key": "Something important"}) >>> field.metadata {b'key': b'Something important'} """ @@ -1681,7 +1759,7 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32()) + >>> field = pa.field('key', pa.int32()) Create new field by adding metadata to existing one: @@ -1702,7 +1780,8 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field = pa.field('key', pa.int32(), + ... 
metadata={"key": "Something important"}) >>> field.metadata {b'key': b'Something important'} @@ -1726,7 +1805,7 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32()) + >>> field = pa.field('key', pa.int32()) >>> field pyarrow.Field @@ -1751,13 +1830,13 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32()) + >>> field = pa.field('key', pa.int32()) >>> field pyarrow.Field Create new field by replacing the name of an existing one: - >>> field_new = field.with_name("lock") + >>> field_new = field.with_name('lock') >>> field_new pyarrow.Field """ @@ -1776,7 +1855,7 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> field = pa.field("key", pa.int32()) + >>> field = pa.field('key', pa.int32()) >>> field pyarrow.Field >>> field.nullable @@ -1802,9 +1881,9 @@ class Field(_Weakrefable, Generic[_DataTypeT]): Examples -------- >>> import pyarrow as pa - >>> f1 = pa.field("bar", pa.float64(), nullable=False) - >>> f2 = pa.field("foo", pa.int32()).with_metadata({"key": "Something important"}) - >>> ff = pa.field("ff", pa.struct([f1, f2]), nullable=False) + >>> f1 = pa.field('bar', pa.float64(), nullable=False) + >>> f2 = pa.field('foo', pa.int32()).with_metadata({"key": "Something important"}) + >>> ff = pa.field('ff', pa.struct([f1, f2]), nullable=False) Flatten a struct field: @@ -1865,27 +1944,42 @@ class Schema(_Weakrefable): Create a new Arrow Schema object: >>> import pyarrow as pa - >>> pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + >>> pa.schema([ + ... ('some_int', pa.int32()), + ... ('some_string', pa.string()) + ... ]) some_int: int32 some_string: string Create Arrow Schema with metadata: - >>> pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) + >>> pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}) n_legs: int64 animals: string -- schema metadata -- n_legs: 'Number of legs per animal' """ - def __len__(self) -> int: ... - def __getitem__(self, key: str) -> Field: ... + def __len__(self) -> int: + """ + Return len(self). + """ + def __getitem__(self, key: str) -> Field: + """ + Return self[key]. + """ _field = __getitem__ # pyright: ignore[reportUnknownVariableType] - def __iter__(self) -> Iterator[Field]: ... - def __hash__(self) -> int: ... + def __iter__(self) -> Iterator[Field]: + """ + Implement iter(self). + """ + def __hash__(self) -> int: + """ + Return hash(self). + """ def __sizeof__(self) -> int: ... @property def pandas_metadata(self) -> dict: @@ -1896,12 +1990,8 @@ class Schema(_Weakrefable): -------- >>> import pyarrow as pa >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) + >>> df = pd.DataFrame({'n_legs': [2, 4, 5, 100], + ... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]}) >>> schema = pa.Table.from_pandas(df).schema Select pandas metadata field from Arrow Schema: @@ -1921,7 +2011,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... 
pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Get the names of the schema's fields: @@ -1940,7 +2032,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Get the types of the schema's fields: @@ -1959,17 +2053,19 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}) Get the metadata of the schema's fields: >>> schema.metadata {b'n_legs': b'Number of legs per animal'} """ - def empty_table(self) -> Table: + # TODO: replace below with: + # def empty_table(self) -> Table: + def empty_table(self) -> Any: """ Provide an empty table according to the schema. @@ -1980,7 +2076,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Create an empty table with schema's fields: @@ -2009,11 +2107,14 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema1 = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> schema2 = pa.schema([("some_int", pa.int32()), ("some_string", pa.string())]) + >>> schema1 = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}) + >>> schema2 = pa.schema([ + ... ('some_int', pa.int32()), + ... ('some_string', pa.string()) + ... ]) Test two equal schemas: @@ -2048,7 +2149,10 @@ class Schema(_Weakrefable): -------- >>> import pandas as pd >>> import pyarrow as pa - >>> df = pd.DataFrame({"int": [1, 2], "str": ["a", "b"]}) + >>> df = pd.DataFrame({ + ... 'int': [1, 2], + ... 'str': ['a', 'b'] + ... }) Create an Arrow Schema from the schema of a pandas dataframe: @@ -2073,7 +2177,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Select the second field: @@ -2082,7 +2188,7 @@ class Schema(_Weakrefable): Select the field of the column named 'n_legs': - >>> schema.field("n_legs") + >>> schema.field('n_legs') pyarrow.Field """ @deprecated("Use 'field' instead") @@ -2117,7 +2223,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Get the index of the field named 'animals': @@ -2126,14 +2234,11 @@ class Schema(_Weakrefable): Index in case of several fields with the given name: - >>> schema = pa.schema( - ... [ - ... pa.field("n_legs", pa.int64()), - ... pa.field("animals", pa.string()), - ... 
pa.field("animals", pa.bool_()), - ... ], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string()), + ... pa.field('animals', pa.bool_())], + ... metadata={"n_legs": "Number of legs per animal"}) >>> schema.get_field_index("animals") -1 """ @@ -2153,13 +2258,10 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema( - ... [ - ... pa.field("n_legs", pa.int64()), - ... pa.field("animals", pa.string()), - ... pa.field("animals", pa.bool_()), - ... ] - ... ) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string()), + ... pa.field('animals', pa.bool_())]) Get the indexes of the fields named 'animals': @@ -2185,11 +2287,13 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Append a field 'extra' at the end of the schema: - >>> schema_new = schema.append(pa.field("extra", pa.bool_())) + >>> schema_new = schema.append(pa.field('extra', pa.bool_())) >>> schema_new n_legs: int64 animals: string @@ -2217,11 +2321,13 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Insert a new field on the second position: - >>> schema.insert(1, pa.field("extra", pa.bool_())) + >>> schema.insert(1, pa.field('extra', pa.bool_())) n_legs: int64 extra: bool animals: string @@ -2241,7 +2347,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Remove the second field of the schema: @@ -2264,11 +2372,13 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Replace the second field of the schema with a new field 'extra': - >>> schema.set(1, pa.field("replaced", pa.bool_())) + >>> schema.set(1, pa.field('replaced', pa.bool_())) n_legs: int64 replaced: bool """ @@ -2298,7 +2408,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Add metadata to existing schema field: @@ -2324,7 +2436,9 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())]) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())]) Write schema to Buffer: @@ -2342,10 +2456,10 @@ class Schema(_Weakrefable): Examples -------- >>> import pyarrow as pa - >>> schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... 
) + >>> schema = pa.schema([ + ... pa.field('n_legs', pa.int64()), + ... pa.field('animals', pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}) >>> schema n_legs: int64 animals: string @@ -2376,6 +2490,8 @@ class Schema(_Weakrefable): Display Field-level KeyValueMetadata show_schema_metadata : boolean, default True Display Schema-level KeyValueMetadata + element_size_limit : int, default 100 + Maximum number of characters of a single element before it is truncated. Returns ------- @@ -2452,13 +2568,9 @@ def unify_schemas( If Fields of the same name are not mergeable. """ -@overload -def field(name: SupportArrowSchema) -> Field[Any]: ... -@overload def field( - name: str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None -) -> Field[_DataTypeT]: ... -def field(*args, **kwargs): + name: SupportArrowSchema | str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT] | Field[Any]: """ Create a pyarrow.Field instance. @@ -2485,12 +2597,13 @@ def field(*args, **kwargs): Create an instance of pyarrow.Field: >>> import pyarrow as pa - >>> pa.field("key", pa.int32()) + >>> pa.field('key', pa.int32()) pyarrow.Field - >>> pa.field("key", pa.int32(), nullable=False) + >>> pa.field('key', pa.int32(), nullable=False) pyarrow.Field - >>> field = pa.field("key", pa.int32(), metadata={"key": "Something important"}) + >>> field = pa.field('key', pa.int32(), + ... metadata={"key": "Something important"}) >>> field pyarrow.Field >>> field.metadata @@ -2503,7 +2616,7 @@ def field(*args, **kwargs): A str can also be passed for the type parameter: - >>> pa.field("key", "int32") + >>> pa.field('key', 'int32') pyarrow.Field """ @@ -2523,7 +2636,7 @@ def null() -> NullType: Create a ``Field`` type with a null type and a name: - >>> pa.field("null_field", pa.null()) + >>> pa.field('null_field', pa.null()) pyarrow.Field """ @@ -2544,7 +2657,7 @@ def bool_() -> BoolType: Create a ``Field`` type with a boolean type and a name: - >>> pa.field("bool_field", pa.bool_()) + >>> pa.field('bool_field', pa.bool_()) pyarrow.Field """ @@ -2748,50 +2861,7 @@ def uint64() -> UInt64Type: ] """ -def tzinfo_to_string(tz: dt.tzinfo) -> str: - """ - Converts a time zone object into a string indicating the name of a time - zone, one of: - * As used in the Olson time zone database (the "tz database" or - "tzdata"), such as "America/New_York" - * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - - Parameters - ---------- - tz : datetime.tzinfo - Time zone object - - Returns - ------- - name : str - Time zone name - """ - -def string_to_tzinfo(name: str) -> dt.tzinfo: - """ - Convert a time zone name into a time zone object. - - Supported input strings are: - * As used in the Olson time zone database (the "tz database" or - "tzdata"), such as "America/New_York" - * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - - Parameters - ---------- - name: str - Time zone name. - - Returns - ------- - tz : datetime.tzinfo - Time zone object - """ - -@overload -def timestamp(unit: _Unit | str) -> TimestampType[_Unit, _Tz]: ... -@overload -def timestamp(unit: _Unit | str, tz: _Tz) -> TimestampType[_Unit, _Tz]: ... -def timestamp(*args, **kwargs): +def timestamp(unit: _Unit, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: """ Create instance of timestamp type with resolution and optional time zone. 
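field() likewise collapses from two overloads into one signature in this region; a short runtime sketch (separate from the stubs) of the call styles it still has to cover:

import pyarrow as pa

f = pa.field("key", pa.int32(), nullable=False, metadata={"doc": "primary key"})
assert f.name == "key" and f.type == pa.int32() and not f.nullable
assert f.metadata == {b"doc": b"primary key"}

# A type given as a string is accepted too, as the docstring notes.
assert pa.field("key", "int32").type == pa.int32()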
@@ -2808,19 +2878,19 @@ def timestamp(*args, **kwargs): Create an instance of timestamp type: >>> import pyarrow as pa - >>> pa.timestamp("us") + >>> pa.timestamp('us') TimestampType(timestamp[us]) - >>> pa.timestamp("s", tz="America/New_York") + >>> pa.timestamp('s', tz='America/New_York') TimestampType(timestamp[s, tz=America/New_York]) - >>> pa.timestamp("s", tz="+07:30") + >>> pa.timestamp('s', tz='+07:30') TimestampType(timestamp[s, tz=+07:30]) Use timestamp type when creating a scalar object: >>> from datetime import datetime - >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("s", tz="UTC")) + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('s', tz='UTC')) - >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp("us")) + >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('us')) Returns @@ -2844,9 +2914,9 @@ def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: Examples -------- >>> import pyarrow as pa - >>> pa.time32("s") + >>> pa.time32('s') Time32Type(time32[s]) - >>> pa.time32("ms") + >>> pa.time32('ms') Time32Type(time32[ms]) """ @@ -2866,9 +2936,9 @@ def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: Examples -------- >>> import pyarrow as pa - >>> pa.time64("us") + >>> pa.time64('us') Time64Type(time64[us]) - >>> pa.time64("ns") + >>> pa.time64('ns') Time64Type(time64[ns]) """ @@ -2891,14 +2961,14 @@ def duration(unit: _Unit) -> DurationType[_Unit]: Create an instance of duration type: >>> import pyarrow as pa - >>> pa.duration("us") + >>> pa.duration('us') DurationType(duration[us]) - >>> pa.duration("s") + >>> pa.duration('s') DurationType(duration[s]) Create an array with duration type: - >>> pa.array([0, 1, 2], type=pa.duration("s")) + >>> pa.array([0, 1, 2], type=pa.duration('s')) [ 0, @@ -2985,15 +3055,15 @@ def float16() -> Float16Type: >>> a [ - 15872, - 32256 + 1.5, + nan ] Note that unlike other float types, if you convert this array to a python list, the types of its elements will be ``np.float16`` >>> [type(val) for val in a.to_pylist()] - [, ] + [, ] """ def float32() -> Float32Type: @@ -3046,11 +3116,7 @@ def float64() -> Float64Type: ] """ -@overload -def decimal32(precision: _Precision) -> Decimal32Type[_Precision, Literal[0]]: ... -@overload -def decimal32(precision: _Precision, scale: _Scale) -> Decimal32Type[_Precision, _Scale]: ... -def decimal32(*args, **kwargs): +def decimal32(precision: _Precision, scale: _Scale | None = None) -> Decimal32Type[_Precision, _Scale| Literal[0]]: """ Create decimal type with precision and scale and 32-bit width. @@ -3091,7 +3157,7 @@ def decimal32(*args, **kwargs): Create an array with decimal type: >>> import decimal - >>> a = decimal.Decimal("123.45") + >>> a = decimal.Decimal('123.45') >>> pa.array([a], pa.decimal32(5, 2)) [ @@ -3099,11 +3165,7 @@ def decimal32(*args, **kwargs): ] """ -@overload -def decimal64(precision: _Precision) -> Decimal64Type[_Precision, Literal[0]]: ... -@overload -def decimal64(precision: _Precision, scale: _Scale) -> Decimal64Type[_Precision, _Scale]: ... -def decimal64(*args, **kwargs): +def decimal64(precision: _Precision, scale: _Scale | None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: """ Create decimal type with precision and scale and 64-bit width. 
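The decimal32/64/128/256 factories are likewise collapsed to a single signature with an optional scale; a quick runtime sketch of the behaviour the Literal[0] fallback in the return type refers to:

import decimal
import pyarrow as pa

t = pa.decimal128(5, 2)
assert (t.precision, t.scale) == (5, 2)
assert pa.decimal128(5).scale == 0   # omitted scale defaults to 0

arr = pa.array([decimal.Decimal("123.45")], type=t)
assert arr.type == t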
@@ -3144,7 +3206,7 @@ def decimal64(*args, **kwargs): Create an array with decimal type: >>> import decimal - >>> a = decimal.Decimal("123.45") + >>> a = decimal.Decimal('123.45') >>> pa.array([a], pa.decimal64(5, 2)) [ @@ -3152,11 +3214,7 @@ def decimal64(*args, **kwargs): ] """ -@overload -def decimal128(precision: _Precision) -> Decimal128Type[_Precision, Literal[0]]: ... -@overload -def decimal128(precision: _Precision, scale: _Scale) -> Decimal128Type[_Precision, _Scale]: ... -def decimal128(*args, **kwargs): +def decimal128(precision: _Precision, scale: _Scale | None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: """ Create decimal type with precision and scale and 128-bit width. @@ -3197,7 +3255,7 @@ def decimal128(*args, **kwargs): Create an array with decimal type: >>> import decimal - >>> a = decimal.Decimal("123.45") + >>> a = decimal.Decimal('123.45') >>> pa.array([a], pa.decimal128(5, 2)) [ @@ -3205,11 +3263,7 @@ def decimal128(*args, **kwargs): ] """ -@overload -def decimal256(precision: _Precision) -> Decimal256Type[_Precision, Literal[0]]: ... -@overload -def decimal256(precision: _Precision, scale: _Scale) -> Decimal256Type[_Precision, _Scale]: ... -def decimal256(*args, **kwargs): +def decimal256(precision: _Precision, scale: _Scale | None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: """ Create decimal type with precision and scale and 256-bit width. @@ -3248,7 +3302,7 @@ def string() -> StringType: and use the string type to create an array: - >>> pa.array(["foo", "bar", "baz"], type=pa.string()) + >>> pa.array(['foo', 'bar', 'baz'], type=pa.string()) [ "foo", @@ -3280,11 +3334,7 @@ and use the string type to create an array: ] """ -@overload -def binary(length: Literal[-1] = ...) -> BinaryType: ... -@overload -def binary(length: int) -> FixedSizeBinaryType: ... -def binary(length): +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: """ Create variable-length or fixed size binary type. @@ -3305,7 +3355,7 @@ def binary(length): and use the variable-length binary type to create an array: - >>> pa.array(["foo", "bar", "baz"], type=pa.binary()) + >>> pa.array(['foo', 'bar', 'baz'], type=pa.binary()) [ 666F6F, @@ -3320,7 +3370,7 @@ def binary(length): and use the fixed-length binary type to create an array: - >>> pa.array(["foo", "bar", "baz"], type=pa.binary(3)) + >>> pa.array(['foo', 'bar', 'baz'], type=pa.binary(3)) [ 666F6F, @@ -3346,7 +3396,7 @@ def large_binary() -> LargeBinaryType: and use the type to create an array: - >>> pa.array(["foo", "bar", "baz"], type=pa.large_binary()) + >>> pa.array(['foo', 'bar', 'baz'], type=pa.large_binary()) [ 666F6F, @@ -3372,7 +3422,7 @@ def large_string() -> LargeStringType: and use the type to create an array: - >>> pa.array(["foo", "bar"] * 50, type=pa.large_string()) + >>> pa.array(['foo', 'bar'] * 50, type=pa.large_string()) [ "foo", @@ -3434,15 +3484,9 @@ def string_view() -> StringViewType: DataType(string_view) """ -@overload def list_( - value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] = ... -) -> ListType[_DataTypeT]: ... -@overload -def list_( - value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size -) -> FixedSizeListType[_DataTypeT, _Size]: ... -def list_(*args, **kwargs): + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | _Size | None = None +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: """ Create ListType instance from child data type or field. 
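# Hedged sketch of the two factory unions typed above: with no length or
# size argument the variable-size type is returned, while a positive size
# selects the fixed-size variant.
import pyarrow as pa

assert pa.binary() == pa.binary(-1)                    # variable-length binary
assert pa.binary(3).byte_width == 3                    # FixedSizeBinaryType
assert pa.list_(pa.int64()).value_type == pa.int64()   # ListType
assert pa.list_(pa.int64(), 2).list_size == 2          # FixedSizeListType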
@@ -3469,7 +3513,7 @@ def list_(*args, **kwargs): Use the ListType to create a scalar: - >>> pa.scalar(["foo", None], type=pa.list_(pa.string(), 2)) + >>> pa.scalar(['foo', None], type=pa.list_(pa.string(), 2)) or an array: @@ -3578,13 +3622,9 @@ def large_list_view( LargeListViewType(large_list_view) """ -@overload -def map_(key_type: _K, item_type: _ValueT) -> MapType[_K, _ValueT, _Ordered]: ... -@overload def map_( - key_type: _K, item_type: _ValueT, key_sorted: _Ordered -) -> MapType[_K, _ValueT, _Ordered]: ... -def map_(*args, **kwargs): + key_type: _K, item_type: _ValueT, key_sorted: _Ordered | None = None +) -> MapType[_K, _ValueT, _Ordered]: """ Create MapType instance from key and item data types or fields. @@ -3610,7 +3650,7 @@ def map_(*args, **kwargs): Use MapType to create an array: - >>> data = [[{"key": "a", "value": 1}, {"key": "b", "value": 2}], [{"key": "c", "value": 3}]] + >>> data = [[{'key': 'a', 'value': 1}, {'key': 'b', 'value': 2}], [{'key': 'c', 'value': 3}]] >>> pa.array(data, type=pa.map_(pa.string(), pa.int32(), keys_sorted=True)) [ @@ -3635,15 +3675,9 @@ def map_(*args, **kwargs): ] """ -@overload -def dictionary( - index_type: _IndexT, value_type: _BasicValueT -) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... -@overload def dictionary( - index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered -) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... -def dictionary(*args, **kwargs): + index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered | None = None +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: """ Dictionary (categorical, or simply encoded) type. @@ -3707,8 +3741,8 @@ def struct( >>> import pyarrow as pa >>> fields = [ - ... ("f1", pa.int32()), - ... ("f2", pa.string()), + ... ('f1', pa.int32()), + ... ('f2', pa.string()), ... ] >>> struct_type = pa.struct(fields) >>> struct_type @@ -3718,14 +3752,14 @@ def struct( >>> struct_type[0] pyarrow.Field - >>> struct_type["f1"] + >>> struct_type['f1'] pyarrow.Field Create an instance of StructType from an iterable of Fields: >>> fields = [ - ... pa.field("f1", pa.int32()), - ... pa.field("f2", pa.string(), nullable=False), + ... pa.field('f1', pa.int32()), + ... pa.field('f2', pa.string(), nullable=False), ... ] >>> pa.struct(fields) StructType(struct) @@ -3790,15 +3824,9 @@ def dense_union( type : DenseUnionType """ -@overload def union( - child_fields: list[Field[Any]], mode: Literal["sparse"], type_codes: list[int] | None = None -) -> SparseUnionType: ... -@overload -def union( - child_fields: list[Field[Any]], mode: Literal["dense"], type_codes: list[int] | None = None -) -> DenseUnionType: ... -def union(*args, **kwargs): + child_fields: list[Field[Any]], mode: Literal["sparse"] | Literal["dense"], type_codes: list[int] | None = None +) -> SparseUnionType | DenseUnionType: """ Create UnionType from child fields. @@ -3939,14 +3967,16 @@ def fixed_shape_tensor( Create an instance of fixed shape tensor extension type with names of tensor dimensions: - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), dim_names=["C", "H", "W"]) + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), + ... dim_names=['C', 'H', 'W']) >>> tensor_type.dim_names ['C', 'H', 'W'] Create an instance of fixed shape tensor extension type with permutation: - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), permutation=[0, 2, 1]) + >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), + ... 
permutation=[0, 2, 1]) >>> tensor_type.permutation [0, 2, 1] @@ -4036,77 +4066,7 @@ def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueTy type : OpaqueType """ -@overload -def type_for_alias(name: Literal["null"]) -> NullType: ... -@overload -def type_for_alias(name: Literal["bool", "boolean"]) -> BoolType: ... -@overload -def type_for_alias(name: Literal["i1", "int8"]) -> Int8Type: ... -@overload -def type_for_alias(name: Literal["i2", "int16"]) -> Int16Type: ... -@overload -def type_for_alias(name: Literal["i4", "int32"]) -> Int32Type: ... -@overload -def type_for_alias(name: Literal["i8", "int64"]) -> Int64Type: ... -@overload -def type_for_alias(name: Literal["u1", "uint8"]) -> UInt8Type: ... -@overload -def type_for_alias(name: Literal["u2", "uint16"]) -> UInt16Type: ... -@overload -def type_for_alias(name: Literal["u4", "uint32"]) -> Uint32Type: ... -@overload -def type_for_alias(name: Literal["u8", "uint64"]) -> UInt64Type: ... -@overload -def type_for_alias(name: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... -@overload -def type_for_alias(name: Literal["f4", "float", "float32"]) -> Float32Type: ... -@overload -def type_for_alias(name: Literal["f8", "double", "float64"]) -> Float64Type: ... -@overload -def type_for_alias(name: Literal["string", "str", "utf8"]) -> StringType: ... -@overload -def type_for_alias(name: Literal["binary"]) -> BinaryType: ... -@overload -def type_for_alias( - name: Literal["large_string", "large_str", "large_utf8"], -) -> LargeStringType: ... -@overload -def type_for_alias(name: Literal["large_binary"]) -> LargeBinaryType: ... -@overload -def type_for_alias(name: Literal["binary_view"]) -> BinaryViewType: ... -@overload -def type_for_alias(name: Literal["string_view"]) -> StringViewType: ... -@overload -def type_for_alias(name: Literal["date32", "date32[day]"]) -> Date32Type: ... -@overload -def type_for_alias(name: Literal["date64", "date64[ms]"]) -> Date64Type: ... -@overload -def type_for_alias(name: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... -@overload -def type_for_alias(name: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... -@overload -def type_for_alias(name: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... -@overload -def type_for_alias(name: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... -@overload -def type_for_alias(name: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... -@overload -def type_for_alias(name: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... -@overload -def type_for_alias(name: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... -@overload -def type_for_alias(name: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... -@overload -def type_for_alias(name: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... -@overload -def type_for_alias(name: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... -@overload -def type_for_alias(name: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... -@overload -def type_for_alias(name: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... -@overload -def type_for_alias(name: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... -def type_for_alias(name): +def type_for_alias(name: Any) -> DataType: """ Return DataType given a string alias if one exists. @@ -4120,80 +4080,6 @@ def type_for_alias(name): type : DataType """ -@overload -def ensure_type(ty: None, allow_none: Literal[True]) -> None: ... 
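# Small usage sketch for the alias lookup typed above: with the overloads
# collapsed to `name: Any -> DataType`, the concrete type is resolved only
# at runtime from the alias string.
import pyarrow as pa

assert pa.type_for_alias("i4") == pa.int32()
assert pa.type_for_alias("f8") == pa.float64()
assert pa.type_for_alias("string") == pa.string()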
-@overload -def ensure_type(ty: _DataTypeT) -> _DataTypeT: ... -@overload -def ensure_type(ty: Literal["null"]) -> NullType: ... -@overload -def ensure_type(ty: Literal["bool", "boolean"]) -> BoolType: ... -@overload -def ensure_type(ty: Literal["i1", "int8"]) -> Int8Type: ... -@overload -def ensure_type(ty: Literal["i2", "int16"]) -> Int16Type: ... -@overload -def ensure_type(ty: Literal["i4", "int32"]) -> Int32Type: ... -@overload -def ensure_type(ty: Literal["i8", "int64"]) -> Int64Type: ... -@overload -def ensure_type(ty: Literal["u1", "uint8"]) -> UInt8Type: ... -@overload -def ensure_type(ty: Literal["u2", "uint16"]) -> UInt16Type: ... -@overload -def ensure_type(ty: Literal["u4", "uint32"]) -> Uint32Type: ... -@overload -def ensure_type(ty: Literal["u8", "uint64"]) -> UInt64Type: ... -@overload -def ensure_type(ty: Literal["f2", "halffloat", "float16"]) -> Float16Type: ... -@overload -def ensure_type(ty: Literal["f4", "float", "float32"]) -> Float32Type: ... -@overload -def ensure_type(ty: Literal["f8", "double", "float64"]) -> Float64Type: ... -@overload -def ensure_type(ty: Literal["string", "str", "utf8"]) -> StringType: ... -@overload -def ensure_type(ty: Literal["binary"]) -> BinaryType: ... -@overload -def ensure_type( - ty: Literal["large_string", "large_str", "large_utf8"], -) -> LargeStringType: ... -@overload -def ensure_type(ty: Literal["large_binary"]) -> LargeBinaryType: ... -@overload -def ensure_type(ty: Literal["binary_view"]) -> BinaryViewType: ... -@overload -def ensure_type(ty: Literal["string_view"]) -> StringViewType: ... -@overload -def ensure_type(ty: Literal["date32", "date32[day]"]) -> Date32Type: ... -@overload -def ensure_type(ty: Literal["date64", "date64[ms]"]) -> Date64Type: ... -@overload -def ensure_type(ty: Literal["time32[s]"]) -> Time32Type[Literal["s"]]: ... -@overload -def ensure_type(ty: Literal["time32[ms]"]) -> Time32Type[Literal["ms"]]: ... -@overload -def ensure_type(ty: Literal["time64[us]"]) -> Time64Type[Literal["us"]]: ... -@overload -def ensure_type(ty: Literal["time64[ns]"]) -> Time64Type[Literal["ns"]]: ... -@overload -def ensure_type(ty: Literal["timestamp[s]"]) -> TimestampType[Literal["s"], Any]: ... -@overload -def ensure_type(ty: Literal["timestamp[ms]"]) -> TimestampType[Literal["ms"], Any]: ... -@overload -def ensure_type(ty: Literal["timestamp[us]"]) -> TimestampType[Literal["us"], Any]: ... -@overload -def ensure_type(ty: Literal["timestamp[ns]"]) -> TimestampType[Literal["ns"], Any]: ... -@overload -def ensure_type(ty: Literal["duration[s]"]) -> DurationType[Literal["s"]]: ... -@overload -def ensure_type(ty: Literal["duration[ms]"]) -> DurationType[Literal["ms"]]: ... -@overload -def ensure_type(ty: Literal["duration[us]"]) -> DurationType[Literal["us"]]: ... -@overload -def ensure_type(ty: Literal["duration[ns]"]) -> DurationType[Literal["ns"]]: ... -@overload -def ensure_type(ty: Literal["month_day_nano_interval"]) -> MonthDayNanoIntervalType: ... def schema( fields: Iterable[Field[Any]] | Iterable[tuple[str, DataType]] | Mapping[str, DataType], metadata: dict[bytes | str, bytes | str] | None = None, @@ -4214,33 +4100,40 @@ def schema( Create a Schema from iterable of tuples: >>> import pyarrow as pa - >>> pa.schema( - ... [ - ... ("some_int", pa.int32()), - ... ("some_string", pa.string()), - ... pa.field("some_required_string", pa.string(), nullable=False), - ... ] - ... ) + >>> pa.schema([ + ... ('some_int', pa.int32()), + ... ('some_string', pa.string()), + ... 
pa.field('some_required_string', pa.string(), nullable=False) + ... ]) some_int: int32 some_string: string some_required_string: string not null Create a Schema from iterable of Fields: - >>> pa.schema([pa.field("some_int", pa.int32()), pa.field("some_string", pa.string())]) + >>> pa.schema([ + ... pa.field('some_int', pa.int32()), + ... pa.field('some_string', pa.string()) + ... ]) some_int: int32 some_string: string DataTypes can also be passed as strings. The following is equivalent to the above example: - >>> pa.schema([pa.field("some_int", "int32"), pa.field("some_string", "string")]) + >>> pa.schema([ + ... pa.field('some_int', "int32"), + ... pa.field('some_string', "string") + ... ]) some_int: int32 some_string: string Or more concisely: - >>> pa.schema([("some_int", "int32"), ("some_string", "string")]) + >>> pa.schema([ + ... ('some_int', "int32"), + ... ('some_string', "string") + ... ]) some_int: int32 some_string: string @@ -4264,9 +4157,9 @@ def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: >>> import pyarrow as pa >>> import numpy as np - >>> pa.from_numpy_dtype(np.dtype("float16")) + >>> pa.from_numpy_dtype(np.dtype('float16')) DataType(halffloat) - >>> pa.from_numpy_dtype("U") + >>> pa.from_numpy_dtype('U') DataType(string) >>> pa.from_numpy_dtype(bool) DataType(bool) @@ -4274,42 +4167,10 @@ def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: DataType(string) """ -def is_boolean_value(obj: Any) -> bool: - """ - Check if the object is a boolean. - - Parameters - ---------- - obj : object - The object to check - """ - -def is_integer_value(obj: Any) -> bool: - """ - Check if the object is an integer. - - Parameters - ---------- - obj : object - The object to check - """ - -def is_float_value(obj: Any) -> bool: - """ - Check if the object is a float. 
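# Minimal sketch for `from_numpy_dtype` above, assuming NumPy is installed;
# both dtype objects and anything coercible to a dtype are accepted.
import numpy as np
import pyarrow as pa

assert pa.from_numpy_dtype(np.dtype("int64")) == pa.int64()
assert pa.from_numpy_dtype(np.float32) == pa.float32()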
- - Parameters - ---------- - obj : object - The object to check - """ - __all__ = [ "_Weakrefable", "_Metadata", - "_AsPyType", "DataType", - "_DataTypeT", "_BasicDataType", "NullType", "BoolType", @@ -4362,12 +4223,10 @@ __all__ = [ "UuidType", "JsonType", "OpaqueType", - "PyExtensionType", "UnknownExtensionType", "register_extension_type", "unregister_extension_type", "KeyValueMetadata", - "ensure_metadata", "Field", "Schema", "unify_schemas", @@ -4382,8 +4241,6 @@ __all__ = [ "int32", "int64", "uint64", - "tzinfo_to_string", - "string_to_tzinfo", "timestamp", "time32", "time64", @@ -4423,10 +4280,6 @@ __all__ = [ "bool8", "opaque", "type_for_alias", - "ensure_type", "schema", "from_numpy_dtype", - "is_boolean_value", - "is_integer_value", - "is_float_value", ] diff --git a/python/pyarrow/_stubs_typing.pyi b/python/pyarrow/_stubs_typing.pyi index 549dc4059c3..98479791103 100644 --- a/python/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow/_stubs_typing.pyi @@ -19,13 +19,13 @@ import datetime as dt from collections.abc import Sequence from decimal import Decimal -from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar +from typing import Any, Collection, Literal, Protocol, TypeAlias, TypeVar, Iterator import numpy as np from numpy.typing import NDArray -from .compute import BooleanArray, IntegerArray +from pyarrow.lib import BooleanArray, IntegerArray ArrayLike: TypeAlias = Any ScalarLike: TypeAlias = Any @@ -52,6 +52,8 @@ PyScalar: TypeAlias = ( ) _T = TypeVar("_T") +_V = TypeVar("_V", covariant=True) + SingleOrList: TypeAlias = list[_T] | _T class SupportEq(Protocol): @@ -78,11 +80,9 @@ FilterTuple: TypeAlias = ( | tuple[str, Literal["in", "not in"], Collection] ) -class Buffer(Protocol): - def __buffer__(self, flags: int, /) -> memoryview: ... +class Buffer(Protocol): ... -class SupportPyBuffer(Protocol): - def __buffer__(self, flags: int, /) -> memoryview: ... +class SupportPyBuffer(Protocol): ... class SupportArrowStream(Protocol): def __arrow_c_stream__(self, requested_schema=None) -> Any: ... @@ -95,3 +95,8 @@ class SupportArrowDeviceArray(Protocol): class SupportArrowSchema(Protocol): def __arrow_c_schema(self) -> Any: ... + +class NullableCollection(Protocol[_V]): # pyright: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index bf5beab589d..109d8ebe597 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3634,7 +3634,7 @@ cdef class FixedSizeListArray(BaseListArray): Or create from a values array, list size and matching type: >>> typ = pa.list_(pa.field("values", pa.int64()), 2) - >>> arr = pa.FixedSizeListArray.from_arrays(values,type=typ) + >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) >>> arr [ diff --git a/python/pyarrow/lib.pyi b/python/pyarrow/lib.pyi index 3292c52b2c0..9d5bd7bedb2 100644 --- a/python/pyarrow/lib.pyi +++ b/python/pyarrow/lib.pyi @@ -16,24 +16,30 @@ # under the License. 
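# Sketch of the intent behind the NullableCollection protocol added to
# _stubs_typing.pyi above: a plain Python sequence with optional None
# elements satisfies it structurally (it has __iter__, __len__ and
# __contains__), which is the kind of input factories such as pa.array take.
import pyarrow as pa

values = [1, None, 3]          # provides __iter__, __len__, __contains__
arr = pa.array(values)         # nulls are preserved in the resulting array
assert arr.null_count == 1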
# ruff: noqa: F403 -from typing import NamedTuple +from collections.abc import Mapping +import datetime as dt +from typing import NamedTuple, Literal +from typing_extensions import TypeVar from .__lib_pxi.array import * -from .__lib_pxi.benchmark import * -from .__lib_pxi.builder import * -from .__lib_pxi.compat import * -from .__lib_pxi.config import * -from .__lib_pxi.device import * -from .__lib_pxi.error import * +# TODO +# from .__lib_pxi.benchmark import * +# from .__lib_pxi.builder import * +# from .__lib_pxi.compat import * +# from .__lib_pxi.config import * +# from .__lib_pxi.device import * +# from .__lib_pxi.error import * from .__lib_pxi.io import * -from .__lib_pxi.ipc import * +# from .__lib_pxi.ipc import * from .__lib_pxi.memory import * -from .__lib_pxi.pandas_shim import * +# from .__lib_pxi.pandas_shim import * from .__lib_pxi.scalar import * -from .__lib_pxi.table import * +# from .__lib_pxi.table import * from .__lib_pxi.tensor import * from .__lib_pxi.types import * +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + class MonthDayNano(NamedTuple): days: int months: int @@ -79,6 +85,51 @@ def is_threading_enabled() -> bool: threading doesn't work (e.g. Emscripten). """ +def ensure_metadata( + meta: Mapping[bytes | str, bytes | str] | KeyValueMetadata | None, allow_none: bool = False +) -> KeyValueMetadata | None: ... + +def tzinfo_to_string(tz: dt.tzinfo) -> str: + """ + Converts a time zone object into a string indicating the name of a time + zone, one of: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + tz : datetime.tzinfo + Time zone object + + Returns + ------- + name : str + Time zone name + """ + +def string_to_tzinfo(name: str) -> dt.tzinfo: + """ + Convert a time zone name into a time zone object. + + Supported input strings are: + * As used in the Olson time zone database (the "tz database" or + "tzdata"), such as "America/New_York" + * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 + + Parameters + ---------- + name: str + Time zone name. + + Returns + ------- + tz : datetime.tzinfo + Time zone object + """ + +def ensure_type(ty: _DataTypeT | None, allow_none: Literal[True] | Literal[False] | None = None) -> _DataTypeT | None: ... + Type_NA: int Type_BOOL: int Type_UINT8: int diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 5934a7aa8cf..d26933e3f39 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -1036,7 +1036,7 @@ cdef class StructScalar(Scalar, Mapping): Parameters ---------- - index : Union[int, str] + key : Union[int, str] Index / position or name of the field. 
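# Hedged sketch for the StructScalar parameter documented above: per that
# docstring, __getitem__ accepts either the field position or the field name.
import pyarrow as pa

s = pa.scalar({"a": 1, "b": "x"})
assert s["a"].as_py() == 1      # lookup by field name
assert s[1].as_py() == "x"      # lookup by position, as the docstring states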
Returns diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 450cce74f1d..6d7ec6f724f 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -18,21 +18,21 @@ import datetime import sys -import pytest -import hypothesis as h -import hypothesis.strategies as st +import pytest # type: ignore[import-not-found] +import hypothesis as h # type: ignore[import-not-found] +import hypothesis.strategies as st # type: ignore[import-not-found] try: - import hypothesis.extra.numpy as npst + import hypothesis.extra.numpy as npst # type: ignore[import-not-found] except ImportError: - npst = None + npst = None # type: ignore[assignment] try: - import hypothesis.extra.pytz as tzst + import hypothesis.extra.pytz as tzst # type: ignore[import-not-found] except ImportError: - tzst = None + tzst = None # type: ignore[assignment] try: import zoneinfo except ImportError: - zoneinfo = None + zoneinfo = None # type: ignore[assignment] if sys.platform == 'win32': try: import tzdata # noqa:F401 @@ -41,7 +41,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa @@ -234,13 +234,13 @@ def schemas(type_strategy=primitive_types, max_fields=None): all_types = st.deferred( lambda: ( - primitive_types | - list_types() | - struct_types() | - dictionary_types() | - map_types() | - list_types(all_types) | - struct_types(all_types) + primitive_types + | list_types() + | struct_types() + | dictionary_types() + | map_types() + | list_types(all_types) # type: ignore[has-type] + | struct_types(all_types) # type: ignore[has-type] ) ) all_fields = fields(all_types) @@ -467,7 +467,9 @@ def pandas_compatible_list_types( dictionary_types( value_strategy=pandas_compatible_dictionary_value_types ), - pandas_compatible_list_types(pandas_compatible_types), - struct_types(pandas_compatible_types) + pandas_compatible_list_types( + pandas_compatible_types # type: ignore[has-type] + ), + struct_types(pandas_compatible_types) # type: ignore[has-type] ) ) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 009ab1e849b..5686420c688 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -18,10 +18,10 @@ from collections.abc import Iterable import datetime import decimal -import hypothesis as h -import hypothesis.strategies as st +import hypothesis as h # type: ignore[import-not-found] +import hypothesis.strategies as st # type: ignore[import-not-found] import itertools -import pytest +import pytest # type: ignore[import-not-found] import struct import subprocess import sys @@ -30,7 +30,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] import pyarrow as pa import pyarrow.tests.strategies as past diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 7820111b70f..5441dd493d3 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -28,17 +28,31 @@ import sys import textwrap +try: + import numpy as np +except ImportError: + np = None + +try: + import pandas as pd +except ImportError: + pd = None import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError, ArrowTypeError +from pyarrow.lib import ArrowNotImplementedError + +try: + import pyarrow.substrait as pas +except ImportError: + pas = None exported_functions = [ - func for (_, func) in sorted(pc.__dict__.items()) + func 
for (name, func) in sorted(pc.__dict__.items()) if hasattr(func, '__arrow_compute_function__')] exported_option_classes = [ - cls for (_, cls) in sorted(pc.__dict__.items()) + cls for (name, cls) in sorted(pc.__dict__.items()) if (isinstance(cls, type) and cls is not pc.FunctionOptions and issubclass(cls, pc.FunctionOptions))] @@ -203,7 +217,7 @@ def test_option_class_equality(request): and cls != pc.AssumeTimezoneOptions ): try: - options.append(cls()) # type: ignore[reportArgumentType] + options.append(cls()) except TypeError: pytest.fail(f"Options class is not tested: {cls}") @@ -262,8 +276,6 @@ def test_get_function_hash_aggregate(): @pytest.mark.numpy def test_call_function_with_memory_pool(): - import numpy as np - arr = pa.array(["foo", "bar", "baz"]) indices = np.array([2, 2, 1]) result1 = arr.take(indices) @@ -787,11 +799,11 @@ def test_min_max(): # Wrong options type options = pc.TakeOptions() with pytest.raises(TypeError): - s = pc.min_max(data, options=options) # type: ignore[reportCallIssue] + s = pc.min_max(data, options=options) # Missing argument with pytest.raises(TypeError, match="min_max takes 1 positional"): - s = pc.min_max() # type: ignore[reportCallIssue] + s = pc.min_max() def test_any(): @@ -842,11 +854,11 @@ def test_all(): def test_is_valid(): # An example generated function wrapper without options - data = pa.array([4, 5, None]) + data = [4, 5, None] assert pc.is_valid(data).to_pylist() == [True, True, False] with pytest.raises(TypeError): - pc.is_valid(data, options=None) # type: ignore[no-matching-overload] + pc.is_valid(data, options=None) def test_generated_docstrings(): @@ -1025,6 +1037,21 @@ def find_new_unicode_codepoints(): 0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c, 0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8, 0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, } +# utf8proc does not store if a codepoint is numeric +numeric_info_missing = { + 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, + 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, + 0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70, + 0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341, + 0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2, + 0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a, + 0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10, + 0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e, + 0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621, + 0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973, + 0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5, + 0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca, + 0x10fcb, } # utf8proc has no no digit/numeric information digit_info_missing = { 0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c, @@ -1043,7 +1070,6 @@ def find_new_unicode_codepoints(): 0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41, 0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63, 0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, } -# utf8proc does not store if a codepoint is numeric numeric_info_missing = { 0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03, 0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96, @@ -1078,8 +1104,7 @@ def test_string_py_compat_boolean(function_name, variant): py_name = function_name.replace('_', '') ignore = codepoints_ignore.get(function_name, set()) | \ find_new_unicode_codepoints() - for i in range(128 if ascii # type: ignore[reportUnnecessaryComparison] - else 0x11000): + for i in range(128 if ascii else 0x11000): if i in range(0xD800, 0xE000): continue # bug? 
pyarrow doesn't allow utf16 surrogates # the issues we know of, we skip @@ -1145,8 +1170,6 @@ def test_utf8_zfill(): @pytest.mark.pandas def test_replace_slice(): - import numpy as np - offsets = range(-3, 4) arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde']) @@ -1223,7 +1246,6 @@ def test_binary_join(): expected = pa.array(['a1b', 'c2d'], type=pa.binary()) ar_list = pa.array([['a', 'b'], ['c', 'd']], type=pa.list_(pa.binary())) assert pc.binary_join(ar_list, separator_array).equals(expected) - assert expected.equals(pc.binary_join(ar_list, separator_array)) def test_binary_join_element_wise(): @@ -1287,8 +1309,7 @@ def test_take_indices_types(): for indices_type in ['uint8', 'int8', 'uint16', 'int16', 'uint32', 'int32', 'uint64', 'int64']: - indices = pa.array( - [0, 4, 2, None], type=indices_type) # type: ignore[reportArgumentType] + indices = pa.array([0, 4, 2, None], type=indices_type) result = arr.take(indices) result.validate() expected = pa.array([0, 4, 2, None]) @@ -1297,7 +1318,7 @@ def test_take_indices_types(): for indices_type in [pa.float32(), pa.float64()]: indices = pa.array([0, 4, 2], type=indices_type) with pytest.raises(NotImplementedError): - arr.take(indices) # type: ignore[reportArgumentType] + arr.take(indices) def test_take_on_chunked_array(): @@ -1465,8 +1486,6 @@ def test_filter(ty, values): @pytest.mark.numpy @pytest.mark.parametrize(('ty', 'values'), all_array_types) def test_filter_numpy_array_mask(ty, values): - import numpy as np - arr = pa.array(values, type=ty) # same test as test_filter with different array type mask = np.array([True, False, False, True, None]) @@ -1543,7 +1562,7 @@ def test_filter_errors(): # non-boolean dtype mask = pa.array([0, 1, 0, 1, 0]) with pytest.raises(NotImplementedError): - obj.filter(mask) # type: ignore[reportArgumentType] + obj.filter(mask) # wrong length mask = pa.array([True, False, True]) @@ -1554,7 +1573,7 @@ def test_filter_errors(): scalar = pa.scalar(True) for filt in [batch, table, scalar]: with pytest.raises(TypeError): - table.filter(filt) # type: ignore[reportArgumentType] + table.filter(filt) def test_filter_null_type(): @@ -1573,10 +1592,11 @@ def test_filter_null_type(): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_array(typ): - def con(values): - if typ == "array": + if typ == "array": + def con(values): return pa.array(values) - else: + else: + def con(values): return pa.chunked_array([values]) arr1 = con([1, 2, 3, 4, None]) @@ -1603,10 +1623,11 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_string_scalar(typ): - def con(values): - if typ == "array": + if typ == "array": + def con(values): return pa.array(values) - else: + else: + def con(values): return pa.chunked_array([values]) arr = con(['a', 'b', 'c', None]) @@ -1639,10 +1660,11 @@ def con(values): @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_scalar(typ): - def con(values): - if typ == "array": + if typ == "array": + def con(values): return pa.array(values) - else: + else: + def con(values): return pa.chunked_array([values]) arr = con([1, 2, 3, None]) @@ -1735,17 +1757,14 @@ def test_round_to_integer(ty): "half_to_odd": [3, 3, 4, 5, -3, -3, -4, None], } for round_mode, expected in rmode_and_expected.items(): - options = RoundOptions( # type: ignore[reportPossiblyUnboundVariable] - round_mode=round_mode) # type: ignore[reportArgumentType] - result = round(values, options=options) # type: ignore[reportArgumentType] + options = 
RoundOptions(round_mode=round_mode) + result = round(values, options=options) expected_array = pa.array(expected, type=pa.float64()) assert expected_array.equals(result) @pytest.mark.numpy def test_round(): - import numpy as np - values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None] ndigits_and_expected = { -2: [300, 0, 0, 0, -0, -0, -0, None], @@ -1765,8 +1784,6 @@ def test_round(): @pytest.mark.numpy def test_round_to_multiple(): - import numpy as np - values = [320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045, None] multiple_and_expected = { 0.05: [320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05, None], @@ -1790,8 +1807,7 @@ def test_round_to_multiple(): for multiple in [object, 99999999999999999999999]: with pytest.raises(TypeError, match="is not a valid multiple type"): - pc.round_to_multiple( - values, multiple=multiple) # type: ignore[reportArgumentType] + pc.round_to_multiple(values, multiple=multiple) def test_round_binary(): @@ -1862,7 +1878,7 @@ def test_fill_null(): fill_value = pa.array([5], type=pa.int8()) with pytest.raises(pa.ArrowInvalid, match="Array arguments must all be the same length"): - arr.fill_null(fill_value) # type: ignore[reportArgumentType] + arr.fill_null(fill_value) arr = pa.array([None, None, None, None], type=pa.null()) fill_value = pa.scalar(None, type=pa.null()) @@ -2059,16 +2075,14 @@ def test_fsl_to_fsl_cast(value_type): # Different sized FSL cast_type = pa.list_(pa.field("element", value_type), 3) err_msg = 'Size of FixedSizeList is not the same.' - with pytest.raises(ArrowTypeError, match=err_msg): + with pytest.raises(pa.lib.ArrowTypeError, match=err_msg): fsl.cast(cast_type) DecimalTypeTraits = namedtuple('DecimalTypeTraits', - # type: ignore[reportUntypedNamedTuple] ('name', 'factory', 'max_precision')) FloatToDecimalCase = namedtuple('FloatToDecimalCase', - # type: ignore[reportUntypedNamedTuple] ('precision', 'scale', 'float_val')) decimal_type_traits = [DecimalTypeTraits('decimal32', pa.decimal32, 9), @@ -2081,8 +2095,6 @@ def largest_scaled_float_not_above(val, scale): """ Find the largest float f such as `f * 10**scale <= val` """ - import numpy as np - assert val >= 0 assert scale >= 0 float_val = float(val) / 10**scale @@ -2143,7 +2155,7 @@ def random_float_to_decimal_cast_cases(float_ty, max_precision): r = random.Random(42) for precision in range(1, max_precision, 6): for scale in range(0, precision, 4): - for _ in range(20): + for i in range(20): unscaled = r.randrange(0, 10**precision) float_val = scaled_float(unscaled, scale) assert float_val * 10**scale < 10**precision @@ -2200,8 +2212,6 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): """ Test float-to-decimal conversion against exactly generated values. """ - import numpy as np - r = random.Random(43) np_float_ty = { pa.float32(): np.float32, @@ -2242,13 +2252,10 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): float_exp = (-mantissa_bits + math.floor(math.log2(10**(precision - scale)))) assert float_exp_min <= float_exp <= float_exp_max - for _ in range(5): + for i in range(5): mantissa = r.randrange(0, 2**mantissa_bits) - float_val = np.ldexp( - np_float_ty(mantissa), float_exp - ) - assert isinstance( - float_val, np_float_ty) # type: ignore[reportArgumentType] + float_val = np.ldexp(np_float_ty(mantissa), float_exp) + assert isinstance(float_val, np_float_ty) # Make sure we compute the exact expected value and # round by half-to-even when converting to the expected precision. 
if float_exp >= 0: @@ -2294,8 +2301,6 @@ def test_strptime(): @pytest.mark.pandas @pytest.mark.timezone_data def test_strftime(): - import pandas as pd - times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] @@ -2306,8 +2311,7 @@ def test_strftime(): formats.extend(["%c", "%x", "%X"]) for timezone in timezones: - ts = pd.to_datetime(times # type: ignore[reportArgumentType] - ).tz_localize(timezone) + ts = pd.to_datetime(times).tz_localize(timezone) for unit in ["s", "ms", "us", "ns"]: tsa = pa.array(ts, type=pa.timestamp(unit, timezone)) for fmt in formats: @@ -2354,7 +2358,7 @@ def test_strftime(): # Test timestamps without timezone fmt = "%Y-%m-%dT%H:%M:%S" - ts = pd.to_datetime(times) # type: ignore[reportArgumentType] + ts = pd.to_datetime(times) tsa = pa.array(ts, type=pa.timestamp("s")) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) expected = pa.array(ts.strftime(fmt)).cast(result.type) @@ -2373,7 +2377,6 @@ def test_strftime(): def _check_datetime_components(timestamps, timezone=None): from pyarrow.vendored.version import Version - import pandas as pd ts = pd.to_datetime(timestamps).tz_localize( "UTC").tz_convert(timezone).to_series() @@ -2389,15 +2392,9 @@ def _check_datetime_components(timestamps, timezone=None): if Version(pd.__version__) < Version("1.1.0"): # https://github.com/pandas-dev/pandas/issues/33206 - iso_year = ts.map( - lambda x: x.isocalendar()[0] # type: ignore[reportUnknownLambdaType] - ).astype("int64") - iso_week = ts.map( - lambda x: x.isocalendar()[1] # type: ignore[reportUnknownLambdaType] - ).astype("int64") - iso_day = ts.map( - lambda x: x.isocalendar()[2] # type: ignore[reportUnknownLambdaType] - ).astype("int64") + iso_year = ts.map(lambda x: x.isocalendar()[0]).astype("int64") + iso_week = ts.map(lambda x: x.isocalendar()[1]).astype("int64") + iso_day = ts.map(lambda x: x.isocalendar()[2]).astype("int64") else: # Casting is required because pandas isocalendar returns int32 # while arrow isocalendar returns int64. 
@@ -2447,8 +2444,7 @@ def _check_datetime_components(timestamps, timezone=None): # datetime with utc returns None for dst() is_dst = [False] * len(ts) else: - is_dst = ts.apply( - lambda x: x.dst().seconds > 0) # type: ignore[reportUnknownLambdaType] + is_dst = ts.apply(lambda x: x.dst().seconds > 0) assert pc.is_dst(tsa).equals(pa.array(is_dst)) day_of_week_options = pc.DayOfWeekOptions( @@ -2509,9 +2505,6 @@ def test_iso_calendar_longer_array(unit): @pytest.mark.pandas @pytest.mark.timezone_data def test_assume_timezone(): - import numpy as np - import pandas as pd - ts_type = pa.timestamp("ns") timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789", "2000-02-29T23:23:23.999999999", @@ -2536,9 +2529,9 @@ def test_assume_timezone(): ambiguous_array = pa.array(ambiguous, type=ts_type) nonexistent_array = pa.array(nonexistent, type=ts_type) - ta = pa.array(timestamps, type=ts_type) for timezone in ["UTC", "America/Chicago", "Asia/Kolkata"]: options = pc.AssumeTimezoneOptions(timezone) + ta = pa.array(timestamps, type=ts_type) expected = timestamps.tz_localize(timezone) result = pc.assume_timezone(ta, options=options) assert result.equals(pa.array(expected)) @@ -2547,8 +2540,7 @@ def test_assume_timezone(): ta_zoned = pa.array(timestamps, type=pa.timestamp("ns", timezone)) with pytest.raises(pa.ArrowInvalid, match="already have a timezone:"): - pc.assume_timezone( - ta_zoned, options=options) # type: ignore[reportArgumentType] + pc.assume_timezone(ta_zoned, options=options) invalid_options = pc.AssumeTimezoneOptions("Europe/Brusselsss") with pytest.raises(ValueError, match="not found in timezone database"): @@ -2591,22 +2583,18 @@ def test_assume_timezone(): f"timezone '{timezone}'"): pc.assume_timezone(ambiguous_array, options=options_ambiguous_raise) - expected = ambiguous.tz_localize(timezone, ambiguous=np.array([True, True, True])) + expected = ambiguous.tz_localize(timezone, ambiguous=[True, True, True]) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_earliest) result.equals(pa.array(expected)) - expected = ambiguous.tz_localize( - timezone, ambiguous=np.array([False, False, False])) + expected = ambiguous.tz_localize(timezone, ambiguous=[False, False, False]) result = pc.assume_timezone( ambiguous_array, options=options_ambiguous_latest) result.equals(pa.array(expected)) def _check_temporal_rounding(ts, values, unit): - import numpy as np - import pandas as pd - unit_shorthand = { "nanosecond": "ns", "microsecond": "us", @@ -2650,7 +2638,7 @@ def _check_temporal_rounding(ts, values, unit): value, unit, calendar_based_origin=True) origin = ts.dt.floor(greater_unit[unit]) - if not hasattr(ta.type, "tz"): + if ta.type.tz is None: result = pc.ceil_temporal(ta, options=options).to_pandas() expected = (ts - origin).dt.ceil(frequency) + origin np.testing.assert_array_equal(result, expected) @@ -2681,20 +2669,16 @@ def _check_temporal_rounding(ts, values, unit): # to regular ceiled timestamp if it is equal to the original timestamp. # This does not work if timestamp is zoned since our logic will not # account for DST jumps. 
- if not hasattr(ta.type, "tz"): + if ta.type.tz is None: options = pc.RoundTemporalOptions( - value, # type: ignore[reportPossiblyUnboundVariable] - ceil_is_strictly_greater=True, - unit=unit) # type: ignore[reportPossiblyUnboundVariable] + value, unit, ceil_is_strictly_greater=True) result = pc.ceil_temporal(ta, options=options) - expected = ts.dt.ceil(frequency) # type: ignore[reportPossiblyUnboundVariable] + expected = ts.dt.ceil(frequency) expected = np.where( expected == ts, - expected + pd.Timedelta( - value, # type: ignore[reportPossiblyUnboundVariable] - unit=unit_shorthand[unit]), expected # type: ignore[reportArgumentType] - ) + expected + pd.Timedelta(value, unit_shorthand[unit]), + expected) np.testing.assert_array_equal(result, expected) # Check RoundTemporalOptions defaults @@ -2719,10 +2703,8 @@ def _check_temporal_rounding(ts, values, unit): "second", "minute", "hour", "day")) @pytest.mark.pandas def test_round_temporal(unit): - import pandas as pd - values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750) - timestamps = pd.Series([ + timestamps = [ "1923-07-07 08:52:35.203790336", "1931-03-17 10:45:00.641559040", "1932-06-16 01:16:42.911994368", @@ -2735,7 +2717,7 @@ def test_round_temporal(unit): "1982-01-21 18:43:44.517366784", "1992-01-01 00:00:00.100000000", "1999-12-04 05:55:34.794991104", - "2026-10-26 08:39:00.316686848"]) + "2026-10-26 08:39:00.316686848"] ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps]) _check_temporal_rounding(ts, values, unit) @@ -2757,7 +2739,7 @@ def test_count(): with pytest.raises(ValueError, match='"something else" is not a valid count mode'): - pc.count(arr, 'something else') # type: ignore[invalid-argument-type] + pc.count(arr, 'something else') def test_index(): @@ -2807,7 +2789,7 @@ def test_partition_nth(): with pytest.raises( ValueError, match="'partition_nth_indices' cannot be called without options"): - pc.partition_nth_indices(data) # type: ignore[no-matching-overload] + pc.partition_nth_indices(data) def test_partition_nth_null_placement(): @@ -2834,13 +2816,10 @@ def validate_select_k(select_k_indices, arr, order, stable_sort=False): arr = pa.array([1, 2, None, 0]) for k in [0, 2, 4]: - result = pc.select_k_unstable( - arr, k=k, sort_keys=[("dummy", "ascending")]) - validate_select_k(result, arr, "ascending") - - result = pc.select_k_unstable( - arr, k=k, sort_keys=[("dummy", "descending")]) - validate_select_k(result, arr, "descending") + for order in ["descending", "ascending"]: + result = pc.select_k_unstable( + arr, k=k, sort_keys=[("dummy", order)]) + validate_select_k(result, arr, order) result = pc.top_k_unstable(arr, k=k) validate_select_k(result, arr, "descending") @@ -2897,7 +2876,7 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises( ValueError, match="'select_k_unstable' cannot be called without options"): - pc.select_k_unstable(table) # type: ignore[no-matching-overload] + pc.select_k_unstable(table) with pytest.raises(ValueError, match="select_k_unstable requires a nonnegative `k`"): @@ -2906,19 +2885,14 @@ def validate_select_k(select_k_indices, tbl, sort_keys, stable_sort=False): with pytest.raises(ValueError, match="select_k_unstable requires a " "non-empty `sort_keys`"): - pc.select_k_unstable(table, sort_keys=[], - k=2 # type: ignore[reportPossiblyUnboundVariable] - ) + pc.select_k_unstable(table, k=2, sort_keys=[]) with pytest.raises(ValueError, match="not a valid sort order"): - pc.select_k_unstable( - table, k=k, # type: 
ignore[reportPossiblyUnboundVariable] - sort_keys=[("a", "nonscending")]) # type: ignore[reportArgumentType] + pc.select_k_unstable(table, k=k, sort_keys=[("a", "nonscending")]) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.select_k_unstable(table, k=k, # type: ignore[reportPossiblyUnboundVariable] - sort_keys=[("unknown", "ascending")]) + pc.select_k_unstable(table, k=k, sort_keys=[("unknown", "ascending")]) def test_array_sort_indices(): @@ -2937,9 +2911,7 @@ def test_array_sort_indices(): assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="not a valid sort order"): - pc.array_sort_indices(arr, - order="nonscending" # type: ignore[reportArgumentType] - ) + pc.array_sort_indices(arr, order="nonscending") def test_sort_indices_array(): @@ -2995,19 +2967,14 @@ def test_sort_indices_table(): assert result.to_pylist() == [2, 1, 0, 3] with pytest.raises(ValueError, match="Must specify one or more sort keys"): - pc.sort_indices(table) # type: ignore[reportArgumentType] + pc.sort_indices(table) with pytest.raises(ValueError, match="Invalid sort key column: No match for.*unknown"): - pc.sort_indices( - table, - sort_keys=[("unknown", "ascending")] # type: ignore[reportArgumentType] - ) + pc.sort_indices(table, sort_keys=[("unknown", "ascending")]) with pytest.raises(ValueError, match="not a valid sort order"): - pc.sort_indices( - table, sort_keys=[("a", "nonscending")] # type: ignore[reportArgumentType] - ) + pc.sort_indices(table, sort_keys=[("a", "nonscending")]) def test_is_in(): @@ -3085,9 +3052,9 @@ def test_quantile(): assert result.to_pylist() == [1.25, 1.5, 1.75] with pytest.raises(ValueError, match="Quantile must be between 0 and 1"): - pc.quantile(arr, q=1.1) # type: ignore[invalid-argument-type] + pc.quantile(arr, q=1.1) with pytest.raises(ValueError, match="not a valid quantile interpolation"): - pc.quantile(arr, interpolation='zzz') # type: ignore[invalid-argument-type] + pc.quantile(arr, interpolation='zzz') def test_tdigest(): @@ -3153,8 +3120,6 @@ def test_min_max_element_wise(): @pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_sum(start, skip_nulls): - import numpy as np - # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3203,15 +3168,13 @@ def test_cumulative_sum(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_sum([1, 2, 3], start=strt) # type: ignore[reportArgumentType] + pc.cumulative_sum([1, 2, 3], start=strt) @pytest.mark.numpy @pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_prod(start, skip_nulls): - import numpy as np - # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3260,17 +3223,13 @@ def test_cumulative_prod(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_prod( - [1, 2, 3], start=strt # type: ignore[reportArgumentType] - ) + pc.cumulative_prod([1, 2, 3], start=strt) @pytest.mark.numpy @pytest.mark.parametrize('start', (0.5, 3.5, 6.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_max(start, skip_nulls): - import numpy as np - # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, 
pa.scalar(start_int, type=pa.int8()), @@ -3322,15 +3281,13 @@ def test_cumulative_max(start, skip_nulls): for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) # type: ignore[reportArgumentType] + pc.cumulative_max([1, 2, 3], start=strt) @pytest.mark.numpy @pytest.mark.parametrize('start', (0.5, 3.5, 6.5)) @pytest.mark.parametrize('skip_nulls', (True, False)) def test_cumulative_min(start, skip_nulls): - import numpy as np - # Exact tests (e.g., integral types) start_int = int(start) starts = [None, start_int, pa.scalar(start_int, type=pa.int8()), @@ -3378,12 +3335,11 @@ def test_cumulative_min(start, skip_nulls): expected_arrays[i], strt if strt is not None else 1e9, skip_nulls=False) np.testing.assert_array_almost_equal(result.to_numpy( - # type: ignore[reportAttributeAccessIssue] zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) for strt in ['a', pa.scalar('arrow'), 1.1]: with pytest.raises(pa.ArrowInvalid): - pc.cumulative_max([1, 2, 3], start=strt) # type: ignore[reportArgumentType] + pc.cumulative_max([1, 2, 3], start=strt) def test_make_struct(): @@ -3475,12 +3431,12 @@ def test_list_element(): lists = pa.array([l1, l2], list_type) index = 1 - result = pc.list_element(lists, index) + result = pa.compute.list_element(lists, index) expected = pa.array([None, {'a': 0.52, 'b': 3}], element_type) assert result.equals(expected) index = 4 - result = pc.list_element(lists, index) + result = pa.compute.list_element(lists, index) expected = pa.array([{'a': 5.6, 'b': 6}, {'a': .6, 'b': 8}], element_type) assert result.equals(expected) @@ -3519,7 +3475,7 @@ def test_random(): pa.array([], type=pa.float64()) # System random initialization => outputs all distinct - arrays = [tuple(pc.random(100).to_pylist()) for _ in range(10)] + arrays = [tuple(pc.random(100).to_pylist()) for i in range(10)] assert len(set(arrays)) == len(arrays) arrays = [tuple(pc.random(100, initializer=i % 7).to_pylist()) @@ -3528,14 +3484,15 @@ def test_random(): # Arbitrary hashable objects can be given as initializer initializers = [object(), (4, 5, 6), "foo"] - initializers.extend(os.urandom(10) for _ in range(10)) - arrays = [tuple(pc.random(100, initializer=i).to_pylist()) for i in initializers] + initializers.extend(os.urandom(10) for i in range(10)) + arrays = [tuple(pc.random(100, initializer=i).to_pylist()) + for i in initializers] assert len(set(arrays)) == len(arrays) with pytest.raises(TypeError, match=r"initializer should be 'system', an integer, " r"or a hashable object; got \[\]"): - pc.random(100, initializer=[]) # type: ignore[invalid-argument-type] + pc.random(100, initializer=[]) @pytest.mark.parametrize( @@ -3585,7 +3542,7 @@ def test_rank_options(): match=r'"NonExisting" is not a valid tiebreaker'): pc.RankOptions(sort_keys="descending", null_placement="at_end", - tiebreaker="NonExisting") # type: ignore[invalid-argument-type] + tiebreaker="NonExisting") def test_rank_quantile_options(): @@ -3615,7 +3572,7 @@ def test_rank_quantile_options(): assert result.equals(expected_descending) with pytest.raises(ValueError, match="not a valid sort order"): - pc.rank_quantile(arr, sort_keys="XXX") # type: ignore[reportArgumentType] + pc.rank_quantile(arr, sort_keys="XXX") def test_rank_normal_options(): @@ -3643,8 +3600,6 @@ def test_rank_normal_options(): def create_sample_expressions(): - import numpy as np - # We need a schema for substrait conversion schema = pa.schema([pa.field("i64", pa.int64()), pa.field( "foo", 
pa.struct([pa.field("bar", pa.string())]))]) @@ -3659,7 +3614,7 @@ def create_sample_expressions(): e = pc.scalar(None) f = pc.scalar({'a': 1}) g = pc.scalar(pa.scalar(1)) - h = pc.scalar(np.int64(2)) # type: ignore[reportOptionalMemberAccess] + h = pc.scalar(np.int64(2)) j = pc.scalar(False) k = pc.scalar(0) @@ -3734,22 +3689,20 @@ def test_expression_serialization_arrow(pickle_module): def test_expression_serialization_substrait(): exprs = create_sample_expressions() - schema = pa.schema(exprs["schema"]) # type: ignore[reportAttributeAccessIssue] + schema = exprs["schema"] # Basic literals don't change on binding and so they will round # trip without any change - for expr in exprs["literals"]: # type: ignore[reportAttributeAccessIssue] - serialized = \ - expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] + for expr in exprs["literals"]: + serialized = expr.to_substrait(schema) deserialized = pc.Expression.from_substrait(serialized) - assert expr.equals(deserialized) # type: ignore[reportAttributeAccessIssue] + assert expr.equals(deserialized) # Expressions are bound when they get serialized. Since bound # expressions are not equal to their unbound variants we cannot # compare the round tripped with the original - for expr in exprs["calls"]: # type: ignore[reportAttributeAccessIssue] - serialized = \ - expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] + for expr in exprs["calls"]: + serialized = expr.to_substrait(schema) deserialized = pc.Expression.from_substrait(serialized) # We can't compare the expressions themselves because of the bound # unbound difference. But we can compare the string representation @@ -3759,8 +3712,7 @@ def test_expression_serialization_substrait(): assert deserialized.equals(deserialized_again) for expr, expr_norm in zip(exprs["refs"], exprs["numeric_refs"]): - serialized = \ - expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] + serialized = expr.to_substrait(schema) deserialized = pc.Expression.from_substrait(serialized) assert str(deserialized) == str(expr_norm) serialized_again = deserialized.to_substrait(schema) @@ -3770,16 +3722,15 @@ def test_expression_serialization_substrait(): # For the special cases we get various wrinkles in serialization but we # should always get the same thing from round tripping twice for expr in exprs["special"]: - serialized = \ - expr.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] + serialized = expr.to_substrait(schema) deserialized = pc.Expression.from_substrait(serialized) serialized_again = deserialized.to_substrait(schema) deserialized_again = pc.Expression.from_substrait(serialized_again) assert deserialized.equals(deserialized_again) # Special case, we lose the field names of struct literals - f = exprs["special"][0] # type: ignore[reportAttributeAccessIssue] - serialized = f.to_substrait(schema) # type: ignore[reportAttributeAccessIssue] + f = exprs["special"][0] + serialized = f.to_substrait(schema) deserialized = pc.Expression.from_substrait(serialized) assert deserialized.equals(pc.scalar({'': 1})) @@ -3807,10 +3758,10 @@ def test_expression_construction(): nested_field = pc.field(("nested", "field")) nested_field2 = pc.field("nested", "field") - _ = zero | one == string - _ = ~true == false + zero | one == string + ~true == false for typ in ("bool", pa.bool_()): - _ = field.cast(typ) == true + field.cast(typ) == true field.isin([1, 2]) nested_mixed_types.isin(["foo", "bar"]) @@ -3818,10 +3769,10 @@ def test_expression_construction(): 
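# A small sketch of the Expression API exercised by the tests above:
# expressions are built lazily from pc.field / pc.scalar and evaluated when
# handed to a filter; the column names here are illustrative only.
import pyarrow as pa
import pyarrow.compute as pc

table = pa.table({"i64": [1, 2, 3], "s": ["a", "b", "c"]})
expr = (pc.field("i64") > pc.scalar(1)) & pc.field("s").isin(["b", "c"])
assert table.filter(expr).num_rows == 2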
nested_field2.isin(["foo", "bar"]) with pytest.raises(TypeError): - field.isin(1) # type: ignore[invalid-argument-type] + field.isin(1) with pytest.raises(pa.ArrowInvalid): - _ = field != object() + field != object() def test_expression_boolean_operators(): @@ -3830,16 +3781,16 @@ def test_expression_boolean_operators(): false = pc.scalar(False) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - _ = true and false + true and false with pytest.raises(ValueError, match="cannot be evaluated to python True"): - _ = true or false + true or false with pytest.raises(ValueError, match="cannot be evaluated to python True"): bool(true) with pytest.raises(ValueError, match="cannot be evaluated to python True"): - _ = not true + not true def test_expression_call_function(): @@ -3861,14 +3812,14 @@ def test_expression_call_function(): # Invalid pc.scalar input gives original error message msg = "only other expressions allowed as arguments" with pytest.raises(TypeError, match=msg): - pc.add(field, object) # type: ignore[reportArgumentType] + pc.add(field, object) def test_cast_table_raises(): table = pa.table({'a': [1, 2]}) - with pytest.raises(ArrowTypeError): - pc.cast(table, pa.int64()) # type: ignore[reportArgumentType] + with pytest.raises(pa.lib.ArrowTypeError): + pc.cast(table, pa.int64()) @pytest.mark.parametrize("start,stop,expected", ( @@ -4015,31 +3966,31 @@ def test_run_end_encode(value_type, option): def test_pairwise_diff(): arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, 1, 1, None, None, 1]) - result = pc.pairwise_diff(arr, period=1) + result = pa.compute.pairwise_diff(arr, period=1) assert result.equals(expected) arr = pa.array([1, 2, 3, None, 4, 5]) expected = pa.array([None, None, 2, None, 1, None]) - result = pc.pairwise_diff(arr, period=2) + result = pa.compute.pairwise_diff(arr, period=2) assert result.equals(expected) # negative period arr = pa.array([1, 2, 3, None, 4, 5], type=pa.int8()) expected = pa.array([-1, -1, None, None, -1, None], type=pa.int8()) - result = pc.pairwise_diff(arr, period=-1) + result = pa.compute.pairwise_diff(arr, period=-1) assert result.equals(expected) # wrap around overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) expected = pa.array([255, 255, None, None, 255, None], type=pa.uint8()) - result = pc.pairwise_diff(arr, period=-1) + result = pa.compute.pairwise_diff(arr, period=-1) assert result.equals(expected) # fail on overflow arr = pa.array([1, 2, 3, None, 4, 5], type=pa.uint8()) with pytest.raises(pa.ArrowInvalid, match="overflow"): - pc.pairwise_diff_checked(arr, period=-1) + pa.compute.pairwise_diff_checked(arr, period=-1) def test_pivot_wider(): diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index a6d3546e57c..7c86f37587c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -24,7 +24,7 @@ import math import os import pathlib -import pytest +import pytest # type: ignore[import-not-found] import random import sys import tempfile @@ -33,7 +33,7 @@ try: import numpy as np except ImportError: - np = None + np = None # type: ignore[assignment] from pyarrow.util import guid from pyarrow import Codec @@ -811,8 +811,9 @@ def test_cache_options_pickling(pickle_module): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ - pytest.param( - "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("bz2", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) 
), "brotli", "gzip", @@ -852,8 +853,9 @@ def test_compress_decompress(compression): @pytest.mark.numpy @pytest.mark.parametrize("compression", [ - pytest.param( - "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("bz2", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) ), "brotli", "gzip", @@ -1748,9 +1750,9 @@ def test_unknown_compression_raises(): "gzip", "lz4", "zstd", - pytest.param( - "snappy", - marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) + pytest.param("snappy", marks=pytest.mark.xfail( + raises=pa.lib.ArrowNotImplementedError # type: ignore[attr-defined] + ) ) ]) def test_compressed_roundtrip(compression): diff --git a/python/pyarrow/types.pyi b/python/pyarrow/types.pyi index 3ead6830421..1d1554da520 100644 --- a/python/pyarrow/types.pyi +++ b/python/pyarrow/types.pyi @@ -100,59 +100,459 @@ _Nested: TypeAlias = ( | _Union ) -def is_null(t: DataType) -> TypeIs[NullType]: ... -def is_boolean(t: DataType) -> TypeIs[BoolType]: ... -def is_integer(t: DataType) -> TypeIs[_Integer]: ... -def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ... -def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ... -def is_int8(t: DataType) -> TypeIs[Int8Type]: ... -def is_int16(t: DataType) -> TypeIs[Int16Type]: ... -def is_int32(t: DataType) -> TypeIs[Int32Type]: ... -def is_int64(t: DataType) -> TypeIs[Int64Type]: ... -def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ... -def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ... -def is_uint32(t: DataType) -> TypeIs[Uint32Type]: ... -def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ... -def is_floating(t: DataType) -> TypeIs[_Floating]: ... -def is_float16(t: DataType) -> TypeIs[Float16Type]: ... -def is_float32(t: DataType) -> TypeIs[Float32Type]: ... -def is_float64(t: DataType) -> TypeIs[Float64Type]: ... -def is_list(t: DataType) -> TypeIs[ListType[Any]]: ... -def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ... -def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ... -def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ... -def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ... -def is_struct(t: DataType) -> TypeIs[StructType]: ... -def is_union(t: DataType) -> TypeIs[_Union]: ... -def is_nested(t: DataType) -> TypeIs[_Nested]: ... -def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ... -def is_temporal(t: DataType) -> TypeIs[_Temporal]: ... -def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ... -def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ... -def is_time(t: DataType) -> TypeIs[_Time]: ... -def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ... -def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ... -def is_binary(t: DataType) -> TypeIs[BinaryType]: ... -def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ... -def is_unicode(t: DataType) -> TypeIs[StringType]: ... -def is_string(t: DataType) -> TypeIs[StringType]: ... -def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ... -def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ... -def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ... -def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ... -def is_string_view(t: DataType) -> TypeIs[StringViewType]: ... -def is_date(t: DataType) -> TypeIs[_Date]: ... -def is_date32(t: DataType) -> TypeIs[Date32Type]: ... 
-def is_date64(t: DataType) -> TypeIs[Date64Type]: ... -def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ... -def is_decimal(t: DataType) -> TypeIs[_Decimal]: ... -def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ... -def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ... -def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ... -def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ... -def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ... -def is_interval(t: DataType) -> TypeIs[_Interval]: ... -def is_primitive(t: DataType) -> bool: ... +def is_null(t: DataType) -> TypeIs[NullType]: + """ + Return True if value is an instance of type: null. + + Parameters + ---------- + t : DataType + """ +def is_boolean(t: DataType) -> TypeIs[BoolType]: + """ + Return True if value is an instance of type: boolean. + + Parameters + ---------- + t : DataType + """ +def is_integer(t: DataType) -> TypeIs[_Integer]: + """ + Return True if value is an instance of type: any integer. + + Parameters + ---------- + t : DataType + """ +def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: + """ + Return True if value is an instance of type: signed integer. + + Parameters + ---------- + t : DataType + """ +def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: + """ + Return True if value is an instance of type: unsigned integer. + + Parameters + ---------- + t : DataType + """ +def is_int8(t: DataType) -> TypeIs[Int8Type]: + """ + Return True if value is an instance of type: int8. + + Parameters + ---------- + t : DataType + """ +def is_int16(t: DataType) -> TypeIs[Int16Type]: + """ + Return True if value is an instance of type: int16. + + Parameters + ---------- + t : DataType + """ +def is_int32(t: DataType) -> TypeIs[Int32Type]: + """ + Return True if value is an instance of type: int32. + + Parameters + ---------- + t : DataType + """ +def is_int64(t: DataType) -> TypeIs[Int64Type]: + """ + Return True if value is an instance of type: int64. + + Parameters + ---------- + t : DataType + """ +def is_uint8(t: DataType) -> TypeIs[UInt8Type]: + """ + Return True if value is an instance of type: uint8. + + Parameters + ---------- + t : DataType + """ +def is_uint16(t: DataType) -> TypeIs[UInt16Type]: + """ + Return True if value is an instance of type: uint16. + + Parameters + ---------- + t : DataType + """ +def is_uint32(t: DataType) -> TypeIs[Uint32Type]: + """ + Return True if value is an instance of type: uint32. + + Parameters + ---------- + t : DataType + """ +def is_uint64(t: DataType) -> TypeIs[UInt64Type]: + """ + Return True if value is an instance of type: uint64. + + Parameters + ---------- + t : DataType + """ +def is_floating(t: DataType) -> TypeIs[_Floating]: + """ + Return True if value is an instance of type: floating point numeric. + + Parameters + ---------- + t : DataType + """ +def is_float16(t: DataType) -> TypeIs[Float16Type]: + """ + Return True if value is an instance of type: float16 (half-precision). + + Parameters + ---------- + t : DataType + """ +def is_float32(t: DataType) -> TypeIs[Float32Type]: + """ + Return True if value is an instance of type: float32 (single precision). + + Parameters + ---------- + t : DataType + """ +def is_float64(t: DataType) -> TypeIs[Float64Type]: + """ + Return True if value is an instance of type: float64 (double precision). 
+ + Parameters + ---------- + t : DataType + """ +def is_list(t: DataType) -> TypeIs[ListType[Any]]: + """ + Return True if value is an instance of type: list. + + Parameters + ---------- + t : DataType + """ +def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: + """ + Return True if value is an instance of type: large list. + + Parameters + ---------- + t : DataType + """ +def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: + """ + Return True if value is an instance of type: fixed size list. + + Parameters + ---------- + t : DataType + """ +def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: + """ + Return True if value is an instance of type: list view. + + Parameters + ---------- + t : DataType + """ +def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: + """ + Return True if value is an instance of type: large list view. + + Parameters + ---------- + t : DataType + """ +def is_struct(t: DataType) -> TypeIs[StructType]: + """ + Return True if value is an instance of type: struct. + + Parameters + ---------- + t : DataType + """ +def is_union(t: DataType) -> TypeIs[_Union]: + """ + Return True if value is an instance of type: union. + + Parameters + ---------- + t : DataType + """ +def is_nested(t: DataType) -> TypeIs[_Nested]: + """ + Return True if value is an instance of type: nested type. + + Parameters + ---------- + t : DataType + """ +def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: + """ + Return True if value is an instance of type: run-end encoded. + + Parameters + ---------- + t : DataType + """ +def is_temporal(t: DataType) -> TypeIs[_Temporal]: + """ + Return True if value is an instance of type: date, time, timestamp or duration. + + Parameters + ---------- + t : DataType + """ +def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: + """ + Return True if value is an instance of type: timestamp. + + Parameters + ---------- + t : DataType + """ +def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: + """ + Return True if value is an instance of type: duration. + + Parameters + ---------- + t : DataType + """ +def is_time(t: DataType) -> TypeIs[_Time]: + """ + Return True if value is an instance of type: time. + + Parameters + ---------- + t : DataType + """ +def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: + """ + Return True if value is an instance of type: time32. + + Parameters + ---------- + t : DataType + """ +def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: + """ + Return True if value is an instance of type: time64. + + Parameters + ---------- + t : DataType + """ +def is_binary(t: DataType) -> TypeIs[BinaryType]: + """ + Return True if value is an instance of type: variable-length binary. + + Parameters + ---------- + t : DataType + """ +def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: + """ + Return True if value is an instance of type: large variable-length binary. + + Parameters + ---------- + t : DataType + """ +def is_unicode(t: DataType) -> TypeIs[StringType]: + """ + Alias for is_string. + + Parameters + ---------- + t : DataType + """ +def is_string(t: DataType) -> TypeIs[StringType]: + """ + Return True if value is an instance of type: string (utf8 unicode). + + Parameters + ---------- + t : DataType + """ +def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: + """ + Alias for is_large_string. 
+ + Parameters + ---------- + t : DataType + """ +def is_large_string(t: DataType) -> TypeIs[LargeStringType]: + """ + Return True if value is an instance of type: large string (utf8 unicode). + + Parameters + ---------- + t : DataType + """ +def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: + """ + Return True if value is an instance of type: fixed size binary. + + Parameters + ---------- + t : DataType + """ +def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: + """ + Return True if value is an instance of type: variable-length binary view. + + Parameters + ---------- + t : DataType + """ +def is_string_view(t: DataType) -> TypeIs[StringViewType]: + """ + Return True if value is an instance of type: variable-length string (utf-8) view. + + Parameters + ---------- + t : DataType + """ +def is_date(t: DataType) -> TypeIs[_Date]: + """ + Return True if value is an instance of type: date. + + Parameters + ---------- + t : DataType + """ +def is_date32(t: DataType) -> TypeIs[Date32Type]: + """ + Return True if value is an instance of type: date32 (days). + + Parameters + ---------- + t : DataType + """ +def is_date64(t: DataType) -> TypeIs[Date64Type]: + """ + Return True if value is an instance of type: date64 (milliseconds). + + Parameters + ---------- + t : DataType + """ +def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: + """ + Return True if value is an instance of type: map. + + Parameters + ---------- + t : DataType + """ +def is_decimal(t: DataType) -> TypeIs[_Decimal]: + """ + Return True if value is an instance of type: decimal. + + Parameters + ---------- + t : DataType + """ +def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: + """ + Return True if value is an instance of type: decimal32. + + Parameters + ---------- + t : DataType + """ +def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: + """ + Return True if value is an instance of type: decimal64. + + Parameters + ---------- + t : DataType + """ +def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: + """ + Return True if value is an instance of type: decimal128. + + Parameters + ---------- + t : DataType + """ +def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: + """ + Return True if value is an instance of type: decimal256. + + Parameters + ---------- + t : DataType + """ +def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: + """ + Return True if value is an instance of type: dictionary-encoded. + + Parameters + ---------- + t : DataType + """ +def is_interval(t: DataType) -> TypeIs[_Interval]: + """ + Return True if value is an instance of type: interval. + + Parameters + ---------- + t : DataType + """ +def is_primitive(t: DataType) -> bool: + """ + Return True if value is an instance of type: primitive type. + + Parameters + ---------- + t : DataType + """ +def is_boolean_value(obj: Any) -> bool: + """ + Check if the object is a boolean. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_integer_value(obj: Any) -> bool: + """ + Check if the object is an integer. + + Parameters + ---------- + obj : object + The object to check + """ + +def is_float_value(obj: Any) -> bool: + """ + Check if the object is a float. 
+ + Parameters + ---------- + obj : object + The object to check + """ __all__ = [ "is_binary", diff --git a/python/pyproject.toml b/python/pyproject.toml index 598ddf7a75b..fac3b25c554 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -91,16 +91,3 @@ version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' fallback_version = '22.0.0a0' - -[tool.pyright] -typeCheckingMode = "strict" -reportMissingImports = false -reportPrivateUsage = false -reportUnknownParameterType = false -reportMissingTypeArgument = false -reportMissingParameterType = false -reportMissingTypeStubs = false -reportUnknownVariableType = false -reportUnknownArgumentType = false -reportUnknownMemberType = false -include = ["pyarrow/tests/test_compute.py"] From e7c0202bb892e36cd2321a033fd9205bcd951682 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 12 Sep 2025 15:38:46 +0200 Subject: [PATCH 09/26] Remove more stubs --- .../pyarrow/_dataset_parquet_encryption.pyi | 102 - python/pyarrow/_feather.pyi | 46 - python/pyarrow/_gcsfs.pyi | 100 - python/pyarrow/_parquet_encryption.pyi | 84 - python/pyarrow/csv.pyi | 44 - python/pyarrow/cuda.pyi | 42 - python/pyarrow/flight.pyi | 112 - python/pyarrow/fs.pyi | 94 - python/pyarrow/interchange/__init__.pyi | 16 - python/pyarrow/interchange/buffer.pyi | 75 - python/pyarrow/interchange/column.pyi | 269 --- python/pyarrow/interchange/dataframe.pyi | 119 - python/pyarrow/interchange/from_dataframe.pyi | 261 --- python/pyarrow/parquet/__init__.pyi | 18 - python/pyarrow/parquet/core.pyi | 2078 ----------------- python/pyarrow/parquet/encryption.pyi | 32 - 16 files changed, 3492 deletions(-) delete mode 100644 python/pyarrow/_dataset_parquet_encryption.pyi delete mode 100644 python/pyarrow/_feather.pyi delete mode 100644 python/pyarrow/_gcsfs.pyi delete mode 100644 python/pyarrow/_parquet_encryption.pyi delete mode 100644 python/pyarrow/csv.pyi delete mode 100644 python/pyarrow/cuda.pyi delete mode 100644 python/pyarrow/flight.pyi delete mode 100644 python/pyarrow/fs.pyi delete mode 100644 python/pyarrow/interchange/__init__.pyi delete mode 100644 python/pyarrow/interchange/buffer.pyi delete mode 100644 python/pyarrow/interchange/column.pyi delete mode 100644 python/pyarrow/interchange/dataframe.pyi delete mode 100644 python/pyarrow/interchange/from_dataframe.pyi delete mode 100644 python/pyarrow/parquet/__init__.pyi delete mode 100644 python/pyarrow/parquet/core.pyi delete mode 100644 python/pyarrow/parquet/encryption.pyi diff --git a/python/pyarrow/_dataset_parquet_encryption.pyi b/python/pyarrow/_dataset_parquet_encryption.pyi deleted file mode 100644 index be40c0b39b3..00000000000 --- a/python/pyarrow/_dataset_parquet_encryption.pyi +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions -from ._parquet import FileDecryptionProperties -from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig -from .lib import _Weakrefable - -class ParquetEncryptionConfig(_Weakrefable): - """ - Core configuration class encapsulating parameters for high-level encryption - within the Parquet framework. - - The ParquetEncryptionConfig class serves as a bridge for passing encryption-related - parameters to the appropriate components within the Parquet library. It maintains references - to objects that define the encryption strategy, Key Management Service (KMS) configuration, - and specific encryption configurations for Parquet data. - - Parameters - ---------- - crypto_factory : pyarrow.parquet.encryption.CryptoFactory - Shared pointer to a `CryptoFactory` object. The `CryptoFactory` is responsible for - creating cryptographic components, such as encryptors and decryptors. - kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig - Shared pointer to a `KmsConnectionConfig` object. This object holds the configuration - parameters necessary for connecting to a Key Management Service (KMS). - encryption_config : pyarrow.parquet.encryption.EncryptionConfiguration - Shared pointer to an `EncryptionConfiguration` object. This object defines specific - encryption settings for Parquet data, including the keys assigned to different columns. - - Raises - ------ - ValueError - Raised if `encryption_config` is None. - """ - def __init__( - self, - crypto_factory: CryptoFactory, - kms_connection_config: KmsConnectionConfig, - encryption_config: EncryptionConfiguration, - ) -> None: ... - -class ParquetDecryptionConfig(_Weakrefable): - """ - Core configuration class encapsulating parameters for high-level decryption - within the Parquet framework. - - ParquetDecryptionConfig is designed to pass decryption-related parameters to - the appropriate decryption components within the Parquet library. It holds references to - objects that define the decryption strategy, Key Management Service (KMS) configuration, - and specific decryption configurations for reading encrypted Parquet data. - - Parameters - ---------- - crypto_factory : pyarrow.parquet.encryption.CryptoFactory - Shared pointer to a `CryptoFactory` object, pivotal in creating cryptographic - components for the decryption process. - kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig - Shared pointer to a `KmsConnectionConfig` object, containing parameters necessary - for connecting to a Key Management Service (KMS) during decryption. - decryption_config : pyarrow.parquet.encryption.DecryptionConfiguration - Shared pointer to a `DecryptionConfiguration` object, specifying decryption settings - for reading encrypted Parquet data. - - Raises - ------ - ValueError - Raised if `decryption_config` is None. - """ - def __init__( - self, - crypto_factory: CryptoFactory, - kms_connection_config: KmsConnectionConfig, - encryption_config: EncryptionConfiguration, - ) -> None: ... - -def set_encryption_config( - opts: ParquetFileWriteOptions, - config: ParquetEncryptionConfig, -) -> None: ... -def set_decryption_properties( - opts: ParquetFragmentScanOptions, - config: FileDecryptionProperties, -): ... -def set_decryption_config( - opts: ParquetFragmentScanOptions, - config: ParquetDecryptionConfig, -): ... 
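For readers unfamiliar with these classes, the stubs deleted above correspond to the documented pyarrow.parquet.encryption workflow: a CryptoFactory built from a user-supplied KmsClient callback turns a KmsConnectionConfig plus an EncryptionConfiguration into FileEncryptionProperties, which the Parquet writer then consumes. The following is a minimal sketch of that workflow, assuming only the public API shown in these stubs; the InMemoryKmsClient class, the key strings, and the file name are illustrative placeholders, not part of this patch.

    import base64
    from datetime import timedelta

    import pyarrow as pa
    import pyarrow.parquet as pq
    import pyarrow.parquet.encryption as pe


    class InMemoryKmsClient(pe.KmsClient):
        # Toy KMS for illustration only: "wraps" a data key by concatenating it
        # with the master key and base64-encoding the result. Not secure.
        def __init__(self, kms_connection_config):
            super().__init__()
            self._keys = kms_connection_config.custom_kms_conf

        def wrap_key(self, key_bytes, master_key_identifier):
            master = self._keys[master_key_identifier].encode("utf-8")
            return base64.b64encode(master + key_bytes).decode("utf-8")

        def unwrap_key(self, wrapped_key, master_key_identifier):
            master = self._keys[master_key_identifier].encode("utf-8")
            return base64.b64decode(wrapped_key)[len(master):]


    # Master keys are looked up by name through custom_kms_conf in this toy setup.
    kms_config = pe.KmsConnectionConfig(
        custom_kms_conf={"footer_key": "0123456789012345",
                         "col_key": "1234567890123450"})
    encryption_config = pe.EncryptionConfiguration(
        footer_key="footer_key",
        column_keys={"col_key": ["a"]},
        cache_lifetime=timedelta(minutes=5))
    crypto_factory = pe.CryptoFactory(lambda config: InMemoryKmsClient(config))
    props = crypto_factory.file_encryption_properties(kms_config, encryption_config)

    table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    with pq.ParquetWriter("example.parquet.encrypted", table.schema,
                          encryption_properties=props) as writer:
        writer.write_table(table)

The same three objects (crypto_factory, kms_connection_config, encryption_config) are what ParquetEncryptionConfig and ParquetDecryptionConfig bundle together for the dataset-level API typed above.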
diff --git a/python/pyarrow/_feather.pyi b/python/pyarrow/_feather.pyi deleted file mode 100644 index 373fe38cdce..00000000000 --- a/python/pyarrow/_feather.pyi +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import IO - -from _typeshed import StrPath - -from .lib import Buffer, NativeFile, Table, _Weakrefable - -class FeatherError(Exception): ... - -def write_feather( - table: Table, - dest: StrPath | IO | NativeFile, - compression: str | None = None, - compression_level: int | None = None, - chunksize: int | None = None, - version: int = 2, -): ... - -class FeatherReader(_Weakrefable): - def __init__( - self, - source: StrPath | IO | NativeFile | Buffer, - use_memory_map: bool, - use_threads: bool, - ) -> None: ... - @property - def version(self) -> str: ... - def read(self) -> Table: ... - def read_indices(self, indices: list[int]) -> Table: ... - def read_names(self, names: list[str]) -> Table: ... diff --git a/python/pyarrow/_gcsfs.pyi b/python/pyarrow/_gcsfs.pyi deleted file mode 100644 index 0ced106615a..00000000000 --- a/python/pyarrow/_gcsfs.pyi +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datetime as dt - -from ._fs import FileSystem -from .lib import KeyValueMetadata - -class GcsFileSystem(FileSystem): - """ - Google Cloud Storage (GCS) backed FileSystem implementation - - By default uses the process described in https://google.aip.dev/auth/4110 - to resolve credentials. If not running on Google Cloud Platform (GCP), - this generally requires the environment variable - GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file - containing credentials. - - Note: GCS buckets are special and the operations available on them may be - limited or more expensive than expected compared to local file systems. - - Note: When pickling a GcsFileSystem that uses default credentials, resolution - credentials are not stored in the serialized data. 
Therefore, when unpickling - it is assumed that the necessary credentials are in place for the target - process. - - Parameters - ---------- - anonymous : boolean, default False - Whether to connect anonymously. - If true, will not attempt to look up credentials using standard GCP - configuration methods. - access_token : str, default None - GCP access token. If provided, temporary credentials will be fetched by - assuming this role; also, a `credential_token_expiration` must be - specified as well. - target_service_account : str, default None - An optional service account to try to impersonate when accessing GCS. This - requires the specified credential user or service account to have the necessary - permissions. - credential_token_expiration : datetime, default None - Expiration for credential generated with an access token. Must be specified - if `access_token` is specified. - default_bucket_location : str, default 'US' - GCP region to create buckets in. - scheme : str, default 'https' - GCS connection transport scheme. - endpoint_override : str, default None - Override endpoint with a connect string such as "localhost:9000" - default_metadata : mapping or pyarrow.KeyValueMetadata, default None - Default metadata for `open_output_stream`. This will be ignored if - non-empty metadata is passed to `open_output_stream`. - retry_time_limit : timedelta, default None - Set the maximum amount of time the GCS client will attempt to retry - transient errors. Subsecond granularity is ignored. - project_id : str, default None - The GCP project identifier to use for creating buckets. - If not set, the library uses the GOOGLE_CLOUD_PROJECT environment - variable. Most I/O operations do not need a project id, only applications - that create new buckets need a project id. - """ - - def __init__( - self, - *, - anonymous: bool = False, - access_token: str | None = None, - target_service_account: str | None = None, - credential_token_expiration: dt.datetime | None = None, - default_bucket_location: str = "US", - scheme: str = "https", - endpoint_override: str | None = None, - default_metadata: dict | KeyValueMetadata | None = None, - retry_time_limit: dt.timedelta | None = None, - project_id: str | None = None, - ): ... - @property - def default_bucket_location(self) -> str: - """ - The GCP location this filesystem will write to. - """ - @property - def project_id(self) -> str: - """ - The GCP project id this filesystem will use. - """ diff --git a/python/pyarrow/_parquet_encryption.pyi b/python/pyarrow/_parquet_encryption.pyi deleted file mode 100644 index e1228cbdb5a..00000000000 --- a/python/pyarrow/_parquet_encryption.pyi +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import datetime as dt - -from typing import Callable - -from ._parquet import FileDecryptionProperties, FileEncryptionProperties -from .lib import _Weakrefable - -class EncryptionConfiguration(_Weakrefable): - footer_key: str - column_keys: dict[str, list[str]] - encryption_algorithm: str - plaintext_footer: bool - double_wrapping: bool - cache_lifetime: dt.timedelta - internal_key_material: bool - data_key_length_bits: int - - def __init__( - self, - footer_key: str, - *, - column_keys: dict[str, str | list[str]] | None = None, - encryption_algorithm: str | None = None, - plaintext_footer: bool | None = None, - double_wrapping: bool | None = None, - cache_lifetime: dt.timedelta | None = None, - internal_key_material: bool | None = None, - data_key_length_bits: int | None = None, - ) -> None: ... - -class DecryptionConfiguration(_Weakrefable): - cache_lifetime: dt.timedelta - def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... - -class KmsConnectionConfig(_Weakrefable): - kms_instance_id: str - kms_instance_url: str - key_access_token: str - custom_kms_conf: dict[str, str] - def __init__( - self, - *, - kms_instance_id: str | None = None, - kms_instance_url: str | None = None, - key_access_token: str | None = None, - custom_kms_conf: dict[str, str] | None = None, - ) -> None: ... - def refresh_key_access_token(self, value: str) -> None: ... - -class KmsClient(_Weakrefable): - def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... - def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... - -class CryptoFactory(_Weakrefable): - def __init__(self, kms_client_factory: Callable[[KmsConnectionConfig], KmsClient]): ... - def file_encryption_properties( - self, - kms_connection_config: KmsConnectionConfig, - encryption_config: EncryptionConfiguration, - ) -> FileEncryptionProperties: ... - def file_decryption_properties( - self, - kms_connection_config: KmsConnectionConfig, - decryption_config: DecryptionConfiguration | None = None, - ) -> FileDecryptionProperties: ... - def remove_cache_entries_for_token(self, access_token: str) -> None: ... - def remove_cache_entries_for_all_tokens(self) -> None: ... diff --git a/python/pyarrow/csv.pyi b/python/pyarrow/csv.pyi deleted file mode 100644 index a7abd413aab..00000000000 --- a/python/pyarrow/csv.pyi +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from pyarrow._csv import ( - ISO8601, - ConvertOptions, - CSVStreamingReader, - CSVWriter, - InvalidRow, - ParseOptions, - ReadOptions, - WriteOptions, - open_csv, - read_csv, - write_csv, -) - -__all__ = [ - "ISO8601", - "ConvertOptions", - "CSVStreamingReader", - "CSVWriter", - "InvalidRow", - "ParseOptions", - "ReadOptions", - "WriteOptions", - "open_csv", - "read_csv", - "write_csv", -] diff --git a/python/pyarrow/cuda.pyi b/python/pyarrow/cuda.pyi deleted file mode 100644 index 0394965bb73..00000000000 --- a/python/pyarrow/cuda.pyi +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyarrow._cuda import ( - BufferReader, - BufferWriter, - Context, - CudaBuffer, - HostBuffer, - IpcMemHandle, - new_host_buffer, - read_message, - read_record_batch, - serialize_record_batch, -) - -__all__ = [ - "BufferReader", - "BufferWriter", - "Context", - "CudaBuffer", - "HostBuffer", - "IpcMemHandle", - "new_host_buffer", - "read_message", - "read_record_batch", - "serialize_record_batch", -] diff --git a/python/pyarrow/flight.pyi b/python/pyarrow/flight.pyi deleted file mode 100644 index dcc6ee2244b..00000000000 --- a/python/pyarrow/flight.pyi +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from pyarrow._flight import ( - Action, - ActionType, - BasicAuth, - CallInfo, - CertKeyPair, - ClientAuthHandler, - ClientMiddleware, - ClientMiddlewareFactory, - DescriptorType, - FlightCallOptions, - FlightCancelledError, - FlightClient, - FlightDataStream, - FlightDescriptor, - FlightEndpoint, - FlightError, - FlightInfo, - FlightInternalError, - FlightMetadataReader, - FlightMetadataWriter, - FlightMethod, - FlightServerBase, - FlightServerError, - FlightStreamChunk, - FlightStreamReader, - FlightStreamWriter, - FlightTimedOutError, - FlightUnauthenticatedError, - FlightUnauthorizedError, - FlightUnavailableError, - FlightWriteSizeExceededError, - GeneratorStream, - Location, - MetadataRecordBatchReader, - MetadataRecordBatchWriter, - RecordBatchStream, - Result, - SchemaResult, - ServerAuthHandler, - ServerCallContext, - ServerMiddleware, - ServerMiddlewareFactory, - Ticket, - TracingServerMiddlewareFactory, - connect, -) - -__all__ = [ - "Action", - "ActionType", - "BasicAuth", - "CallInfo", - "CertKeyPair", - "ClientAuthHandler", - "ClientMiddleware", - "ClientMiddlewareFactory", - "DescriptorType", - "FlightCallOptions", - "FlightCancelledError", - "FlightClient", - "FlightDataStream", - "FlightDescriptor", - "FlightEndpoint", - "FlightError", - "FlightInfo", - "FlightInternalError", - "FlightMetadataReader", - "FlightMetadataWriter", - "FlightMethod", - "FlightServerBase", - "FlightServerError", - "FlightStreamChunk", - "FlightStreamReader", - "FlightStreamWriter", - "FlightTimedOutError", - "FlightUnauthenticatedError", - "FlightUnauthorizedError", - "FlightUnavailableError", - "FlightWriteSizeExceededError", - "GeneratorStream", - "Location", - "MetadataRecordBatchReader", - "MetadataRecordBatchWriter", - "RecordBatchStream", - "Result", - "SchemaResult", - "ServerAuthHandler", - "ServerCallContext", - "ServerMiddleware", - "ServerMiddlewareFactory", - "Ticket", - "TracingServerMiddlewareFactory", - "connect", -] diff --git a/python/pyarrow/fs.pyi b/python/pyarrow/fs.pyi deleted file mode 100644 index 6c5a0af8d19..00000000000 --- a/python/pyarrow/fs.pyi +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from pyarrow._fs import ( # noqa - FileSelector, - FileType, - FileInfo, - FileSystem, - LocalFileSystem, - SubTreeFileSystem, - _MockFileSystem, - FileSystemHandler, - PyFileSystem, - SupportedFileSystem, -) -from pyarrow._azurefs import AzureFileSystem -from pyarrow._hdfs import HadoopFileSystem -from pyarrow._gcsfs import GcsFileSystem -from pyarrow._s3fs import ( # noqa - AwsDefaultS3RetryStrategy, - AwsStandardS3RetryStrategy, - S3FileSystem, - S3LogLevel, - S3RetryStrategy, - ensure_s3_initialized, - finalize_s3, - ensure_s3_finalized, - initialize_s3, - resolve_s3_region, -) - -FileStats = FileInfo - -def copy_files( - source: str, - destination: str, - source_filesystem: SupportedFileSystem | None = None, - destination_filesystem: SupportedFileSystem | None = None, - *, - chunk_size: int = 1024 * 1024, - use_threads: bool = True, -) -> None: ... - -class FSSpecHandler(FileSystemHandler): # type: ignore[misc] - fs: SupportedFileSystem - def __init__(self, fs: SupportedFileSystem) -> None: ... - -__all__ = [ - # _fs - "FileSelector", - "FileType", - "FileInfo", - "FileSystem", - "LocalFileSystem", - "SubTreeFileSystem", - "_MockFileSystem", - "FileSystemHandler", - "PyFileSystem", - # _azurefs - "AzureFileSystem", - # _hdfs - "HadoopFileSystem", - # _gcsfs - "GcsFileSystem", - # _s3fs - "AwsDefaultS3RetryStrategy", - "AwsStandardS3RetryStrategy", - "S3FileSystem", - "S3LogLevel", - "S3RetryStrategy", - "ensure_s3_initialized", - "finalize_s3", - "ensure_s3_finalized", - "initialize_s3", - "resolve_s3_region", - # fs - "FileStats", - "copy_files", - "FSSpecHandler", -] diff --git a/python/pyarrow/interchange/__init__.pyi b/python/pyarrow/interchange/__init__.pyi deleted file mode 100644 index 13a83393a91..00000000000 --- a/python/pyarrow/interchange/__init__.pyi +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyarrow/interchange/buffer.pyi b/python/pyarrow/interchange/buffer.pyi deleted file mode 100644 index 78d1dabb8b7..00000000000 --- a/python/pyarrow/interchange/buffer.pyi +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -import enum - -from pyarrow.lib import Buffer - -class DlpackDeviceType(enum.IntEnum): - """Integer enum for device type codes matching DLPack.""" - - CPU = 1 - CUDA = 2 - CPU_PINNED = 3 - OPENCL = 4 - VULKAN = 7 - METAL = 8 - VPI = 9 - ROCM = 10 - -class _PyArrowBuffer: - """ - Data in the buffer is guaranteed to be contiguous in memory. - - Note that there is no dtype attribute present, a buffer can be thought of - as simply a block of memory. However, if the column that the buffer is - attached to has a dtype that's supported by DLPack and ``__dlpack__`` is - implemented, then that dtype information will be contained in the return - value from ``__dlpack__``. - - This distinction is useful to support both data exchange via DLPack on a - buffer and (b) dtypes like variable-length strings which do not have a - fixed number of bytes per element. - """ - def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... - @property - def bufsize(self) -> int: - """ - Buffer size in bytes. - """ - @property - def ptr(self) -> int: - """ - Pointer to start of the buffer as an integer. - """ - def __dlpack__(self): - """ - Produce DLPack capsule (see array API standard). - - Raises: - - TypeError : if the buffer contains unsupported dtypes. - - NotImplementedError : if DLPack support is not implemented - - Useful to have to connect to array libraries. Support optional because - it's not completely trivial to implement for a Python-only library. - """ - def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: - """ - Device type and device ID for where the data in the buffer resides. - Uses device type codes matching DLPack. - Note: must be implemented even if ``__dlpack__`` is not. - """ diff --git a/python/pyarrow/interchange/column.pyi b/python/pyarrow/interchange/column.pyi deleted file mode 100644 index ce7e169bfb5..00000000000 --- a/python/pyarrow/interchange/column.pyi +++ /dev/null @@ -1,269 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import enum - -from typing import Any, Iterable, TypeAlias, TypedDict - -from pyarrow.lib import Array, ChunkedArray - -from .buffer import _PyArrowBuffer - -class DtypeKind(enum.IntEnum): - """ - Integer enum for data types. - - Attributes - ---------- - INT : int - Matches to signed integer data type. - UINT : int - Matches to unsigned integer data type. - FLOAT : int - Matches to floating point data type. - BOOL : int - Matches to boolean data type. - STRING : int - Matches to string data type (UTF-8 encoded). - DATETIME : int - Matches to datetime data type. - CATEGORICAL : int - Matches to categorical data type. 
- """ - - INT = 0 - UINT = 1 - FLOAT = 2 - BOOL = 20 - STRING = 21 # UTF-8 - DATETIME = 22 - CATEGORICAL = 23 - -Dtype: TypeAlias = tuple[DtypeKind, int, str, str] - -class ColumnNullType(enum.IntEnum): - """ - Integer enum for null type representation. - - Attributes - ---------- - NON_NULLABLE : int - Non-nullable column. - USE_NAN : int - Use explicit float NaN value. - USE_SENTINEL : int - Sentinel value besides NaN. - USE_BITMASK : int - The bit is set/unset representing a null on a certain position. - USE_BYTEMASK : int - The byte is set/unset representing a null on a certain position. - """ - - NON_NULLABLE = 0 - USE_NAN = 1 - USE_SENTINEL = 2 - USE_BITMASK = 3 - USE_BYTEMASK = 4 - -class ColumnBuffers(TypedDict): - data: tuple[_PyArrowBuffer, Dtype] - validity: tuple[_PyArrowBuffer, Dtype] | None - offsets: tuple[_PyArrowBuffer, Dtype] | None - -class CategoricalDescription(TypedDict): - is_ordered: bool - is_dictionary: bool - categories: _PyArrowColumn | None - -class Endianness(enum.Enum): - LITTLE = "<" - BIG = ">" - NATIVE = "=" - NA = "|" - -class NoBufferPresent(Exception): - """Exception to signal that there is no requested buffer.""" - -class _PyArrowColumn: - """ - A column object, with only the methods and properties required by the - interchange protocol defined. - - A column can contain one or more chunks. Each chunk can contain up to three - buffers - a data buffer, a mask buffer (depending on null representation), - and an offsets buffer (if variable-size binary; e.g., variable-length - strings). - - TBD: Arrow has a separate "null" dtype, and has no separate mask concept. - Instead, it seems to use "children" for both columns with a bit mask, - and for nested dtypes. Unclear whether this is elegant or confusing. - This design requires checking the null representation explicitly. - - The Arrow design requires checking: - 1. the ARROW_FLAG_NULLABLE (for sentinel values) - 2. if a column has two children, combined with one of those children - having a null dtype. - - Making the mask concept explicit seems useful. One null dtype would - not be enough to cover both bit and byte masks, so that would mean - even more checking if we did it the Arrow way. - - TBD: there's also the "chunk" concept here, which is implicit in Arrow as - multiple buffers per array (= column here). Semantically it may make - sense to have both: chunks were meant for example for lazy evaluation - of data which doesn't fit in memory, while multiple buffers per column - could also come from doing a selection operation on a single - contiguous buffer. - - Given these concepts, one would expect chunks to be all of the same - size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), - while multiple buffers could have data-dependent lengths. Not an issue - in pandas if one column is backed by a single NumPy array, but in - Arrow it seems possible. - Are multiple chunks *and* multiple buffers per column necessary for - the purposes of this interchange protocol, or must producers either - reuse the chunk concept for this or copy the data? - - Note: this Column object can only be produced by ``__dataframe__``, so - doesn't need its own version or ``__column__`` protocol. - """ - def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... - def size(self) -> int: - """ - Size of the column, in elements. - - Corresponds to DataFrame.num_rows() if column is a single chunk; - equal to size of this current chunk otherwise. 
- - Is a method rather than a property because it may cause a (potentially - expensive) computation for some dataframe implementations. - """ - @property - def offset(self) -> int: - """ - Offset of first element. - - May be > 0 if using chunks; for example for a column with N chunks of - equal size M (only the last chunk may be shorter), - ``offset = n * M``, ``n = 0 .. N-1``. - """ - @property - def dtype(self) -> tuple[DtypeKind, int, str, str]: - """ - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - - Bit-width : the number of bits as an integer - Format string : data type description format string in Apache Arrow C - Data Interface format. - Endianness : current only native endianness (``=``) is supported - - Notes: - - Kind specifiers are aligned with DLPack where possible (hence the - jump to 20, leave enough room for future extension) - - Masks must be specified as boolean with either bit width 1 (for - bit masks) or 8 (for byte masks). - - Dtype width in bits was preferred over bytes - - Endianness isn't too useful, but included now in case in the - future we need to support non-native endianness - - Went with Apache Arrow format strings over NumPy format strings - because they're more complete from a dataframe perspective - - Format strings are mostly useful for datetime specification, and - for categoricals. - - For categoricals, the format string describes the type of the - categorical in the data buffer. In case of a separate encoding of - the categorical (e.g. an integer to string mapping), this can - be derived from ``self.describe_categorical``. - - Data types not included: complex, Arrow-style null, binary, - decimal, and nested (list, struct, map, union) dtypes. - """ - @property - def describe_categorical(self) -> CategoricalDescription: - """ - If the dtype is categorical, there are two options: - - There are only values in the data buffer. - - There is a separate non-categorical Column encoding categorical - values. - - Raises TypeError if the dtype is not categorical - - Returns the dictionary with description on how to interpret the - data buffer: - - "is_ordered" : bool, whether the ordering of dictionary indices - is semantically meaningful. - - "is_dictionary" : bool, whether a mapping of - categorical values to other objects exists - - "categories" : Column representing the (implicit) mapping of - indices to category values (e.g. an array of - cat1, cat2, ...). None if not a dictionary-style - categorical. - - TBD: are there any other in-memory representations that are needed? - """ - @property - def describe_null(self) -> tuple[ColumnNullType, Any]: - """ - Return the missing value (or "null") representation the column dtype - uses, as a tuple ``(kind, value)``. - - Value : if kind is "sentinel value", the actual value. If kind is a bit - mask or a byte mask, the value (0 or 1) indicating a missing value. - None otherwise. - """ - @property - def null_count(self) -> int: - """ - Number of null elements, if known. - - Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. - """ - @property - def metadata(self) -> dict[str, Any]: - """ - The metadata for the column. See `DataFrame.metadata` for more details. - """ - def num_chunks(self) -> int: - """ - Return the number of chunks the column consists of. - """ - def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: - """ - Return an iterator yielding the chunks. - - See `DataFrame.get_chunks` for details on ``n_chunks``. 
- """ - def get_buffers(self) -> ColumnBuffers: - """ - Return a dictionary containing the underlying buffers. - - The returned dictionary has the following contents: - - - "data": a two-element tuple whose first element is a buffer - containing the data and whose second element is the data - buffer's associated dtype. - - "validity": a two-element tuple whose first element is a buffer - containing mask values indicating missing data and - whose second element is the mask value buffer's - associated dtype. None if the null representation is - not a bit or byte mask. - - "offsets": a two-element tuple whose first element is a buffer - containing the offset values for variable-size binary - data (e.g., variable-length strings) and whose second - element is the offsets buffer's associated dtype. None - if the data buffer does not have an associated offsets - buffer. - """ diff --git a/python/pyarrow/interchange/dataframe.pyi b/python/pyarrow/interchange/dataframe.pyi deleted file mode 100644 index a7ea6aeac74..00000000000 --- a/python/pyarrow/interchange/dataframe.pyi +++ /dev/null @@ -1,119 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import Any, Iterable, Sequence - -from pyarrow.interchange.column import _PyArrowColumn -from pyarrow.lib import RecordBatch, Table - -class _PyArrowDataFrame: - """ - A data frame class, with only the methods required by the interchange - protocol defined. - - A "data frame" represents an ordered collection of named columns. - A column's "name" must be a unique string. - Columns may be accessed by name or by position. - - This could be a public data frame class, or an object with the methods and - attributes defined on this DataFrame class could be returned from the - ``__dataframe__`` method of a public data frame class in a library adhering - to the dataframe interchange protocol specification. - """ - - def __init__( - self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True - ) -> None: ... - def __dataframe__( - self, nan_as_null: bool = False, allow_copy: bool = True - ) -> _PyArrowDataFrame: - """ - Construct a new exchange object, potentially changing the parameters. - ``nan_as_null`` is a keyword intended for the consumer to tell the - producer to overwrite null values in the data with ``NaN``. - It is intended for cases where the consumer does not support the bit - mask or byte mask that is the producer's native representation. - ``allow_copy`` is a keyword that defines whether or not the library is - allowed to make a copy of the data. 
For example, copying data would be - necessary if a library supports strided buffers, given that this - protocol specifies contiguous buffers. - """ - @property - def metadata(self) -> dict[str, Any]: - """ - The metadata for the data frame, as a dictionary with string keys. The - contents of `metadata` may be anything, they are meant for a library - to store information that it needs to, e.g., roundtrip losslessly or - for two implementations to share data that is not (yet) part of the - interchange protocol specification. For avoiding collisions with other - entries, please add name the keys with the name of the library - followed by a period and the desired name, e.g, ``pandas.indexcol``. - """ - def num_columns(self) -> int: - """ - Return the number of columns in the DataFrame. - """ - def num_rows(self) -> int: - """ - Return the number of rows in the DataFrame, if available. - """ - def num_chunks(self) -> int: - """ - Return the number of chunks the DataFrame consists of. - """ - def column_names(self) -> Iterable[str]: - """ - Return an iterator yielding the column names. - """ - def get_column(self, i: int) -> _PyArrowColumn: - """ - Return the column at the indicated position. - """ - def get_column_by_name(self, name: str) -> _PyArrowColumn: - """ - Return the column whose name is the indicated name. - """ - def get_columns(self) -> Iterable[_PyArrowColumn]: - """ - Return an iterator yielding the columns. - """ - def select_columns(self, indices: Sequence[int]) -> Self: - """ - Create a new DataFrame by selecting a subset of columns by index. - """ - def select_columns_by_name(self, names: Sequence[str]) -> Self: - """ - Create a new DataFrame by selecting a subset of columns by name. - """ - def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: - """ - Return an iterator yielding the chunks. - - By default (None), yields the chunks that the data is stored as by the - producer. If given, ``n_chunks`` must be a multiple of - ``self.num_chunks()``, meaning the producer must subdivide each chunk - before yielding it. - - Note that the producer must ensure that all columns are chunked the - same way. - """ diff --git a/python/pyarrow/interchange/from_dataframe.pyi b/python/pyarrow/interchange/from_dataframe.pyi deleted file mode 100644 index aa6217b6181..00000000000 --- a/python/pyarrow/interchange/from_dataframe.pyi +++ /dev/null @@ -1,261 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Any, Protocol, TypeAlias - -from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table - -from .column import ( - ColumnBuffers, - ColumnNullType, - Dtype, - DtypeKind, -) - -class DataFrameObject(Protocol): - def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> Any: ... - -ColumnObject: TypeAlias = Any - -def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: - """ - Build a ``pa.Table`` from any DataFrame supporting the interchange protocol. - - Parameters - ---------- - df : DataFrameObject - Object supporting the interchange protocol, i.e. `__dataframe__` - method. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Table - - Examples - -------- - >>> import pyarrow - >>> from pyarrow.interchange import from_dataframe - - Convert a pandas dataframe to a pyarrow table: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_attendees": [100, 10, 1], - ... "country": ["Italy", "Spain", "Slovenia"], - ... } - ... ) - >>> df - n_attendees country - 0 100 Italy - 1 10 Spain - 2 1 Slovenia - >>> from_dataframe(df) - pyarrow.Table - n_attendees: int64 - country: large_string - ---- - n_attendees: [[100,10,1]] - country: [["Italy","Spain","Slovenia"]] - """ - -def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: - """ - Convert interchange protocol chunk to ``pa.RecordBatch``. - - Parameters - ---------- - df : DataFrameObject - Object supporting the interchange protocol, i.e. `__dataframe__` - method. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.RecordBatch - """ - -def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: - """ - Convert a column holding one of the primitive dtypes to a PyArrow array. - A primitive type is one of: int, uint, float, bool (1 bit). - - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - """ - -def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: - """ - Convert a column holding boolean dtype to a PyArrow array. - - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - """ - -def categorical_column_to_dictionary( - col: ColumnObject, allow_copy: bool = True -) -> DictionaryArray: - """ - Convert a column holding categorical data to a pa.DictionaryArray. - - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). 
- - Returns - ------- - pa.DictionaryArray - """ - -def parse_datetime_format_str(format_str: str) -> tuple[str, str]: - """Parse datetime `format_str` to interpret the `data`.""" - -def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: - """Map column date type to pyarrow date type.""" - -def buffers_to_array( - buffers: ColumnBuffers, - data_type: tuple[DtypeKind, int, str, str], - length: int, - describe_null: ColumnNullType, - offset: int = 0, - allow_copy: bool = True, -) -> Array: - """ - Build a PyArrow array from the passed buffer. - - Parameters - ---------- - buffer : ColumnBuffers - Dictionary containing tuples of underlying buffers and - their associated dtype. - data_type : Tuple[DtypeKind, int, str, str], - Dtype description of the column as a tuple ``(kind, bit-width, format string, - endianness)``. - length : int - The number of values in the array. - describe_null: ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - offset : int, default: 0 - Number of elements to offset from the start of the buffer. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - - Notes - ----- - The returned array doesn't own the memory. The caller of this function - is responsible for keeping the memory owner object alive as long as - the returned PyArrow array is being used. - """ - -def validity_buffer_from_mask( - validity_buff: Buffer, - validity_dtype: Dtype, - describe_null: ColumnNullType, - length: int, - offset: int = 0, - allow_copy: bool = True, -) -> Buffer: - """ - Build a PyArrow buffer from the passed mask buffer. - - Parameters - ---------- - validity_buff : BufferObject - Tuple of underlying validity buffer and associated dtype. - validity_dtype : Dtype - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - describe_null : ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - length : int - The number of values in the array. - offset : int, default: 0 - Number of elements to offset from the start of the buffer. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Buffer - """ - -def validity_buffer_nan_sentinel( - data_pa_buffer: Buffer, - data_type: Dtype, - describe_null: ColumnNullType, - length: int, - offset: int = 0, - allow_copy: bool = True, -) -> Buffer: - """ - Build a PyArrow buffer from NaN or sentinel values. - - Parameters - ---------- - data_pa_buffer : pa.Buffer - PyArrow buffer for the column data. - data_type : Dtype - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - describe_null : ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - length : int - The number of values in the array. - offset : int, default: 0 - Number of elements to offset from the start of the buffer. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). 
- - Returns - ------- - pa.Buffer - """ diff --git a/python/pyarrow/parquet/__init__.pyi b/python/pyarrow/parquet/__init__.pyi deleted file mode 100644 index 8d0b5374ea0..00000000000 --- a/python/pyarrow/parquet/__init__.pyi +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from .core import * # noqa diff --git a/python/pyarrow/parquet/core.pyi b/python/pyarrow/parquet/core.pyi deleted file mode 100644 index f5ac0510ffc..00000000000 --- a/python/pyarrow/parquet/core.pyi +++ /dev/null @@ -1,2078 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
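The buffer-assembly helpers deleted above (``buffers_to_array`` and the validity-buffer functions) describe a column as raw data and validity buffers plus a dtype. A minimal sketch of the underlying building block in public pyarrow terms, using ``pa.Array.from_buffers``; the variable names are illustrative:

    import pyarrow as pa

    # Take apart a primitive array: buffers() yields [validity bitmap, data buffer].
    values = pa.array([1, 2, None, 4], type=pa.int64())
    validity_buf, data_buf = values.buffers()

    # Reassemble an equivalent array from the raw buffers, the same primitive
    # that buffers_to_array() wraps for interchange-protocol columns.
    rebuilt = pa.Array.from_buffers(
        pa.int64(),                 # data type
        4,                          # length
        [validity_buf, data_buf],   # validity bitmap first, then data
        null_count=1,
    )
    print(rebuilt.equals(values))   # True

As the docstrings above note, an array built this way does not own its buffers, so the owner (here ``values``) must stay alive while the rebuilt array is in use.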
- -import sys - -from pathlib import Path - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self -from typing import IO, Callable, Iterator, Literal, Sequence - -if sys.version_info >= (3, 10): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias - -from pyarrow import _parquet -from pyarrow._compute import Expression -from pyarrow._fs import FileSystem, SupportedFileSystem -from pyarrow._parquet import ( - ColumnChunkMetaData, - ColumnSchema, - FileDecryptionProperties, - FileEncryptionProperties, - FileMetaData, - ParquetLogicalType, - ParquetReader, - ParquetSchema, - RowGroupMetaData, - SortingColumn, - Statistics, -) -from pyarrow._stubs_typing import FilterTuple, SingleOrList -from pyarrow.dataset import ParquetFileFragment, Partitioning -from pyarrow.lib import NativeFile, RecordBatch, Schema, Table -from typing_extensions import deprecated - -__all__ = ( - "ColumnChunkMetaData", - "ColumnSchema", - "FileDecryptionProperties", - "FileEncryptionProperties", - "FileMetaData", - "ParquetDataset", - "ParquetFile", - "ParquetLogicalType", - "ParquetReader", - "ParquetSchema", - "ParquetWriter", - "RowGroupMetaData", - "SortingColumn", - "Statistics", - "read_metadata", - "read_pandas", - "read_schema", - "read_table", - "write_metadata", - "write_table", - "write_to_dataset", - "_filters_to_expression", - "filters_to_expression", -) - -def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: - """ - Check if filters are well-formed and convert to an ``Expression``. - - Parameters - ---------- - filters : List[Tuple] or List[List[Tuple]] - - Notes - ----- - See internal ``pyarrow._DNF_filter_doc`` attribute for more details. - - Examples - -------- - - >>> filters_to_expression([("foo", "==", "bar")]) - - - Returns - ------- - pyarrow.compute.Expression - An Expression representing the filters - """ - -@deprecated("use filters_to_expression") -def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... - -_Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] - -class ParquetFile: - """ - Reader interface for a single Parquet file. - - Parameters - ---------- - source : str, pathlib.Path, pyarrow.NativeFile, or file-like object - Readable source. For passing bytes or buffer-like file containing a - Parquet file, use pyarrow.BufferReader. - metadata : FileMetaData, default None - Use existing metadata object, rather than reading from file. - common_metadata : FileMetaData, default None - Will be used in reads for pandas schema metadata if not found in the - main file's metadata, no other uses at the moment. - read_dictionary : list - List of column names to read directly as DictionaryArray. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - pre_buffer : bool, default False - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). 
Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties, default None - File decryption properties for Parquet Modular Encryption. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Examples - -------- - - Generate an example PyArrow Table and write it to Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - - Create a ``ParquetFile`` object from the Parquet file: - - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read the data: - - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] - - Create a ParquetFile object with "animal" column as DictionaryArray: - - >>> parquet_file = pq.ParquetFile("example.parquet", read_dictionary=["animal"]) - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: dictionary - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [ -- dictionary: - ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: - [0,1,2,3,4,5]] - """ - - reader: ParquetReader - common_metadata: FileMetaData - - def __init__( - self, - source: str | Path | NativeFile | IO, - *, - metadata: FileMetaData | None = None, - common_metadata: FileMetaData | None = None, - read_dictionary: list[str] | None = None, - memory_map: bool = False, - buffer_size: int = 0, - pre_buffer: bool = False, - coerce_int96_timestamp_unit: str | None = None, - decryption_properties: FileDecryptionProperties | None = None, - thrift_string_size_limit: int | None = None, - thrift_container_size_limit: int | None = None, - filesystem: SupportedFileSystem | None = None, - page_checksum_verification: bool = False, - ): ... - def __enter__(self) -> Self: ... - def __exit__(self, *args, **kwargs) -> None: ... - @property - def metadata(self) -> FileMetaData: - """ - Return the Parquet metadata. - """ - @property - def schema(self) -> ParquetSchema: - """ - Return the Parquet schema, unconverted to Arrow types - """ - @property - def schema_arrow(self) -> Schema: - """ - Return the inferred Arrow schema, converted from the whole Parquet - file's schema - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read the Arrow schema: - - >>> parquet_file.schema_arrow - n_legs: int64 - animal: string - """ - @property - def num_row_groups(self) -> int: - """ - Return the number of row groups of the Parquet file. - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.num_row_groups - 1 - """ - def close(self, force: bool = False) -> None: ... - @property - def closed(self) -> bool: ... - def read_row_group( - self, - i: int, - columns: list | None = None, - use_threads: bool = True, - use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a single row group from a Parquet file. - - Parameters - ---------- - i : int - Index of the individual row group that we want to read. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row group as a table (of columns) - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.read_row_group(0) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ - def read_row_groups( - self, - row_groups: list, - columns: list | None = None, - use_threads: bool = True, - use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a multiple row groups from a Parquet file. - - Parameters - ---------- - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row groups as a table (of columns). - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.read_row_groups([0, 0]) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] - """ - def iter_batches( - self, - batch_size: int = 65536, - row_groups: list | None = None, - columns: list | None = None, - use_threads: bool = True, - use_pandas_metadata: bool = False, - ) -> Iterator[RecordBatch]: - """ - Read streaming batches from a Parquet file. - - Parameters - ---------- - batch_size : int, default 64K - Maximum number of records to yield per batch. Batches may be - smaller if there aren't enough rows in the file. - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : boolean, default True - Perform multi-threaded column reads. - use_pandas_metadata : boolean, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Yields - ------ - pyarrow.RecordBatch - Contents of each batch as a record batch - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - >>> for i in parquet_file.iter_batches(): - ... print("RecordBatch") - ... print(i.to_pandas()) - RecordBatch - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - """ - def read( - self, - columns: list | None = None, - use_threads: bool = True, - use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a Table from Parquet format. - - Parameters - ---------- - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read a Table: - - >>> parquet_file.read(columns=["animal"]) - pyarrow.Table - animal: string - ---- - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ - def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: - """ - Read contents of file for the given columns and batch size. - - Notes - ----- - This function's primary purpose is benchmarking. - The scan is executed on a single thread. 
- - Parameters - ---------- - columns : list of integers, default None - Select columns to read, if None scan all columns. - batch_size : int, default 64K - Number of rows to read at a time internally. - - Returns - ------- - num_rows : int - Number of rows in file - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.scan_contents() - 6 - """ - -class ParquetWriter: - """ - Class for incrementally building a Parquet file for Arrow tables. - - Parameters - ---------- - where : path or file-like object - schema : pyarrow.Schema - version : {"1.0", "2.4", "2.6"}, default "2.6" - Determine which Parquet logical types are available for use, whether the - reduced set from the Parquet 1.x.x format or the expanded logical types - added in later format versions. - Files written with version='2.4' or '2.6' may not be readable in all - Parquet implementations, so version='1.0' is likely the choice that - maximizes file compatibility. - UINT32 and some logical types are only available with version '2.4'. - Nanosecond timestamps are only available with version '2.6'. - Other features such as compression algorithms or the new serialized - data page format must be enabled separately (see 'compression' and - 'data_page_version'). - use_dictionary : bool or list, default True - Specify if we should use dictionary encoding in general or only for - some columns. - When encoding the column, if the dictionary size is too large, the - column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type - doesn't support dictionary encoding. - compression : str or dict, default 'snappy' - Specify the compression codec, either on a general basis or per-column. - Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. - write_statistics : bool or list, default True - Specify if we should write statistics in general (default is True) or only - for some columns. - use_deprecated_int96_timestamps : bool, default None - Write timestamps to INT96 Parquet format. Defaults to False unless enabled - by flavor argument. This take priority over the coerce_timestamps option. - coerce_timestamps : str, default None - Cast timestamps to a particular resolution. If omitted, defaults are chosen - depending on `version`. For ``version='1.0'`` and ``version='2.4'``, - nanoseconds are cast to microseconds ('us'), while for - ``version='2.6'`` (the default), they are written natively without loss - of resolution. Seconds are always cast to milliseconds ('ms') by default, - as Parquet does not have any temporal type with seconds resolution. - If the casting results in loss of data, it will raise an exception - unless ``allow_truncated_timestamps=True`` is given. - Valid values: {None, 'ms', 'us'} - allow_truncated_timestamps : bool, default False - Allow loss of data when coercing timestamps to a particular - resolution. E.g. if microsecond or nanosecond data is lost when coercing to - 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` - will NOT result in the truncation exception being ignored unless - ``coerce_timestamps`` is not None. 
- data_page_size : int, default None - Set a target threshold for the approximate encoded size of data - pages within a column chunk (in bytes). If None, use the default data page - size of 1MByte. - flavor : {'spark'}, default None - Sanitize schema or set other compatibility options to work with - various target systems. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - compression_level : int or dict, default None - Specify the compression level for a codec, either on a general basis or - per-column. If None is passed, arrow selects the compression level for - the compression codec in use. The compression level has a different - meaning for each codec, so you have to read the documentation of the - codec you are using. - An exception is thrown if the compression codec does not allow specifying - a compression level. - use_byte_stream_split : bool or list, default False - Specify if the byte_stream_split encoding should be used in general or - only for some columns. If both dictionary and byte_stream_stream are - enabled, then dictionary is preferred. - The byte_stream_split encoding is valid for integer, floating-point - and fixed-size binary data types (including decimals); it should be - combined with a compression codec so as to achieve size reduction. - column_encoding : string or dict, default None - Specify the encoding scheme on a per column basis. - Can only be used when ``use_dictionary`` is set to False, and - cannot be used in combination with ``use_byte_stream_split``. - Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', - 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. - Certain encodings are only compatible with certain data types. - Please refer to the encodings section of `Reading and writing Parquet - files `_. - data_page_version : {"1.0", "2.0"}, default "1.0" - The serialized Parquet data page format version to write, defaults to - 1.0. This does not impact the file schema logical types and Arrow to - Parquet type casting behavior; for that use the "version" option. - use_compliant_nested_type : bool, default True - Whether to write compliant Parquet nested type (lists) as defined - `here `_, defaults to ``True``. - For ``use_compliant_nested_type=True``, this will write into a list - with 3-level structure where the middle level, named ``list``, - is a repeated group with a single field named ``element``:: - - group (LIST) { - repeated group list { - element; - } - } - - For ``use_compliant_nested_type=False``, this will also write into a list - with 3-level structure, where the name of the single field of the middle - level ``list`` is taken from the element name for nested columns in Arrow, - which defaults to ``item``:: - - group (LIST) { - repeated group list { - item; - } - } - encryption_properties : FileEncryptionProperties, default None - File encryption properties for Parquet Modular Encryption. - If None, no encryption will be done. - The encryption properties can be created using: - ``CryptoFactory.file_encryption_properties()``. - write_batch_size : int, default None - Number of values to write to a page at a time. If None, use the default of - 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages - are exceeding the ``data_page_size`` due to large column values, lowering - the batch size can help keep page sizes closer to the intended size. 
- dictionary_pagesize_limit : int, default None - Specify the dictionary page size limit per row group. If None, use the - default 1MB. - store_schema : bool, default True - By default, the Arrow schema is serialized and stored in the Parquet - file metadata (in the "ARROW:schema" key). When reading the file, - if this key is available, it will be used to more faithfully recreate - the original Arrow data. For example, for tz-aware timestamp columns - it will restore the timezone (Parquet only stores the UTC values without - timezone), or columns with duration type will be restored from the int64 - Parquet column. - write_page_index : bool, default False - Whether to write a page index in general for all columns. - Writing statistics to the page index disables the old method of writing - statistics to each data page header. The page index makes statistics-based - filtering more efficient than the page header, as it gathers all the - statistics for a Parquet file in a single place, avoiding scattered I/O. - Note that the page index is not yet used on the read size by PyArrow. - write_page_checksum : bool, default False - Whether to write page checksums in general for all columns. - Page checksums enable detection of data corruption, which might occur during - transmission or in the storage. - sorting_columns : Sequence of SortingColumn, default None - Specify the sort order of the data being written. The writer does not sort - the data nor does it verify that the data is sorted. The sort order is - written to the row group metadata, which can then be used by readers. - store_decimal_as_integer : bool, default False - Allow decimals with 1 <= precision <= 18 to be stored as integers. - In Parquet, DECIMAL can be stored in any of the following physical types: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: precision is limited by the array size. - Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. - - binary: precision is unlimited. The minimum number of bytes to store the - unscaled value is used. - - By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. - When enabled, the writer will use the following physical types to store decimals: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: for precision > 18. - - As a consequence, decimal columns stored in integer types are more compact. - writer_engine_version : unused - **options : dict - If options contains a key `metadata_collector` then the - corresponding value is assumed to be a list (or any object with - `.append` method) that will be filled with the file metadata instance - of the written file. - - Examples - -------- - Generate an example PyArrow Table and RecordBatch: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.record_batch( - ... [ - ... [2, 2, 4, 4, 5, 100], - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... ], - ... names=["n_legs", "animal"], - ... 
) - - create a ParquetWriter object: - - >>> import pyarrow.parquet as pq - >>> writer = pq.ParquetWriter("example.parquet", table.schema) - - and write the Table into the Parquet file: - - >>> writer.write_table(table) - >>> writer.close() - - >>> pq.read_table("example.parquet").to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - create a ParquetWriter object for the RecordBatch: - - >>> writer2 = pq.ParquetWriter("example2.parquet", batch.schema) - - and write the RecordBatch into the Parquet file: - - >>> writer2.write_batch(batch) - >>> writer2.close() - - >>> pq.read_table("example2.parquet").to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - """ - - flavor: str - schema_changed: bool - schema: ParquetSchema - where: str | Path | IO - file_handler: NativeFile | None - writer: _parquet.ParquetWriter - is_open: bool - - def __init__( - self, - where: str | Path | IO | NativeFile, - schema: Schema, - filesystem: SupportedFileSystem | None = None, - flavor: str | None = None, - version: Literal["1.0", "2.4", "2.6"] = ..., - use_dictionary: bool = True, - compression: _Compression | dict[str, _Compression] = "snappy", - write_statistics: bool | list = True, - use_deprecated_int96_timestamps: bool | None = None, - compression_level: int | dict | None = None, - use_byte_stream_split: bool | list = False, - column_encoding: str | dict | None = None, - writer_engine_version=None, - data_page_version: Literal["1.0", "2.0"] = ..., - use_compliant_nested_type: bool = True, - encryption_properties: FileEncryptionProperties | None = None, - write_batch_size: int | None = None, - dictionary_pagesize_limit: int | None = None, - store_schema: bool = True, - write_page_index: bool = False, - write_page_checksum: bool = False, - sorting_columns: Sequence[SortingColumn] | None = None, - store_decimal_as_integer: bool = False, - **options, - ) -> None: ... - def __enter__(self) -> Self: ... - def __exit__(self, *args, **kwargs) -> Literal[False]: ... - def write( - self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None - ) -> None: - """ - Write RecordBatch or Table to the Parquet file. - - Parameters - ---------- - table_or_batch : {RecordBatch, Table} - row_group_size : int, default None - Maximum number of rows in each written row group. If None, - the row group size will be the minimum of the input - table or batch length and 1024 * 1024. - """ - def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: - """ - Write RecordBatch to the Parquet file. - - Parameters - ---------- - batch : RecordBatch - row_group_size : int, default None - Maximum number of rows in written row group. If None, the - row group size will be the minimum of the RecordBatch - size and 1024 * 1024. If set larger than 64Mi then 64Mi - will be used instead. - """ - def write_table(self, table: Table, row_group_size: int | None = None) -> None: - """ - Write Table to the Parquet file. - - Parameters - ---------- - table : Table - row_group_size : int, default None - Maximum number of rows in each written row group. If None, - the row group size will be the minimum of the Table size - and 1024 * 1024. If set larger than 64Mi then 64Mi will - be used instead. - - """ - def close(self) -> None: - """ - Close the connection to the Parquet file. 
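Taken together, ``ParquetFile.iter_batches`` and the ``ParquetWriter`` write methods above allow a bounded-memory copy from one Parquet file to another. A small sketch under assumed file names (``big_input.parquet`` and ``recompressed.parquet`` are placeholders):

    import pyarrow.parquet as pq

    # Placeholder paths; any readable source and writable destination work.
    source = pq.ParquetFile("big_input.parquet")

    # Stream record batches into a new file without materializing the whole table.
    with pq.ParquetWriter("recompressed.parquet", source.schema_arrow,
                          compression="zstd") as writer:
        for batch in source.iter_batches(batch_size=64 * 1024):
            writer.write_batch(batch)

    source.close()

Writing batch by batch keeps peak memory roughly proportional to ``batch_size`` rather than to the size of the input file.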
- """ - def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: - """ - Add key-value metadata to the file. - This will overwrite any existing metadata with the same key. - - Parameters - ---------- - key_value_metadata : dict - Keys and values must be string-like / coercible to bytes. - """ - -class ParquetDataset: - """ - Encapsulates details of reading a complete Parquet dataset possibly - consisting of multiple files and partitions in subdirectories. - - Parameters - ---------- - path_or_paths : str or List[str] - A directory name, single file name, or list of file names. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - schema : pyarrow.parquet.Schema - Optionally provide the Schema for the Dataset, in which case it will - not be inferred from the source. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. 
- partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. - pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. Set to False if you want to prioritize minimal memory usage - over maximum speed. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular resolution - (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 - timestamps will be inferred as timestamps in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the page checksum for each page read from the file. - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2", partition_cols=["year"]) - - create a ParquetDataset object from the dataset source: - - >>> dataset = pq.ParquetDataset("dataset_v2/") - - and read the data: - - >>> dataset.read().to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - create a ParquetDataset object with filter: - - >>> dataset = pq.ParquetDataset("dataset_v2/", filters=[("n_legs", "=", 4)]) - >>> dataset.read().to_pandas() - n_legs animal year - 0 4 Dog 2021 - 1 4 Horse 2022 - """ - def __init__( - self, - path_or_paths: SingleOrList[str] - | SingleOrList[Path] - | SingleOrList[NativeFile] - | SingleOrList[IO], - filesystem: SupportedFileSystem | None = None, - schema: Schema | None = None, - *, - filters: Expression | FilterTuple | list[FilterTuple] | None = None, - read_dictionary: list[str] | None = None, - memory_map: bool = False, - buffer_size: int = 0, - partitioning: str | list[str] | Partitioning | None = "hive", - ignore_prefixes: list[str] | None = None, - pre_buffer: bool = True, - coerce_int96_timestamp_unit: str | None = None, - decryption_properties: FileDecryptionProperties | None = None, - thrift_string_size_limit: int | None = None, - thrift_container_size_limit: int | None = None, - page_checksum_verification: bool = False, - ): ... - def equals(self, other: ParquetDataset) -> bool: ... - @property - def schema(self) -> Schema: - """ - Schema of the Dataset. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_schema", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_schema/") - - Read the schema: - - >>> dataset.schema - n_legs: int64 - animal: string - year: dictionary - """ - def read( - self, - columns: list[str] | None = None, - use_threads: bool = True, - use_pandas_metadata: bool = False, - ) -> Table: - """ - Read (multiple) Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the dataset. The partition fields - are not automatically included. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_read", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_read/") - - Read the dataset: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - def read_pandas(self, **kwargs) -> Table: - """ - Read dataset including pandas metadata, if any. 
Other arguments passed - through to :func:`read`, see docstring for further details. - - Parameters - ---------- - **kwargs : optional - Additional options for :func:`read` - - Examples - -------- - Generate an example parquet file: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "table_V2.parquet") - >>> dataset = pq.ParquetDataset("table_V2.parquet") - - Read the dataset with pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ - @property - def fragments(self) -> list[ParquetFileFragment]: - """ - A list of the Dataset source fragments or pieces with absolute - file paths. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_fragments", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_fragments/") - - List the fragments: - - >>> dataset.fragments - [<pyarrow.dataset.ParquetFileFragment path=dataset_v2_fragments/... - """ - @property - def files(self) -> list[str]: - """ - A list of absolute Parquet file paths in the Dataset source. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_files", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_files/") - - List the files: - - >>> dataset.files - ['dataset_v2_files/year=2019/...-0.parquet', ... - """ - @property - def filesystem(self) -> FileSystem: - """ - The filesystem type of the Dataset source. - """ - @property - def partitioning(self) -> Partitioning: - """ - The partitioning of the Dataset source, if discovered.
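The ``filters`` argument documented above accepts either a compute ``Expression`` or the DNF tuple form. A short illustrative sketch showing both, together with the ``files`` and ``read`` accessors; the dataset path and data are made up, and note that ``Expression`` comparisons are written with ``==``:

    import pyarrow as pa
    import pyarrow.compute as pc
    import pyarrow.parquet as pq

    # Illustrative partitioned dataset.
    table = pa.table({
        "year": [2020, 2022, 2021, 2022, 2019, 2021],
        "n_legs": [2, 2, 4, 4, 5, 100],
        "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
    })
    pq.write_to_dataset(table, root_path="dataset_filters_demo", partition_cols=["year"])

    # Equivalent filters: a compute Expression ...
    ds_expr = pq.ParquetDataset("dataset_filters_demo", filters=pc.field("n_legs") == 4)
    # ... or the DNF tuple form described above.
    ds_dnf = pq.ParquetDataset("dataset_filters_demo", filters=[("n_legs", "=", 4)])

    print(ds_expr.files)               # Parquet file paths backing the dataset
    print(ds_dnf.read().to_pydict())   # only rows with n_legs == 4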
- """ - -def read_table( - source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], - *, - columns: list | None = None, - use_threads: bool = True, - schema: Schema | None = None, - use_pandas_metadata: bool = False, - read_dictionary: list[str] | None = None, - memory_map: bool = False, - buffer_size: int = 0, - partitioning: str | list[str] | Partitioning | None = "hive", - filesystem: SupportedFileSystem | None = None, - filters: Expression | FilterTuple | list[FilterTuple] | None = None, - ignore_prefixes: list[str] | None = None, - pre_buffer: bool = True, - coerce_int96_timestamp_unit: str | None = None, - decryption_properties: FileDecryptionProperties | None = None, - thrift_string_size_limit: int | None = None, - thrift_container_size_limit: int | None = None, - page_checksum_verification: bool = False, -) -> Table: - """ - Read a Table from Parquet format - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name or directory name. For - file-like objects, only read a single file. Use pyarrow.BufferReader to - read a file contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - use_threads : bool, default True - Perform multi-threaded column reads. - schema : Schema, optional - Optionally provide the Schema for the parquet dataset, in which case it - will not be inferred from the source. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. 
- Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. - pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns) - - - Examples - -------- - - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_name_2", partition_cols=["year"]) - - Read the data: - - >>> pq.read_table("dataset_name_2").to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - - Read only a subset of columns: - - >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"]) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[5],[2],[4,100],[2,4]] - animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] - - Read a subset of columns and read one column as DictionaryArray: - - >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"], read_dictionary=["animal"]) - pyarrow.Table - n_legs: int64 - animal: dictionary - ---- - n_legs: [[5],[2],[4,100],[2,4]] - animal: [ -- dictionary: - ["Brittle stars"] -- indices: - [0], -- dictionary: - ["Flamingo"] -- indices: - [0], -- dictionary: - ["Dog","Centipede"] -- indices: - [0,1], -- dictionary: - ["Parrot","Horse"] -- indices: - [0,1]] - - Read the table with filter: - - >>> pq.read_table( - ... "dataset_name_2", columns=["n_legs", "animal"], filters=[("n_legs", "<", 4)] - ... ).to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - - Read data from a single Parquet file: - - >>> pq.write_table(table, "example.parquet") - >>> pq.read_table("dataset_name_2").to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - """ - -def read_pandas( - source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs -) -> Table: - """ - - Read a Table from Parquet format, also reading DataFrame - index values if known in the file metadata - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name or directory name. For - file-like objects, only read a single file. Use pyarrow.BufferReader to - read a file contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - use_threads : bool, default True - Perform multi-threaded column reads. - schema : Schema, optional - Optionally provide the Schema for the parquet dataset, in which case it - will not be inferred from the source. - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. 
The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - **kwargs - additional options for :func:`read_table` - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. - pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. 
The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Returns - ------- - pyarrow.Table - Content of the file as a Table of Columns, including DataFrame - indexes as columns - """ - -def write_table( - table: Table, - where: str | Path | NativeFile | IO, - row_group_size: int | None = None, - version: Literal["1.0", "2.4", "2.6"] = "2.6", - use_dictionary: bool = True, - compression: _Compression | dict[str, _Compression] = "snappy", - write_statistics: bool | list = True, - use_deprecated_int96_timestamps: bool | None = None, - coerce_timestamps: str | None = None, - allow_truncated_timestamps: bool = False, - data_page_size: int | None = None, - flavor: str | None = None, - filesystem: SupportedFileSystem | None = None, - compression_level: int | dict | None = None, - use_byte_stream_split: bool = False, - column_encoding: str | dict | None = None, - data_page_version: Literal["1.0", "2.0"] = ..., - use_compliant_nested_type: bool = True, - encryption_properties: FileEncryptionProperties | None = None, - write_batch_size: int | None = None, - dictionary_pagesize_limit: int | None = None, - store_schema: bool = True, - write_page_index: bool = False, - write_page_checksum: bool = False, - sorting_columns: Sequence[SortingColumn] | None = None, - store_decimal_as_integer: bool = False, - **kwargs, -) -> None: - """ - - Write a Table to Parquet format. - - Parameters - ---------- - table : pyarrow.Table - where : string or pyarrow.NativeFile - row_group_size : int - Maximum number of rows in each written row group. If None, the - row group size will be the minimum of the Table size and - 1024 * 1024. - version : {"1.0", "2.4", "2.6"}, default "2.6" - Determine which Parquet logical types are available for use, whether the - reduced set from the Parquet 1.x.x format or the expanded logical types - added in later format versions. - Files written with version='2.4' or '2.6' may not be readable in all - Parquet implementations, so version='1.0' is likely the choice that - maximizes file compatibility. - UINT32 and some logical types are only available with version '2.4'. - Nanosecond timestamps are only available with version '2.6'. - Other features such as compression algorithms or the new serialized - data page format must be enabled separately (see 'compression' and - 'data_page_version'). - use_dictionary : bool or list, default True - Specify if we should use dictionary encoding in general or only for - some columns. - When encoding the column, if the dictionary size is too large, the - column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type - doesn't support dictionary encoding. - compression : str or dict, default 'snappy' - Specify the compression codec, either on a general basis or per-column. - Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. - write_statistics : bool or list, default True - Specify if we should write statistics in general (default is True) or only - for some columns. - use_deprecated_int96_timestamps : bool, default None - Write timestamps to INT96 Parquet format. Defaults to False unless enabled - by flavor argument. This take priority over the coerce_timestamps option. 
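As a short, hedged sketch of how the timestamp-related options above interact (the file names and sample data are made up): writing nanosecond data with ``version='1.0'`` casts it to microseconds, while coercing further to milliseconds drops sub-millisecond precision and therefore raises unless truncation is explicitly allowed:

.. code-block:: python

    import datetime

    import pyarrow as pa
    import pyarrow.parquet as pq

    table = pa.table(
        {
            "ts": pa.array(
                [datetime.datetime(2024, 1, 1, 12, 0, 0, 123456)],
                type=pa.timestamp("ns"),
            )
        }
    )

    # version="1.0" has no nanosecond type, so values are cast to microseconds
    # (lossless here, since the sample value only has microsecond precision).
    pq.write_table(table, "ts_us.parquet", version="1.0")

    # Coercing to milliseconds truncates the 123456 microseconds, which raises
    # unless allow_truncated_timestamps=True is also passed.
    pq.write_table(
        table,
        "ts_ms.parquet",
        coerce_timestamps="ms",
        allow_truncated_timestamps=True,
    )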
- coerce_timestamps : str, default None - Cast timestamps to a particular resolution. If omitted, defaults are chosen - depending on `version`. For ``version='1.0'`` and ``version='2.4'``, - nanoseconds are cast to microseconds ('us'), while for - ``version='2.6'`` (the default), they are written natively without loss - of resolution. Seconds are always cast to milliseconds ('ms') by default, - as Parquet does not have any temporal type with seconds resolution. - If the casting results in loss of data, it will raise an exception - unless ``allow_truncated_timestamps=True`` is given. - Valid values: {None, 'ms', 'us'} - allow_truncated_timestamps : bool, default False - Allow loss of data when coercing timestamps to a particular - resolution. E.g. if microsecond or nanosecond data is lost when coercing to - 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` - will NOT result in the truncation exception being ignored unless - ``coerce_timestamps`` is not None. - data_page_size : int, default None - Set a target threshold for the approximate encoded size of data - pages within a column chunk (in bytes). If None, use the default data page - size of 1MByte. - flavor : {'spark'}, default None - Sanitize schema or set other compatibility options to work with - various target systems. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - compression_level : int or dict, default None - Specify the compression level for a codec, either on a general basis or - per-column. If None is passed, arrow selects the compression level for - the compression codec in use. The compression level has a different - meaning for each codec, so you have to read the documentation of the - codec you are using. - An exception is thrown if the compression codec does not allow specifying - a compression level. - use_byte_stream_split : bool or list, default False - Specify if the byte_stream_split encoding should be used in general or - only for some columns. If both dictionary and byte_stream_stream are - enabled, then dictionary is preferred. - The byte_stream_split encoding is valid for integer, floating-point - and fixed-size binary data types (including decimals); it should be - combined with a compression codec so as to achieve size reduction. - column_encoding : string or dict, default None - Specify the encoding scheme on a per column basis. - Can only be used when ``use_dictionary`` is set to False, and - cannot be used in combination with ``use_byte_stream_split``. - Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', - 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. - Certain encodings are only compatible with certain data types. - Please refer to the encodings section of `Reading and writing Parquet - files `_. - data_page_version : {"1.0", "2.0"}, default "1.0" - The serialized Parquet data page format version to write, defaults to - 1.0. This does not impact the file schema logical types and Arrow to - Parquet type casting behavior; for that use the "version" option. - use_compliant_nested_type : bool, default True - Whether to write compliant Parquet nested type (lists) as defined - `here `_, defaults to ``True``. 
- For ``use_compliant_nested_type=True``, this will write into a list - with 3-level structure where the middle level, named ``list``, - is a repeated group with a single field named ``element``:: - - group (LIST) { - repeated group list { - element; - } - } - - For ``use_compliant_nested_type=False``, this will also write into a list - with 3-level structure, where the name of the single field of the middle - level ``list`` is taken from the element name for nested columns in Arrow, - which defaults to ``item``:: - - group (LIST) { - repeated group list { - item; - } - } - encryption_properties : FileEncryptionProperties, default None - File encryption properties for Parquet Modular Encryption. - If None, no encryption will be done. - The encryption properties can be created using: - ``CryptoFactory.file_encryption_properties()``. - write_batch_size : int, default None - Number of values to write to a page at a time. If None, use the default of - 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages - are exceeding the ``data_page_size`` due to large column values, lowering - the batch size can help keep page sizes closer to the intended size. - dictionary_pagesize_limit : int, default None - Specify the dictionary page size limit per row group. If None, use the - default 1MB. - store_schema : bool, default True - By default, the Arrow schema is serialized and stored in the Parquet - file metadata (in the "ARROW:schema" key). When reading the file, - if this key is available, it will be used to more faithfully recreate - the original Arrow data. For example, for tz-aware timestamp columns - it will restore the timezone (Parquet only stores the UTC values without - timezone), or columns with duration type will be restored from the int64 - Parquet column. - write_page_index : bool, default False - Whether to write a page index in general for all columns. - Writing statistics to the page index disables the old method of writing - statistics to each data page header. The page index makes statistics-based - filtering more efficient than the page header, as it gathers all the - statistics for a Parquet file in a single place, avoiding scattered I/O. - Note that the page index is not yet used on the read size by PyArrow. - write_page_checksum : bool, default False - Whether to write page checksums in general for all columns. - Page checksums enable detection of data corruption, which might occur during - transmission or in the storage. - sorting_columns : Sequence of SortingColumn, default None - Specify the sort order of the data being written. The writer does not sort - the data nor does it verify that the data is sorted. The sort order is - written to the row group metadata, which can then be used by readers. - store_decimal_as_integer : bool, default False - Allow decimals with 1 <= precision <= 18 to be stored as integers. - In Parquet, DECIMAL can be stored in any of the following physical types: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: precision is limited by the array size. - Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. - - binary: precision is unlimited. The minimum number of bytes to store the - unscaled value is used. - - By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. - When enabled, the writer will use the following physical types to store decimals: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. 
- - fixed_len_byte_array: for precision > 18. - - As a consequence, decimal columns stored in integer types are more compact. - - **kwargs : optional - Additional options for ParquetWriter - - Examples - -------- - Generate an example PyArrow Table: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - and write the Table into Parquet file: - - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - - Defining row group size for the Parquet file: - - >>> pq.write_table(table, "example.parquet", row_group_size=3) - - Defining row group compression (default is Snappy): - - >>> pq.write_table(table, "example.parquet", compression="none") - - Defining row group compression and encoding per-column: - - >>> pq.write_table( - ... table, - ... "example.parquet", - ... compression={"n_legs": "snappy", "animal": "gzip"}, - ... use_dictionary=["n_legs", "animal"], - ... ) - - Defining column encoding per-column: - - >>> pq.write_table( - ... table, "example.parquet", column_encoding={"animal": "PLAIN"}, use_dictionary=False - ... ) - """ - -def write_to_dataset( - table: Table, - root_path: str | Path, - partition_cols: list[str] | None = None, - filesystem: SupportedFileSystem | None = None, - schema: Schema | None = None, - partitioning: Partitioning | list[str] | None = None, - basename_template: str | None = None, - use_threads: bool | None = None, - file_visitor: Callable[[str], None] | None = None, - existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] - | None = None, - **kwargs, -) -> None: - """ - Wrapper around dataset.write_dataset for writing a Table to - Parquet format by partitions. - For each combination of partition columns and values, - a subdirectories are created in the following - manner: - - root_dir/ - group1=value1 - group2=value1 - .parquet - group2=value2 - .parquet - group1=valueN - group2=value1 - .parquet - group2=valueN - .parquet - - Parameters - ---------- - table : pyarrow.Table - root_path : str, pathlib.Path - The root directory of the dataset. - partition_cols : list, - Column names by which to partition the dataset. - Columns are partitioned in the order they are given. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - schema : Schema, optional - This Schema of the dataset. - partitioning : Partitioning or list[str], optional - The partitioning scheme specified with the - ``pyarrow.dataset.partitioning()`` function or a list of field names. - When providing a list of field names, you can use - ``partitioning_flavor`` to drive which partitioning type should be - used. - basename_template : str, optional - A template string used to generate basenames of written data files. - The token '{i}' will be replaced with an automatically incremented - integer. If not specified, it defaults to "guid-{i}.parquet". - use_threads : bool, default True - Write files in parallel. If enabled, then maximum parallelism will be - used determined by the number of available CPU cores. - file_visitor : function - If set, this function will be called with a WrittenFile instance - for each file created during the call. This object will have both - a path attribute and a metadata attribute. 
- - The path attribute will be a string containing the path to - the created file. - - The metadata attribute will be the parquet metadata of the file. - This metadata will have the file path attribute set and can be used - to build a _metadata file. The metadata attribute will be None if - the format is not parquet. - - Example visitor which simple collects the filenames created:: - - visited_paths = [] - - def file_visitor(written_file): - visited_paths.append(written_file.path) - - existing_data_behavior : 'overwrite_or_ignore' | 'error' | 'delete_matching' - Controls how the dataset will handle data that already exists in - the destination. The default behaviour is 'overwrite_or_ignore'. - - 'overwrite_or_ignore' will ignore any existing data and will - overwrite files with the same name as an output file. Other - existing files will be ignored. This behavior, in combination - with a unique basename_template for each write, will allow for - an append workflow. - - 'error' will raise an error if any data exists in the destination. - - 'delete_matching' is useful when you are writing a partitioned - dataset. The first time each partition directory is encountered - the entire directory will be deleted. This allows you to overwrite - old partitions completely. - **kwargs : dict, - Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` - function for matching kwargs, and remainder to - :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. - See the docstring of :func:`write_table` and - :func:`pyarrow.dataset.write_dataset` for the available options. - Using `metadata_collector` in kwargs allows one to collect the - file metadata instances of dataset pieces. The file paths in the - ColumnChunkMetaData will be set relative to `root_path`. - - Examples - -------- - Generate an example PyArrow Table: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - and write it to a partitioned dataset: - - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_name_3", partition_cols=["year"]) - >>> pq.ParquetDataset("dataset_name_3").files - ['dataset_name_3/year=2019/...-0.parquet', ... - - Write a single Parquet file into the root folder: - - >>> pq.write_to_dataset(table, root_path="dataset_name_4") - >>> pq.ParquetDataset("dataset_name_4/").files - ['dataset_name_4/...-0.parquet'] - """ - -def write_metadata( - schema: Schema, - where: str | NativeFile, - metadata_collector: list[FileMetaData] | None = None, - filesystem: SupportedFileSystem | None = None, - **kwargs, -) -> None: - """ - Write metadata-only Parquet file from schema. This can be used with - `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar - files. - - Parameters - ---------- - schema : pyarrow.Schema - where : string or pyarrow.NativeFile - metadata_collector : list - where to collect metadata information. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - **kwargs : dict, - Additional kwargs for ParquetWriter class. See docstring for - `ParquetWriter` for more information. - - Examples - -------- - Generate example data: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... 
"animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Write a dataset and collect metadata information. - - >>> metadata_collector = [] - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, "dataset_metadata", metadata_collector=metadata_collector) - - Write the `_common_metadata` parquet file without row groups statistics. - - >>> pq.write_metadata(table.schema, "dataset_metadata/_common_metadata") - - Write the `_metadata` parquet file with row groups statistics. - - >>> pq.write_metadata( - ... table.schema, "dataset_metadata/_metadata", metadata_collector=metadata_collector - ... ) - """ - -def read_metadata( - where: str | Path | IO | NativeFile, - memory_map: bool = False, - decryption_properties: FileDecryptionProperties | None = None, - filesystem: SupportedFileSystem | None = None, -) -> FileMetaData: - """ - Read FileMetaData from footer of a single Parquet file. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - - Returns - ------- - metadata : FileMetaData - The metadata of the Parquet file - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, "example.parquet") - - >>> pq.read_metadata("example.parquet") - - created_by: parquet-cpp-arrow version ... - num_columns: 2 - num_rows: 3 - num_row_groups: 1 - format_version: 2.6 - serialized_size: ... - """ - -def read_schema( - where: str | Path | IO | NativeFile, - memory_map: bool = False, - decryption_properties: FileDecryptionProperties | None = None, - filesystem: SupportedFileSystem | None = None, -) -> Schema: - """ - Read effective Arrow schema from Parquet file metadata. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - - Returns - ------- - schema : pyarrow.Schema - The schema of the Parquet file - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, "example.parquet") - - >>> pq.read_schema("example.parquet") - n_legs: int64 - animal: string - """ diff --git a/python/pyarrow/parquet/encryption.pyi b/python/pyarrow/parquet/encryption.pyi deleted file mode 100644 index fe9a454e593..00000000000 --- a/python/pyarrow/parquet/encryption.pyi +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyarrow._parquet_encryption import ( - CryptoFactory, - DecryptionConfiguration, - EncryptionConfiguration, - KmsClient, - KmsConnectionConfig, -) - -__all__ = [ - "CryptoFactory", - "DecryptionConfiguration", - "EncryptionConfiguration", - "KmsClient", - "KmsConnectionConfig", -] From 73e3e3a2959fbd638f62862639672d328e9198c1 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 12 Sep 2025 15:46:24 +0200 Subject: [PATCH 10/26] Fix --- .github/workflows/python.yml | 11 +- dev/update_stub_docstrings.py | 206 +++++++++++++++++++--------------- python/pyarrow/cuda.py | 25 +++++ 3 files changed, 150 insertions(+), 92 deletions(-) create mode 100644 python/pyarrow/cuda.py diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a4aa53e5cdc..8630dab7e93 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -138,10 +138,15 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} - - name: Type check with pyright + - name: Type check with mypy and pyright run: |- - python -m pip install pyright - pushd python; python -m pyright + python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-requests griffe libcst + pushd python; + pip install -e . + python -m mypy pyarrow/*.pyi pyarrow/__lib_pxi/*.pyi pyarrow/tests/test_array.py pyarrow/tests/test_io.py + python -m pyright pyarrow/*.pyi pyarrow/__lib_pxi/*.pyi + python ../dev/update_stub_docstrings.py -f ./pyarrow + git status --porcelain=1 macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py index 72db8b0d000..17f7e8e1aa1 100644 --- a/dev/update_stub_docstrings.py +++ b/dev/update_stub_docstrings.py @@ -1,118 +1,146 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + # Utility to extract docstrings from pyarrow and update # docstrings in stubfiles. 
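As a standalone illustration of the extraction half of this utility (the member path ``lib.Table.slice`` is just an example), griffe can load the installed pyarrow package and expose runtime docstrings by dotted path:

.. code-block:: python

    import griffe

    # force_inspection imports the compiled package so that docstrings defined
    # in Cython modules are visible to griffe.
    package = griffe.load(
        "pyarrow", try_relative_path=True, force_inspection=True, resolve_aliases=True
    )
    print(package.get_member("lib.Table.slice").docstring.value)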
# # Usage # ===== # -# python ../dev/update_stub_docstrings.py -s ./pyarrow/compute.pyi +# python ../dev/update_stub_docstrings.py -f ./pyarrow/ -import os from pathlib import Path from textwrap import indent import click +# TODO: perhaps replace griffe with importlib import griffe -import libcst as cst - -docstrings_map = {} - - -def extract_docstrings(pckg, path=""): - if "filepath" in pckg and pckg["filepath"].endswith(".pyi"): - return - if "docstring" in pckg: - docstrings_map[path] = pckg["docstring"].value - - for name, pckg in pckg.get("members", {}).items(): - extract_docstrings(pckg, path=f"{path}.{name}") - - -def _is_docstring_node(node): - """Checks if a node is a docstring.""" - return ( - isinstance(node, cst.SimpleStatementLine) and - isinstance(node.body[0], cst.Expr) and - isinstance(node.body[0].value, cst.SimpleString) - ) - - -class ClonedSignatureDocstringTransformer(cst.CSTTransformer): - def __init__(self, docstrings_map, module_name): - self.docstrings_map = docstrings_map - self.module_name = module_name - self.name_of_function = None - - def leave_Assign(self, original_node, updated_node): - target = original_node.targets[0].target - value = original_node.value - - if isinstance(target, cst.Name) and isinstance(value, cst.Call) and \ - value.func.value == "_clone_signature": - self.name_of_function = f"{self.module_name}.{target.value}" +import libcst + + +class DocUpdater(libcst.CSTTransformer): + def __init__(self, package, namespace): + self.stack = [namespace] if namespace else [] + self._docstring = None + self.indentation = 0 + self.package = package + + def _get_docstring(self, name): + # print("extract_docstrings", name) + try: + obj = self.package.get_member(name) + except KeyError: + # Some cython __init__ symbols can't be found + # e.g. pyarrow.lib.OSFile.__init__ + parent_name = ".".join(self.stack[:-1]) + + try: + obj = self.package.get_member(parent_name).all_members[self.stack[-1]] + except KeyError: + # print(f"{name} not found in {self.package.name}, it's probably ok.") + return None + + if obj.has_docstring: + docstring = obj.docstring.value + # remove signature if present in docstring + if docstring.startswith(obj.name) or ( + (hasattr(obj.parent, "name") and + docstring.startswith(f"{obj.parent.name}.{obj.name}"))): + return "\n".join(docstring.splitlines()[2:]) + else: + return docstring + return None + + def visit_ClassDef(self, node): + # TODO: class docstrings? 
+ self.stack.append(node.name.value) + self.indentation += 1 + node_name = ".".join(self.stack) + docstring = self._get_docstring(node_name) + + if docstring: + if not node.get_docstring(clean=False): + print("Missing docstring (in annotations) for:", node_name) + return False + self._docstring = f'"""{node.get_docstring(clean=False)}"""' + return True + return False + + def visit_FunctionDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + node_name = ".".join(self.stack) + docstring = self._get_docstring(node_name) + + if docstring: + if not node.get_docstring(clean=False): + print("Missing docstring (in annotations) for:", node_name) + return False + self._docstring = f'"""{node.get_docstring(clean=False)}"""' + return True + return False + + def leave_ClassDef(self, original_node, updated_node): + self.stack.pop() + self.indentation -= 1 return updated_node - def leave_SimpleStatementLine(self, original_node, updated_node): - if self.name_of_function: - if len(updated_node.body) > 0 and _is_docstring_node(updated_node): - comment_content = self.docstrings_map[self.name_of_function].strip() - self.name_of_function = None - - new_string_node = cst.SimpleString(value=f'"""\n{comment_content}\n"""') - new_expr_node = updated_node.body[0].with_changes(value=new_string_node) - new_body = [new_expr_node] + list(updated_node.body[1:]) - updated_node = updated_node.with_changes(body=new_body) - + def leave_FunctionDef(self, original_node, updated_node): + self.stack.pop() + self.indentation -= 1 return updated_node + def leave_SimpleString(self, original_node, updated_node): + node_name = ".".join(self.stack) -class FunctionDocstringTransformer(cst.CSTTransformer): - def __init__(self, docstrings_map, module_name): - self.docstrings_map = docstrings_map - self.module_name = module_name - - def leave_FunctionDef(self, original_node, updated_node): - full_name = f"{self.module_name}.{original_node.name.value}" - - # Check if we have a docstring for this function - if full_name in self.docstrings_map: - # Check if the function already has a docstring - body_list = list(updated_node.body.body) - has_docstring = len(body_list) > 0 and _is_docstring_node(body_list[0]) - - if has_docstring: - # Replace existing docstring - docstring = indent(self.docstrings_map[full_name], " ").strip() - docstring_value = f'"""\n {docstring}\n """' - new_docstring_node = cst.SimpleStatementLine( - body=[cst.Expr(value=cst.SimpleString(value=docstring_value))] - ) - new_body = [new_docstring_node] + body_list[1:] - return updated_node.with_changes( - body=updated_node.body.with_changes(body=new_body) - ) + if original_node.value == self._docstring: + indentation = self.indentation * " " + indented_docstring = indent(self._get_docstring(node_name), indentation) + docstring = f'"""\n{indented_docstring}\n{indentation}"""' + return updated_node.with_changes(value=docstring) return updated_node + @click.command() -@click.option('--stub_file', '-s', type=click.Path(resolve_path=True)) -def update_stub_file(stub_file): - package = griffe.load("pyarrow", try_relative_path=False, force_inspection=True, resolve_aliases=True) - extract_docstrings(package.as_dict(), "pyarrow") +@click.option('--pyarrow_folder', '-f', type=click.Path(resolve_path=True)) +def update_stub_files(pyarrow_folder): + print("Updating docstrings of stub files in:", pyarrow_folder) + package = griffe.load("pyarrow", try_relative_path=True, + force_inspection=True, resolve_aliases=True) - with open(stub_file, 'r') as f: - tree = 
cst.parse_module(f.read()) + for stub_file in Path(pyarrow_folder).rglob('*.pyi'): + if stub_file.name == "_stubs_typing.pyi": + continue - cloned_signature_transformer = ClonedSignatureDocstringTransformer(docstrings_map, "pyarrow.compute") - function_docstring_transformer = FunctionDocstringTransformer(docstrings_map, "pyarrow.compute") + print(f"[{stub_file}]") - modified_tree = tree.visit(function_docstring_transformer) - modified_tree = modified_tree.visit(cloned_signature_transformer) + with open(stub_file, 'r') as f: + tree = libcst.parse_module(f.read()) + if stub_file.name != "__init__.pyi": + modified_tree = tree.visit(DocUpdater(package, "lib")) + else: + modified_tree = tree.visit(DocUpdater(package, None)) + with open(stub_file, "w") as f: + f.write(modified_tree.code) - # Write the modified code - with open(stub_file, "w") as f: - f.write(modified_tree.code) if __name__ == "__main__": - update_stub_file(obj={}) + docstrings_map = {} + update_stub_files(obj={}) diff --git a/python/pyarrow/cuda.py b/python/pyarrow/cuda.py new file mode 100644 index 00000000000..18c530d4afe --- /dev/null +++ b/python/pyarrow/cuda.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# flake8: noqa + + +from pyarrow._cuda import (Context, IpcMemHandle, CudaBuffer, + HostBuffer, BufferReader, BufferWriter, + new_host_buffer, + serialize_record_batch, read_message, + read_record_batch) From 1c05a04550e6b41dac5781dc66eda2211d6e1399 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Sep 2025 19:21:05 +0200 Subject: [PATCH 11/26] work --- .../{pyarrow => pyarrow-stubs}/__init__.pyi | 0 .../_stubs_typing.pyi | 0 .../types.pyi => pyarrow-stubs/_types.pyi} | 5 + .../__lib_pxi => pyarrow-stubs}/array.pyi | 111 ++-- .../__lib_pxi => pyarrow-stubs}/io.pyi | 0 python/{pyarrow => pyarrow-stubs}/lib.pyi | 28 +- .../__lib_pxi => pyarrow-stubs}/memory.pyi | 0 python/pyarrow-stubs/py.typed | 0 .../__lib_pxi => pyarrow-stubs}/scalar.pyi | 22 +- .../__lib_pxi => pyarrow-stubs}/tensor.pyi | 0 python/pyarrow-stubs/types.pyi | 214 ++++++ python/pyarrow/__lib_pxi/__init__.pyi | 16 - python/pyarrow/types.pyi | 611 ------------------ 13 files changed, 308 insertions(+), 699 deletions(-) rename python/{pyarrow => pyarrow-stubs}/__init__.pyi (100%) rename python/{pyarrow => pyarrow-stubs}/_stubs_typing.pyi (100%) rename python/{pyarrow/__lib_pxi/types.pyi => pyarrow-stubs/_types.pyi} (99%) rename python/{pyarrow/__lib_pxi => pyarrow-stubs}/array.pyi (96%) rename python/{pyarrow/__lib_pxi => pyarrow-stubs}/io.pyi (100%) rename python/{pyarrow => pyarrow-stubs}/lib.pyi (90%) rename python/{pyarrow/__lib_pxi => pyarrow-stubs}/memory.pyi (100%) create mode 100644 python/pyarrow-stubs/py.typed rename python/{pyarrow/__lib_pxi => pyarrow-stubs}/scalar.pyi (97%) rename python/{pyarrow/__lib_pxi => pyarrow-stubs}/tensor.pyi (100%) create mode 100644 python/pyarrow-stubs/types.pyi delete mode 100644 python/pyarrow/__lib_pxi/__init__.pyi delete mode 100644 python/pyarrow/types.pyi diff --git a/python/pyarrow/__init__.pyi b/python/pyarrow-stubs/__init__.pyi similarity index 100% rename from python/pyarrow/__init__.pyi rename to python/pyarrow-stubs/__init__.pyi diff --git a/python/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/_stubs_typing.pyi similarity index 100% rename from python/pyarrow/_stubs_typing.pyi rename to python/pyarrow-stubs/_stubs_typing.pyi diff --git a/python/pyarrow/__lib_pxi/types.pyi b/python/pyarrow-stubs/_types.pyi similarity index 99% rename from python/pyarrow/__lib_pxi/types.pyi rename to python/pyarrow-stubs/_types.pyi index 27a2c75d68d..0c8afe2cbbb 100644 --- a/python/pyarrow/__lib_pxi/types.pyi +++ b/python/pyarrow-stubs/_types.pyi @@ -4282,4 +4282,9 @@ __all__ = [ "type_for_alias", "schema", "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + ] diff --git a/python/pyarrow/__lib_pxi/array.pyi b/python/pyarrow-stubs/array.pyi similarity index 96% rename from python/pyarrow/__lib_pxi/array.pyi rename to python/pyarrow-stubs/array.pyi index c6e8dfecb62..c01f167029e 100644 --- a/python/pyarrow/__lib_pxi/array.pyi +++ b/python/pyarrow-stubs/array.pyi @@ -55,13 +55,20 @@ from pyarrow.lib import ( # type: ignore[attr-defined] ) from typing_extensions import deprecated -from . 
import scalar, types +from .scalar import * from .device import DeviceAllocationType # type: ignore[import-not-found] -from .scalar import Scalar -from .types import ( +from ._types import ( + BaseExtensionType, + BinaryType, DataType, Field, + Float64Type, + Int16Type, + Int32Type, + Int64Type, MapType, + StringType, + StructType, _AsPyType, _BasicDataType, _BasicValueT, @@ -69,8 +76,12 @@ from .types import ( _IndexT, _RunEndType, _Size, + _Time32Unit, + _Time64Unit, + _Tz, + _Unit, ) -from .._stubs_typing import NullableCollection +from ._stubs_typing import NullableCollection def array( values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, @@ -963,7 +974,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): end: int | None = None, *, memory_pool: MemoryPool | None = None, - ) -> scalar.Int64Scalar | scalar.Int64Scalar: + ) -> Int64Scalar: """ Find the first index of a value. @@ -1270,12 +1281,12 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): Statistics of the array. """ -class NullArray(Array[scalar.NullScalar]): +class NullArray(Array[NullScalar]): """ Concrete class for Arrow arrays of null data type. """ -class BooleanArray(Array[scalar.BooleanScalar]): +class BooleanArray(Array[BooleanScalar]): """ Concrete class for Arrow arrays of boolean data type. """ @@ -1296,79 +1307,79 @@ class FloatingPointArray(NumericArray[_ScalarT]): """ A base class for Arrow floating-point arrays. """ -class Int8Array(IntegerArray[scalar.Int8Scalar]): +class Int8Array(IntegerArray[Int8Scalar]): """ Concrete class for Arrow arrays of int8 data type. """ -class UInt8Array(IntegerArray[scalar.UInt8Scalar]): +class UInt8Array(IntegerArray[UInt8Scalar]): """ Concrete class for Arrow arrays of uint8 data type. """ -class Int16Array(IntegerArray[scalar.Int16Scalar]): +class Int16Array(IntegerArray[Int16Scalar]): """ Concrete class for Arrow arrays of int16 data type. """ -class UInt16Array(IntegerArray[scalar.UInt16Scalar]): +class UInt16Array(IntegerArray[UInt16Scalar]): """ Concrete class for Arrow arrays of uint16 data type. """ -class Int32Array(IntegerArray[scalar.Int32Scalar]): +class Int32Array(IntegerArray[Int32Scalar]): """ Concrete class for Arrow arrays of int32 data type. """ -class UInt32Array(IntegerArray[scalar.UInt32Scalar]): +class UInt32Array(IntegerArray[UInt32Scalar]): """ Concrete class for Arrow arrays of uint32 data type. """ -class Int64Array(IntegerArray[scalar.Int64Scalar]): +class Int64Array(IntegerArray[Int64Scalar]): """ Concrete class for Arrow arrays of int64 data type. """ -class UInt64Array(IntegerArray[scalar.UInt64Scalar]): +class UInt64Array(IntegerArray[UInt64Scalar]): """ Concrete class for Arrow arrays of uint64 data type. """ -class Date32Array(NumericArray[scalar.Date32Scalar]): +class Date32Array(NumericArray[Date32Scalar]): """ Concrete class for Arrow arrays of date32 data type. """ -class Date64Array(NumericArray[scalar.Date64Scalar]): +class Date64Array(NumericArray[Date64Scalar]): """ Concrete class for Arrow arrays of date64 data type. """ -class TimestampArray(NumericArray[scalar.TimestampScalar[types._Unit, types._Tz]]): +class TimestampArray(NumericArray[TimestampScalar[_Unit, _Tz]]): """ Concrete class for Arrow arrays of timestamp data type. """ -class Time32Array(NumericArray[scalar.Time32Scalar[types._Time32Unit]]): +class Time32Array(NumericArray[Time32Scalar[_Time32Unit]]): """ Concrete class for Arrow arrays of time32 data type. 
""" -class Time64Array(NumericArray[scalar.Time64Scalar[types._Time64Unit]]): +class Time64Array(NumericArray[Time64Scalar[_Time64Unit]]): """ Concrete class for Arrow arrays of time64 data type. """ -class DurationArray(NumericArray[scalar.DurationScalar[types._Unit]]): +class DurationArray(NumericArray[DurationScalar[_Unit]]): """ Concrete class for Arrow arrays of duration data type. """ -class MonthDayNanoIntervalArray(Array[scalar.MonthDayNanoIntervalScalar]): +class MonthDayNanoIntervalArray(Array[MonthDayNanoIntervalScalar]): """ Concrete class for Arrow arrays of interval[MonthDayNano] type. """ -class HalfFloatArray(FloatingPointArray[scalar.HalfFloatScalar]): +class HalfFloatArray(FloatingPointArray[HalfFloatScalar]): """ Concrete class for Arrow arrays of float16 data type. """ -class FloatArray(FloatingPointArray[scalar.FloatScalar]): +class FloatArray(FloatingPointArray[FloatScalar]): """ Concrete class for Arrow arrays of float32 data type. """ -class DoubleArray(FloatingPointArray[scalar.DoubleScalar]): +class DoubleArray(FloatingPointArray[DoubleScalar]): """ Concrete class for Arrow arrays of float64 data type. """ -class FixedSizeBinaryArray(Array[scalar.FixedSizeBinaryScalar]): +class FixedSizeBinaryArray(Array[FixedSizeBinaryScalar]): """ Concrete class for Arrow arrays of a fixed-size binary data type. """ @@ -1533,7 +1544,7 @@ class ListArray(BaseListArray[_ScalarT]): type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> ListArray[scalar.ListScalar[_DataTypeT | types.Int64Type | types.Float64Type | types.StringType | types.BinaryType]] | ListArray: + ) -> ListArray[ListScalar[_DataTypeT | Int64Type | Float64Type | StringType | BinaryType]] | ListArray: """ Construct ListArray from arrays of int32 offsets and values. @@ -1679,7 +1690,7 @@ class ListArray(BaseListArray[_ScalarT]): ] """ -class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): +class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]): """ Concrete class for Arrow arrays of a large list data type. @@ -1797,7 +1808,7 @@ class LargeListArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): offsets : Int64Array """ -class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): +class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]): """ Concrete class for Arrow arrays of a list view data type. """ @@ -1992,7 +2003,7 @@ class ListViewArray(BaseListArray[scalar.ListViewScalar[_DataTypeT]]): ] """ -class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): +class LargeListViewArray(BaseListArray[LargeListScalar[_DataTypeT]]): """ Concrete class for Arrow arrays of a large list view data type. @@ -2196,7 +2207,7 @@ class LargeListViewArray(BaseListArray[scalar.LargeListScalar[_DataTypeT]]): ] """ -class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _Size]]): +class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]): """ Concrete class for Arrow arrays of a fixed size list data type. """ @@ -2267,7 +2278,7 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S ] """ @property - def values(self) -> BaseListArray[scalar.ListScalar[_DataTypeT]]: + def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: """ Return the underlying array of values which backs the FixedSizeListArray ignoring the array's offset. 
@@ -2307,7 +2318,7 @@ class FixedSizeListArray(BaseListArray[scalar.FixedSizeListScalar[_DataTypeT, _S _MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) _MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) -class MapArray(BaseListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): +class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]): """ Concrete class for Arrow arrays of a map data type. """ @@ -2425,7 +2436,7 @@ class MapArray(BaseListArray[scalar.MapScalar[_MapKeyT, _MapItemT]]): Flattened array of items across all maps in array """ -class UnionArray(Array[scalar.UnionScalar]): +class UnionArray(Array[UnionScalar]): """ Concrete class for Arrow arrays of a Union data type. """ @@ -2522,7 +2533,7 @@ class UnionArray(Array[scalar.UnionScalar]): union_array : UnionArray """ -class StringArray(Array[scalar.StringScalar]): +class StringArray(Array[StringScalar]): """ Concrete class for Arrow arrays of string (or utf8) data type. """ @@ -2554,7 +2565,7 @@ class StringArray(Array[scalar.StringScalar]): string_array : StringArray """ -class LargeStringArray(Array[scalar.LargeStringScalar]): +class LargeStringArray(Array[LargeStringScalar]): """ Concrete class for Arrow arrays of large string (or utf8) data type. """ @@ -2586,12 +2597,12 @@ class LargeStringArray(Array[scalar.LargeStringScalar]): string_array : StringArray """ -class StringViewArray(Array[scalar.StringViewScalar]): +class StringViewArray(Array[StringViewScalar]): """ Concrete class for Arrow arrays of string (or utf8) view data type. """ -class BinaryArray(Array[scalar.BinaryScalar]): +class BinaryArray(Array[BinaryScalar]): """ Concrete class for Arrow arrays of variable-sized binary data type. """ @@ -2602,7 +2613,7 @@ class BinaryArray(Array[scalar.BinaryScalar]): by the offsets of this BinaryArray. """ -class LargeBinaryArray(Array[scalar.LargeBinaryScalar]): +class LargeBinaryArray(Array[LargeBinaryScalar]): """ Concrete class for Arrow arrays of large variable-sized binary data type. """ @@ -2613,12 +2624,12 @@ class LargeBinaryArray(Array[scalar.LargeBinaryScalar]): by the offsets of this LargeBinaryArray. """ -class BinaryViewArray(Array[scalar.BinaryViewScalar]): +class BinaryViewArray(Array[BinaryViewScalar]): """ Concrete class for Arrow arrays of variable-sized binary view data type. """ -class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): +class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): """ Concrete class for dictionary-encoded Arrow arrays. """ @@ -2700,7 +2711,7 @@ class DictionaryArray(Array[scalar.DictionaryScalar[_IndexT, _BasicValueT]]): dict_array : DictionaryArray """ -class StructArray(Array[scalar.StructScalar]): +class StructArray(Array[StructScalar]): """ Concrete class for Arrow arrays of a struct data type. """ @@ -2737,7 +2748,7 @@ class StructArray(Array[scalar.StructScalar]): fields: list[Field] | None = None, mask=None, memory_pool: MemoryPool | None = None, - type: types.StructType | None = None, + type: StructType | None = None, ) -> StructArray: """ Construct StructArray from collection of arrays representing @@ -2784,7 +2795,7 @@ class StructArray(Array[scalar.StructScalar]): result : StructArray """ -class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicValueT]]): +class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]): """ Concrete class for Arrow run-end encoded arrays. 
""" @@ -2793,7 +2804,7 @@ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicVal run_ends: Int16Array | Int32Array | Int64Array, values: Array, type: DataType | None = None, - ) -> RunEndEncodedArray[types.Int16Type | types.Int32Type | types.Int64Type, _BasicValueT]: # type: ignore[type-var] + ) -> RunEndEncodedArray[Int16Type | Int32Type | Int64Type, _BasicValueT]: # type: ignore[type-var] """ Construct RunEndEncodedArray from run_ends and values arrays. @@ -2849,14 +2860,14 @@ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicVal RunEndEncodedArray """ @property - def run_ends(self) -> Array[scalar.Scalar[_RunEndType]]: + def run_ends(self) -> Array[Scalar[_RunEndType]]: """ An array holding the logical indexes of each run-end. The physical offset to the array is applied. """ @property - def values(self) -> Array[scalar.Scalar[_BasicValueT]]: + def values(self) -> Array[Scalar[_BasicValueT]]: """ An array holding the values of each run. @@ -2884,14 +2895,14 @@ class RunEndEncodedArray(Array[scalar.RunEndEncodedScalar[_RunEndType, _BasicVal _ArrayT = TypeVar("_ArrayT", bound=Array) -class ExtensionArray(Array[scalar.ExtensionScalar], Generic[_ArrayT]): +class ExtensionArray(Array[ExtensionScalar], Generic[_ArrayT]): """ Concrete class for Arrow extension arrays. """ @property def storage(self) -> Any: ... @staticmethod - def from_storage(typ: types.BaseExtensionType, storage: _ArrayT) -> ExtensionArray[_ArrayT]: + def from_storage(typ: BaseExtensionType, storage: _ArrayT) -> ExtensionArray[_ArrayT]: """ Construct ExtensionArray from type and storage array. @@ -3232,7 +3243,7 @@ def concat_arrays(arrays: Iterable[_ArrayT], memory_pool: MemoryPool | None = No ] """ -def _empty_array(type: _DataTypeT) -> Array[scalar.Scalar[_DataTypeT]]: +def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: """ Create empty array of the given type. 
""" diff --git a/python/pyarrow/__lib_pxi/io.pyi b/python/pyarrow-stubs/io.pyi similarity index 100% rename from python/pyarrow/__lib_pxi/io.pyi rename to python/pyarrow-stubs/io.pyi diff --git a/python/pyarrow/lib.pyi b/python/pyarrow-stubs/lib.pyi similarity index 90% rename from python/pyarrow/lib.pyi rename to python/pyarrow-stubs/lib.pyi index 9d5bd7bedb2..a1a016ef2f2 100644 --- a/python/pyarrow/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -21,22 +21,22 @@ import datetime as dt from typing import NamedTuple, Literal from typing_extensions import TypeVar -from .__lib_pxi.array import * +from .array import * # TODO -# from .__lib_pxi.benchmark import * -# from .__lib_pxi.builder import * -# from .__lib_pxi.compat import * -# from .__lib_pxi.config import * -# from .__lib_pxi.device import * -# from .__lib_pxi.error import * -from .__lib_pxi.io import * +# from .benchmark import * +# from .builder import * +# from .compat import * +# from .config import * +# from .device import * +# from .error import * +from .io import * # from .__lib_pxi.ipc import * -from .__lib_pxi.memory import * -# from .__lib_pxi.pandas_shim import * -from .__lib_pxi.scalar import * -# from .__lib_pxi.table import * -from .__lib_pxi.tensor import * -from .__lib_pxi.types import * +from .memory import * +# from .pandas_shim import * +from .scalar import * +# from .table import * +from .tensor import * +from ._types import * _DataTypeT = TypeVar("_DataTypeT", bound=DataType) diff --git a/python/pyarrow/__lib_pxi/memory.pyi b/python/pyarrow-stubs/memory.pyi similarity index 100% rename from python/pyarrow/__lib_pxi/memory.pyi rename to python/pyarrow-stubs/memory.pyi diff --git a/python/pyarrow-stubs/py.typed b/python/pyarrow-stubs/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pyarrow/__lib_pxi/scalar.pyi b/python/pyarrow-stubs/scalar.pyi similarity index 97% rename from python/pyarrow/__lib_pxi/scalar.pyi rename to python/pyarrow-stubs/scalar.pyi index b979ec43a3a..2532026e7bc 100644 --- a/python/pyarrow/__lib_pxi/scalar.pyi +++ b/python/pyarrow-stubs/scalar.pyi @@ -35,19 +35,25 @@ from pyarrow._compute import CastOptions # type: ignore[import-not-found] from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakrefable from typing_extensions import TypeVar -from . import types -from .types import ( +from ._types import ( # _AsPyType, _DataTypeT, _Time32Unit, _Time64Unit, _Tz, _Unit, + DataType, + ListType, + LargeListType, + ListViewType, + LargeListViewType, + FixedSizeListType, ) +from . import types _AsPyTypeK = TypeVar("_AsPyTypeK") _AsPyTypeV = TypeVar("_AsPyTypeV") -_DataType_co = TypeVar("_DataType_co", bound=types.DataType, covariant=True) +_DataType_co = TypeVar("_DataType_co", bound=DataType, covariant=True) class Scalar(_Weakrefable, Generic[_DataType_co]): """ @@ -330,7 +336,7 @@ class StringViewScalar(Scalar[types.StringViewType]): Return a view over this value as a Buffer object. """ -class ListScalar(Scalar[types.ListType[_DataTypeT]]): +class ListScalar(Scalar[ListType[_DataTypeT]]): """ Concrete class for list-like scalars. """ @@ -349,7 +355,7 @@ class ListScalar(Scalar[types.ListType[_DataTypeT]]): Iterate over this element's values. 
""" -class FixedSizeListScalar(Scalar[types.FixedSizeListType[_DataTypeT, types._Size]]): +class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, types._Size]]): """ """ @property @@ -373,7 +379,7 @@ class FixedSizeListScalar(Scalar[types.FixedSizeListType[_DataTypeT, types._Size Iterate over this element's values. """ -class LargeListScalar(Scalar[types.LargeListType[_DataTypeT]]): +class LargeListScalar(Scalar[LargeListType[_DataTypeT]]): """ """ @property @@ -397,7 +403,7 @@ class LargeListScalar(Scalar[types.LargeListType[_DataTypeT]]): Iterate over this element's values. """ -class ListViewScalar(Scalar[types.ListViewType[_DataTypeT]]): +class ListViewScalar(Scalar[ListViewType[_DataTypeT]]): """ """ @property @@ -421,7 +427,7 @@ class ListViewScalar(Scalar[types.ListViewType[_DataTypeT]]): Iterate over this element's values. """ -class LargeListViewScalar(Scalar[types.LargeListViewType[_DataTypeT]]): +class LargeListViewScalar(Scalar[LargeListViewType[_DataTypeT]]): """ """ @property diff --git a/python/pyarrow/__lib_pxi/tensor.pyi b/python/pyarrow-stubs/tensor.pyi similarity index 100% rename from python/pyarrow/__lib_pxi/tensor.pyi rename to python/pyarrow-stubs/tensor.pyi diff --git a/python/pyarrow-stubs/types.pyi b/python/pyarrow-stubs/types.pyi new file mode 100644 index 00000000000..98181f6acc2 --- /dev/null +++ b/python/pyarrow-stubs/types.pyi @@ -0,0 +1,214 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import sys + +from typing import Any + +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow.lib import ( + BinaryType, + BinaryViewType, + BoolType, + DataType, + Date32Type, + Date64Type, + Decimal32Type, + Decimal64Type, + Decimal128Type, + Decimal256Type, + DenseUnionType, + DictionaryType, + DurationType, + FixedSizeBinaryType, + FixedSizeListType, + Float16Type, + Float32Type, + Float64Type, + Int8Type, + Int16Type, + Int32Type, + Int64Type, + LargeBinaryType, + LargeListType, + LargeListViewType, + LargeStringType, + ListType, + ListViewType, + MapType, + MonthDayNanoIntervalType, + NullType, + RunEndEncodedType, + SparseUnionType, + StringType, + StringViewType, + StructType, + Time32Type, + Time64Type, + TimestampType, + UInt8Type, + UInt16Type, + Uint32Type, + UInt64Type, +) + +_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type +_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | Uint32Type | UInt64Type +_Integer: TypeAlias = _SignedInteger | _UnsignedInteger +_Floating: TypeAlias = Float16Type | Float32Type | Float64Type +_Decimal: TypeAlias = ( + Decimal32Type[Any, Any] + | Decimal64Type[Any, Any] + | Decimal128Type[Any, Any] + | Decimal256Type[Any, Any] +) +_Date: TypeAlias = Date32Type | Date64Type +_Time: TypeAlias = Time32Type[Any] | Time64Type[Any] +_Interval: TypeAlias = MonthDayNanoIntervalType +_Temporal: TypeAlias = TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval +_Union: TypeAlias = SparseUnionType | DenseUnionType +_Nested: TypeAlias = ( + ListType[Any] + | FixedSizeListType[Any, Any] + | LargeListType[Any] + | ListViewType[Any] + | LargeListViewType[Any] + | StructType + | MapType[Any, Any, Any] + | _Union +) + +def is_null(t: DataType) -> TypeIs[NullType]: ... +def is_boolean(t: DataType) -> TypeIs[BoolType]: ... +def is_integer(t: DataType) -> TypeIs[_Integer]: ... +def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: ... +def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: ... +def is_int8(t: DataType) -> TypeIs[Int8Type]: ... +def is_int16(t: DataType) -> TypeIs[Int16Type]: ... +def is_int32(t: DataType) -> TypeIs[Int32Type]: ... +def is_int64(t: DataType) -> TypeIs[Int64Type]: ... +def is_uint8(t: DataType) -> TypeIs[UInt8Type]: ... +def is_uint16(t: DataType) -> TypeIs[UInt16Type]: ... +def is_uint32(t: DataType) -> TypeIs[Uint32Type]: ... +def is_uint64(t: DataType) -> TypeIs[UInt64Type]: ... +def is_floating(t: DataType) -> TypeIs[_Floating]: ... +def is_float16(t: DataType) -> TypeIs[Float16Type]: ... +def is_float32(t: DataType) -> TypeIs[Float32Type]: ... +def is_float64(t: DataType) -> TypeIs[Float64Type]: ... +def is_list(t: DataType) -> TypeIs[ListType[Any]]: ... +def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: ... +def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: ... +def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: ... +def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: ... +def is_struct(t: DataType) -> TypeIs[StructType]: ... +def is_union(t: DataType) -> TypeIs[_Union]: ... +def is_nested(t: DataType) -> TypeIs[_Nested]: ... +def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: ... +def is_temporal(t: DataType) -> TypeIs[_Temporal]: ... 
+def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: ... +def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: ... +def is_time(t: DataType) -> TypeIs[_Time]: ... +def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: ... +def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: ... +def is_binary(t: DataType) -> TypeIs[BinaryType]: ... +def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: ... +def is_unicode(t: DataType) -> TypeIs[StringType]: ... +def is_string(t: DataType) -> TypeIs[StringType]: ... +def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: ... +def is_large_string(t: DataType) -> TypeIs[LargeStringType]: ... +def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: ... +def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: ... +def is_string_view(t: DataType) -> TypeIs[StringViewType]: ... +def is_date(t: DataType) -> TypeIs[_Date]: ... +def is_date32(t: DataType) -> TypeIs[Date32Type]: ... +def is_date64(t: DataType) -> TypeIs[Date64Type]: ... +def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: ... +def is_decimal(t: DataType) -> TypeIs[_Decimal]: ... +def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: ... +def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: ... +def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: ... +def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: ... +def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: ... +def is_interval(t: DataType) -> TypeIs[_Interval]: ... +def is_primitive(t: DataType) -> bool: ... +def is_boolean_value(obj: Any) -> bool: ... +def is_integer_value(obj: Any) -> bool: ... +def is_float_value(obj: Any) -> bool: ... + +__all__ = [ + "is_binary", + "is_binary_view", + "is_boolean", + "is_date", + "is_date32", + "is_date64", + "is_decimal", + "is_decimal128", + "is_decimal256", + "is_decimal32", + "is_decimal64", + "is_dictionary", + "is_duration", + "is_fixed_size_binary", + "is_fixed_size_list", + "is_float16", + "is_float32", + "is_float64", + "is_floating", + "is_int16", + "is_int32", + "is_int64", + "is_int8", + "is_integer", + "is_interval", + "is_large_binary", + "is_large_list", + "is_large_list_view", + "is_large_string", + "is_large_unicode", + "is_list", + "is_list_view", + "is_map", + "is_nested", + "is_null", + "is_primitive", + "is_run_end_encoded", + "is_signed_integer", + "is_string", + "is_string_view", + "is_struct", + "is_temporal", + "is_time", + "is_time32", + "is_time64", + "is_timestamp", + "is_uint16", + "is_uint32", + "is_uint64", + "is_uint8", + "is_unicode", + "is_union", + "is_unsigned_integer", +] diff --git a/python/pyarrow/__lib_pxi/__init__.pyi b/python/pyarrow/__lib_pxi/__init__.pyi deleted file mode 100644 index 13a83393a91..00000000000 --- a/python/pyarrow/__lib_pxi/__init__.pyi +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyarrow/types.pyi b/python/pyarrow/types.pyi deleted file mode 100644 index 1d1554da520..00000000000 --- a/python/pyarrow/types.pyi +++ /dev/null @@ -1,611 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import sys - -from typing import Any - -if sys.version_info >= (3, 13): - from typing import TypeIs -else: - from typing_extensions import TypeIs -if sys.version_info >= (3, 10): - from typing import TypeAlias -else: - from typing_extensions import TypeAlias - -from pyarrow.lib import ( - BinaryType, - BinaryViewType, - BoolType, - DataType, - Date32Type, - Date64Type, - Decimal32Type, - Decimal64Type, - Decimal128Type, - Decimal256Type, - DenseUnionType, - DictionaryType, - DurationType, - FixedSizeBinaryType, - FixedSizeListType, - Float16Type, - Float32Type, - Float64Type, - Int8Type, - Int16Type, - Int32Type, - Int64Type, - LargeBinaryType, - LargeListType, - LargeListViewType, - LargeStringType, - ListType, - ListViewType, - MapType, - MonthDayNanoIntervalType, - NullType, - RunEndEncodedType, - SparseUnionType, - StringType, - StringViewType, - StructType, - Time32Type, - Time64Type, - TimestampType, - UInt8Type, - UInt16Type, - Uint32Type, - UInt64Type, -) - -_SignedInteger: TypeAlias = Int8Type | Int16Type | Int32Type | Int64Type -_UnsignedInteger: TypeAlias = UInt8Type | UInt16Type | Uint32Type | UInt64Type -_Integer: TypeAlias = _SignedInteger | _UnsignedInteger -_Floating: TypeAlias = Float16Type | Float32Type | Float64Type -_Decimal: TypeAlias = ( - Decimal32Type[Any, Any] - | Decimal64Type[Any, Any] - | Decimal128Type[Any, Any] - | Decimal256Type[Any, Any] -) -_Date: TypeAlias = Date32Type | Date64Type -_Time: TypeAlias = Time32Type[Any] | Time64Type[Any] -_Interval: TypeAlias = MonthDayNanoIntervalType -_Temporal: TypeAlias = TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval -_Union: TypeAlias = SparseUnionType | DenseUnionType -_Nested: TypeAlias = ( - ListType[Any] - | FixedSizeListType[Any, Any] - | LargeListType[Any] - | ListViewType[Any] - | LargeListViewType[Any] - | StructType - | MapType[Any, Any, Any] - | _Union -) - -def is_null(t: DataType) -> TypeIs[NullType]: - """ - Return True if value is an instance of type: null. 
- - Parameters - ---------- - t : DataType - """ -def is_boolean(t: DataType) -> TypeIs[BoolType]: - """ - Return True if value is an instance of type: boolean. - - Parameters - ---------- - t : DataType - """ -def is_integer(t: DataType) -> TypeIs[_Integer]: - """ - Return True if value is an instance of type: any integer. - - Parameters - ---------- - t : DataType - """ -def is_signed_integer(t: DataType) -> TypeIs[_SignedInteger]: - """ - Return True if value is an instance of type: signed integer. - - Parameters - ---------- - t : DataType - """ -def is_unsigned_integer(t: DataType) -> TypeIs[_UnsignedInteger]: - """ - Return True if value is an instance of type: unsigned integer. - - Parameters - ---------- - t : DataType - """ -def is_int8(t: DataType) -> TypeIs[Int8Type]: - """ - Return True if value is an instance of type: int8. - - Parameters - ---------- - t : DataType - """ -def is_int16(t: DataType) -> TypeIs[Int16Type]: - """ - Return True if value is an instance of type: int16. - - Parameters - ---------- - t : DataType - """ -def is_int32(t: DataType) -> TypeIs[Int32Type]: - """ - Return True if value is an instance of type: int32. - - Parameters - ---------- - t : DataType - """ -def is_int64(t: DataType) -> TypeIs[Int64Type]: - """ - Return True if value is an instance of type: int64. - - Parameters - ---------- - t : DataType - """ -def is_uint8(t: DataType) -> TypeIs[UInt8Type]: - """ - Return True if value is an instance of type: uint8. - - Parameters - ---------- - t : DataType - """ -def is_uint16(t: DataType) -> TypeIs[UInt16Type]: - """ - Return True if value is an instance of type: uint16. - - Parameters - ---------- - t : DataType - """ -def is_uint32(t: DataType) -> TypeIs[Uint32Type]: - """ - Return True if value is an instance of type: uint32. - - Parameters - ---------- - t : DataType - """ -def is_uint64(t: DataType) -> TypeIs[UInt64Type]: - """ - Return True if value is an instance of type: uint64. - - Parameters - ---------- - t : DataType - """ -def is_floating(t: DataType) -> TypeIs[_Floating]: - """ - Return True if value is an instance of type: floating point numeric. - - Parameters - ---------- - t : DataType - """ -def is_float16(t: DataType) -> TypeIs[Float16Type]: - """ - Return True if value is an instance of type: float16 (half-precision). - - Parameters - ---------- - t : DataType - """ -def is_float32(t: DataType) -> TypeIs[Float32Type]: - """ - Return True if value is an instance of type: float32 (single precision). - - Parameters - ---------- - t : DataType - """ -def is_float64(t: DataType) -> TypeIs[Float64Type]: - """ - Return True if value is an instance of type: float64 (double precision). - - Parameters - ---------- - t : DataType - """ -def is_list(t: DataType) -> TypeIs[ListType[Any]]: - """ - Return True if value is an instance of type: list. - - Parameters - ---------- - t : DataType - """ -def is_large_list(t: DataType) -> TypeIs[LargeListType[Any]]: - """ - Return True if value is an instance of type: large list. - - Parameters - ---------- - t : DataType - """ -def is_fixed_size_list(t: DataType) -> TypeIs[FixedSizeListType[Any, Any]]: - """ - Return True if value is an instance of type: fixed size list. - - Parameters - ---------- - t : DataType - """ -def is_list_view(t: DataType) -> TypeIs[ListViewType[Any]]: - """ - Return True if value is an instance of type: list view. 
- - Parameters - ---------- - t : DataType - """ -def is_large_list_view(t: DataType) -> TypeIs[LargeListViewType[Any]]: - """ - Return True if value is an instance of type: large list view. - - Parameters - ---------- - t : DataType - """ -def is_struct(t: DataType) -> TypeIs[StructType]: - """ - Return True if value is an instance of type: struct. - - Parameters - ---------- - t : DataType - """ -def is_union(t: DataType) -> TypeIs[_Union]: - """ - Return True if value is an instance of type: union. - - Parameters - ---------- - t : DataType - """ -def is_nested(t: DataType) -> TypeIs[_Nested]: - """ - Return True if value is an instance of type: nested type. - - Parameters - ---------- - t : DataType - """ -def is_run_end_encoded(t: DataType) -> TypeIs[RunEndEncodedType[Any, Any]]: - """ - Return True if value is an instance of type: run-end encoded. - - Parameters - ---------- - t : DataType - """ -def is_temporal(t: DataType) -> TypeIs[_Temporal]: - """ - Return True if value is an instance of type: date, time, timestamp or duration. - - Parameters - ---------- - t : DataType - """ -def is_timestamp(t: DataType) -> TypeIs[TimestampType[Any, Any]]: - """ - Return True if value is an instance of type: timestamp. - - Parameters - ---------- - t : DataType - """ -def is_duration(t: DataType) -> TypeIs[DurationType[Any]]: - """ - Return True if value is an instance of type: duration. - - Parameters - ---------- - t : DataType - """ -def is_time(t: DataType) -> TypeIs[_Time]: - """ - Return True if value is an instance of type: time. - - Parameters - ---------- - t : DataType - """ -def is_time32(t: DataType) -> TypeIs[Time32Type[Any]]: - """ - Return True if value is an instance of type: time32. - - Parameters - ---------- - t : DataType - """ -def is_time64(t: DataType) -> TypeIs[Time64Type[Any]]: - """ - Return True if value is an instance of type: time64. - - Parameters - ---------- - t : DataType - """ -def is_binary(t: DataType) -> TypeIs[BinaryType]: - """ - Return True if value is an instance of type: variable-length binary. - - Parameters - ---------- - t : DataType - """ -def is_large_binary(t: DataType) -> TypeIs[LargeBinaryType]: - """ - Return True if value is an instance of type: large variable-length binary. - - Parameters - ---------- - t : DataType - """ -def is_unicode(t: DataType) -> TypeIs[StringType]: - """ - Alias for is_string. - - Parameters - ---------- - t : DataType - """ -def is_string(t: DataType) -> TypeIs[StringType]: - """ - Return True if value is an instance of type: string (utf8 unicode). - - Parameters - ---------- - t : DataType - """ -def is_large_unicode(t: DataType) -> TypeIs[LargeStringType]: - """ - Alias for is_large_string. - - Parameters - ---------- - t : DataType - """ -def is_large_string(t: DataType) -> TypeIs[LargeStringType]: - """ - Return True if value is an instance of type: large string (utf8 unicode). - - Parameters - ---------- - t : DataType - """ -def is_fixed_size_binary(t: DataType) -> TypeIs[FixedSizeBinaryType]: - """ - Return True if value is an instance of type: fixed size binary. - - Parameters - ---------- - t : DataType - """ -def is_binary_view(t: DataType) -> TypeIs[BinaryViewType]: - """ - Return True if value is an instance of type: variable-length binary view. - - Parameters - ---------- - t : DataType - """ -def is_string_view(t: DataType) -> TypeIs[StringViewType]: - """ - Return True if value is an instance of type: variable-length string (utf-8) view. 
- - Parameters - ---------- - t : DataType - """ -def is_date(t: DataType) -> TypeIs[_Date]: - """ - Return True if value is an instance of type: date. - - Parameters - ---------- - t : DataType - """ -def is_date32(t: DataType) -> TypeIs[Date32Type]: - """ - Return True if value is an instance of type: date32 (days). - - Parameters - ---------- - t : DataType - """ -def is_date64(t: DataType) -> TypeIs[Date64Type]: - """ - Return True if value is an instance of type: date64 (milliseconds). - - Parameters - ---------- - t : DataType - """ -def is_map(t: DataType) -> TypeIs[MapType[Any, Any, Any]]: - """ - Return True if value is an instance of type: map. - - Parameters - ---------- - t : DataType - """ -def is_decimal(t: DataType) -> TypeIs[_Decimal]: - """ - Return True if value is an instance of type: decimal. - - Parameters - ---------- - t : DataType - """ -def is_decimal32(t: DataType) -> TypeIs[Decimal32Type[Any, Any]]: - """ - Return True if value is an instance of type: decimal32. - - Parameters - ---------- - t : DataType - """ -def is_decimal64(t: DataType) -> TypeIs[Decimal64Type[Any, Any]]: - """ - Return True if value is an instance of type: decimal64. - - Parameters - ---------- - t : DataType - """ -def is_decimal128(t: DataType) -> TypeIs[Decimal128Type[Any, Any]]: - """ - Return True if value is an instance of type: decimal128. - - Parameters - ---------- - t : DataType - """ -def is_decimal256(t: DataType) -> TypeIs[Decimal256Type[Any, Any]]: - """ - Return True if value is an instance of type: decimal256. - - Parameters - ---------- - t : DataType - """ -def is_dictionary(t: DataType) -> TypeIs[DictionaryType[Any, Any, Any]]: - """ - Return True if value is an instance of type: dictionary-encoded. - - Parameters - ---------- - t : DataType - """ -def is_interval(t: DataType) -> TypeIs[_Interval]: - """ - Return True if value is an instance of type: interval. - - Parameters - ---------- - t : DataType - """ -def is_primitive(t: DataType) -> bool: - """ - Return True if value is an instance of type: primitive type. - - Parameters - ---------- - t : DataType - """ -def is_boolean_value(obj: Any) -> bool: - """ - Check if the object is a boolean. - - Parameters - ---------- - obj : object - The object to check - """ - -def is_integer_value(obj: Any) -> bool: - """ - Check if the object is an integer. - - Parameters - ---------- - obj : object - The object to check - """ - -def is_float_value(obj: Any) -> bool: - """ - Check if the object is a float. 
- - Parameters - ---------- - obj : object - The object to check - """ - -__all__ = [ - "is_binary", - "is_binary_view", - "is_boolean", - "is_date", - "is_date32", - "is_date64", - "is_decimal", - "is_decimal128", - "is_decimal256", - "is_decimal32", - "is_decimal64", - "is_dictionary", - "is_duration", - "is_fixed_size_binary", - "is_fixed_size_list", - "is_float16", - "is_float32", - "is_float64", - "is_floating", - "is_int16", - "is_int32", - "is_int64", - "is_int8", - "is_integer", - "is_interval", - "is_large_binary", - "is_large_list", - "is_large_list_view", - "is_large_string", - "is_large_unicode", - "is_list", - "is_list_view", - "is_map", - "is_nested", - "is_null", - "is_primitive", - "is_run_end_encoded", - "is_signed_integer", - "is_string", - "is_string_view", - "is_struct", - "is_temporal", - "is_time", - "is_time32", - "is_time64", - "is_timestamp", - "is_uint16", - "is_uint32", - "is_uint64", - "is_uint8", - "is_unicode", - "is_union", - "is_unsigned_integer", -] From 6dacfe1fc43361eb0922a061cfde71eb635d42db Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 15 Sep 2025 22:01:18 +0200 Subject: [PATCH 12/26] work --- python/pyarrow-stubs/_types.pyi | 8 +-- python/pyarrow-stubs/array.pyi | 5 +- python/pyarrow-stubs/io.pyi | 11 ++-- python/pyarrow-stubs/lib.pyi | 1 - python/pyarrow-stubs/py.typed | 16 +++++ python/pyarrow-stubs/scalar.pyi | 113 +++++++++++++++++--------------- python/pyarrow-stubs/tensor.pyi | 1 + 7 files changed, 92 insertions(+), 63 deletions(-) diff --git a/python/pyarrow-stubs/_types.pyi b/python/pyarrow-stubs/_types.pyi index 0c8afe2cbbb..32543d4b04b 100644 --- a/python/pyarrow-stubs/_types.pyi +++ b/python/pyarrow-stubs/_types.pyi @@ -46,9 +46,6 @@ from typing_extensions import TypeVar, deprecated from .io import Buffer from .scalar import ExtensionScalar -_AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) - class _Weakrefable: ... class _Metadata(_Weakrefable): ... @@ -207,6 +204,9 @@ class DataType(_Weakrefable): ArrowSchema pointer. """ +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + class _BasicDataType(DataType, Generic[_AsPyType]): ... class NullType(_BasicDataType[None]): ... class BoolType(_BasicDataType[bool]): ... @@ -4286,5 +4286,5 @@ __all__ = [ "_Tz", "_Time32Unit", "_Time64Unit", - + "_DataTypeT", ] diff --git a/python/pyarrow-stubs/array.pyi b/python/pyarrow-stubs/array.pyi index c01f167029e..fcd9ec8f135 100644 --- a/python/pyarrow-stubs/array.pyi +++ b/python/pyarrow-stubs/array.pyi @@ -54,6 +54,7 @@ from pyarrow.lib import ( # type: ignore[attr-defined] _Weakrefable, ) from typing_extensions import deprecated +import builtins from .scalar import * from .device import DeviceAllocationType # type: ignore[import-not-found] @@ -891,7 +892,7 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): result : Array A new array with nulls replaced by the given value. 
""" - def __getitem__(self, key: int | slice) -> _Scalar_co | Self: + def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: """ Slice or return value at given index @@ -2488,7 +2489,7 @@ class UnionArray(Array[UnionScalar]): """ @staticmethod def from_dense( - type: Int8Array, + types: Int8Array, value_offsets: Int32Array, children: NullableCollection[Array], field_names: list[str] | None = None, diff --git a/python/pyarrow-stubs/io.pyi b/python/pyarrow-stubs/io.pyi index ebcfa8c470b..b8404225e18 100644 --- a/python/pyarrow-stubs/io.pyi +++ b/python/pyarrow-stubs/io.pyi @@ -32,12 +32,13 @@ else: from typing_extensions import TypeAlias from typing import Any, Literal, SupportsIndex +import builtins from pyarrow._stubs_typing import Compression, SupportPyBuffer from pyarrow.lib import MemoryPool, _Weakrefable # from .device import Device, DeviceAllocationType, MemoryManager -from .types import KeyValueMetadata +from ._types import KeyValueMetadata def have_libhdfs() -> bool: """ @@ -205,7 +206,7 @@ class NativeFile(_Weakrefable): ------- stream : NativeFile """ - def read_at(self) -> bytes: + def read_at(self, nbytes: int, offset: int) -> bytes: """ Read indicated number of bytes at offset from the file @@ -218,7 +219,7 @@ class NativeFile(_Weakrefable): ------- data : bytes """ - def read1(self) -> bytes: + def read1(self, nbytes: int | None = None) -> bytes: """ Read and return up to n bytes. @@ -324,6 +325,8 @@ class NativeFile(_Weakrefable): The buffer size to use for data transfers. """ + def writable(self): ... + # ---------------------------------------------------------------------- # Python file-like objects @@ -632,7 +635,7 @@ class Buffer(_Weakrefable): # """ @property def parent(self) -> Buffer | None: ... - def __getitem__(self, key: slice | int) -> Self | int: + def __getitem__(self, key: builtins.slice | int) -> Self | int: """ Return self[key]. """ diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index a1a016ef2f2..527f946b53a 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -38,7 +38,6 @@ from .scalar import * from .tensor import * from ._types import * -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) class MonthDayNano(NamedTuple): days: int diff --git a/python/pyarrow-stubs/py.typed b/python/pyarrow-stubs/py.typed index e69de29bb2d..13a83393a91 100644 --- a/python/pyarrow-stubs/py.typed +++ b/python/pyarrow-stubs/py.typed @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/python/pyarrow-stubs/scalar.pyi b/python/pyarrow-stubs/scalar.pyi index 2532026e7bc..0bcd97dd038 100644 --- a/python/pyarrow-stubs/scalar.pyi +++ b/python/pyarrow-stubs/scalar.pyi @@ -36,7 +36,6 @@ from pyarrow.lib import Array, Buffer, MemoryPool, MonthDayNano, Tensor, _Weakre from typing_extensions import TypeVar from ._types import ( - # _AsPyType, _DataTypeT, _Time32Unit, _Time64Unit, @@ -49,7 +48,17 @@ from ._types import ( LargeListViewType, FixedSizeListType, ) -from . import types +from ._types import ( + Decimal256Type, _Precision, _Scale, NullType, BoolType, UInt8Type, Int8Type, + UInt16Type, Int16Type, Uint32Type, Int32Type, UInt64Type, Int64Type, + Float16Type, Float32Type, Float64Type, Decimal32Type, Decimal64Type, + Decimal128Type, Date32Type, Date64Type, Time32Type, Time64Type, TimestampType, + _Size, DurationType, MonthDayNanoIntervalType, BinaryType, LargeBinaryType, + FixedSizeBinaryType, StringType, LargeStringType, BinaryViewType, StringViewType, + StructType, _K, _ValueT, _IndexT, _BasicValueT, RunEndEncodedType, _RunEndType, + UnionType, ExtensionType, BaseExtensionType, Bool8Type, UuidType, JsonType, + OpaqueType, DictionaryType, MapType, _BasicDataType, +) _AsPyTypeK = TypeVar("_AsPyTypeK") _AsPyTypeV = TypeVar("_AsPyTypeV") @@ -148,115 +157,115 @@ class Scalar(_Weakrefable, Generic[_DataType_co]): _NULL: TypeAlias = None NA = _NULL -class NullScalar(Scalar[types.NullType]): +class NullScalar(Scalar[NullType]): """ Concrete class for null scalars. """ -class BooleanScalar(Scalar[types.BoolType]): +class BooleanScalar(Scalar[BoolType]): """ Concrete class for boolean scalars. """ -class UInt8Scalar(Scalar[types.UInt8Type]): +class UInt8Scalar(Scalar[UInt8Type]): """ Concrete class for uint8 scalars. """ -class Int8Scalar(Scalar[types.Int8Type]): +class Int8Scalar(Scalar[Int8Type]): """ Concrete class for int8 scalars. """ -class UInt16Scalar(Scalar[types.UInt16Type]): +class UInt16Scalar(Scalar[UInt16Type]): """ Concrete class for uint16 scalars. """ -class Int16Scalar(Scalar[types.Int16Type]): +class Int16Scalar(Scalar[Int16Type]): """ Concrete class for int16 scalars. """ -class UInt32Scalar(Scalar[types.Uint32Type]): +class UInt32Scalar(Scalar[Uint32Type]): """ Concrete class for uint32 scalars. """ -class Int32Scalar(Scalar[types.Int32Type]): +class Int32Scalar(Scalar[Int32Type]): """ Concrete class for int32 scalars. """ -class UInt64Scalar(Scalar[types.UInt64Type]): +class UInt64Scalar(Scalar[UInt64Type]): """ Concrete class for uint64 scalars. """ -class Int64Scalar(Scalar[types.Int64Type]): +class Int64Scalar(Scalar[Int64Type]): """ Concrete class for int64 scalars. """ -class HalfFloatScalar(Scalar[types.Float16Type]): +class HalfFloatScalar(Scalar[Float16Type]): """ Concrete class for float scalars. """ -class FloatScalar(Scalar[types.Float32Type]): +class FloatScalar(Scalar[Float32Type]): """ Concrete class for float scalars. """ -class DoubleScalar(Scalar[types.Float64Type]): +class DoubleScalar(Scalar[Float64Type]): """ Concrete class for double scalars. """ -class Decimal32Scalar(Scalar[types.Decimal32Type[types._Precision, types._Scale]]): +class Decimal32Scalar(Scalar[Decimal32Type[_Precision, _Scale]]): """ Concrete class for decimal32 scalars. """ -class Decimal64Scalar(Scalar[types.Decimal64Type[types._Precision, types._Scale]]): +class Decimal64Scalar(Scalar[Decimal64Type[_Precision, _Scale]]): """ Concrete class for decimal64 scalars. 
""" -class Decimal128Scalar(Scalar[types.Decimal128Type[types._Precision, types._Scale]]): +class Decimal128Scalar(Scalar[Decimal128Type[_Precision, _Scale]]): """ Concrete class for decimal128 scalars. """ -class Decimal256Scalar(Scalar[types.Decimal256Type[types._Precision, types._Scale]]): +class Decimal256Scalar(Scalar[Decimal256Type[_Precision, _Scale]]): """ Concrete class for decimal256 scalars. """ -class Date32Scalar(Scalar[types.Date32Type]): +class Date32Scalar(Scalar[Date32Type]): """ Concrete class for date32 scalars. """ -class Date64Scalar(Scalar[types.Date64Type]): +class Date64Scalar(Scalar[Date64Type]): """ Concrete class for date64 scalars. """ @property def value(self) -> dt.date | None: ... -class Time32Scalar(Scalar[types.Time32Type[_Time32Unit]]): +class Time32Scalar(Scalar[Time32Type[_Time32Unit]]): """ Concrete class for time32 scalars. """ @property def value(self) -> dt.time | None: ... -class Time64Scalar(Scalar[types.Time64Type[_Time64Unit]]): +class Time64Scalar(Scalar[Time64Type[_Time64Unit]]): """ Concrete class for time64 scalars. """ @property def value(self) -> dt.time | None: ... -class TimestampScalar(Scalar[types.TimestampType[_Unit, _Tz]]): +class TimestampScalar(Scalar[TimestampType[_Unit, _Tz]]): """ Concrete class for timestamp scalars. """ @property def value(self) -> int | None: ... -class DurationScalar(Scalar[types.DurationType[_Unit]]): +class DurationScalar(Scalar[DurationType[_Unit]]): """ Concrete class for duration scalars. """ @property def value(self) -> dt.timedelta | None: ... -class MonthDayNanoIntervalScalar(Scalar[types.MonthDayNanoIntervalType]): +class MonthDayNanoIntervalScalar(Scalar[MonthDayNanoIntervalType]): """ Concrete class for month, day, nanosecond interval scalars. """ @@ -266,7 +275,7 @@ class MonthDayNanoIntervalScalar(Scalar[types.MonthDayNanoIntervalType]): Same as self.as_py() """ -class BinaryScalar(Scalar[types.BinaryType]): +class BinaryScalar(Scalar[BinaryType]): """ Concrete class for binary-like scalars. """ @@ -275,7 +284,7 @@ class BinaryScalar(Scalar[types.BinaryType]): Return a view over this value as a Buffer object. """ -class LargeBinaryScalar(Scalar[types.LargeBinaryType]): +class LargeBinaryScalar(Scalar[LargeBinaryType]): """ """ def as_buffer(self) -> Buffer: @@ -285,7 +294,7 @@ class LargeBinaryScalar(Scalar[types.LargeBinaryType]): Return a view over this value as a Buffer object. """ -class FixedSizeBinaryScalar(Scalar[types.FixedSizeBinaryType]): +class FixedSizeBinaryScalar(Scalar[FixedSizeBinaryType]): """ """ def as_buffer(self) -> Buffer: @@ -295,7 +304,7 @@ class FixedSizeBinaryScalar(Scalar[types.FixedSizeBinaryType]): Return a view over this value as a Buffer object. """ -class StringScalar(Scalar[types.StringType]): +class StringScalar(Scalar[StringType]): """ Concrete class for string-like (utf8) scalars. """ @@ -306,7 +315,7 @@ class StringScalar(Scalar[types.StringType]): Return a view over this value as a Buffer object. """ -class LargeStringScalar(Scalar[types.LargeStringType]): +class LargeStringScalar(Scalar[LargeStringType]): """ """ def as_buffer(self) -> Buffer: @@ -316,7 +325,7 @@ class LargeStringScalar(Scalar[types.LargeStringType]): Return a view over this value as a Buffer object. """ -class BinaryViewScalar(Scalar[types.BinaryViewType]): +class BinaryViewScalar(Scalar[BinaryViewType]): """ """ def as_buffer(self) -> Buffer: @@ -326,7 +335,7 @@ class BinaryViewScalar(Scalar[types.BinaryViewType]): Return a view over this value as a Buffer object. 
""" -class StringViewScalar(Scalar[types.StringViewType]): +class StringViewScalar(Scalar[StringViewType]): """ """ def as_buffer(self) -> Buffer: @@ -355,7 +364,7 @@ class ListScalar(Scalar[ListType[_DataTypeT]]): Iterate over this element's values. """ -class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, types._Size]]): +class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, _Size]]): """ """ @property @@ -451,7 +460,7 @@ class LargeListViewScalar(Scalar[LargeListViewType[_DataTypeT]]): Iterate over this element's values. """ -class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar]): +class StructScalar(Scalar[StructType], collections.abc.Mapping[str, Scalar]): """ Concrete class for struct scalars. """ @@ -478,7 +487,7 @@ class StructScalar(Scalar[types.StructType], collections.abc.Mapping[str, Scalar """ def _as_py_tuple(self) -> list[tuple[str, Any]]: ... -class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): +class MapScalar(Scalar[MapType[_K, _ValueT]]): """ Concrete class for map scalars. """ @@ -490,48 +499,48 @@ class MapScalar(Scalar[types.MapType[types._K, types._ValueT]]): Return the number of values. """ - def __getitem__(self, i: int) -> tuple[Scalar[types._K], types._ValueT, Any]: + def __getitem__(self, i: int) -> tuple[Scalar[_K], _ValueT, Any]: """ Return the value at the given index or key. """ def __iter__( self: Scalar[ - types.MapType[types._BasicDataType[_AsPyTypeK], types._BasicDataType[_AsPyTypeV]],] - | Scalar[types.MapType[Any, types._BasicDataType[_AsPyTypeV]]] - | Scalar[types.MapType[types._BasicDataType[_AsPyTypeK], Any]] + MapType[_BasicDataType[_AsPyTypeK], _BasicDataType[_AsPyTypeV]],] + | Scalar[MapType[Any, _BasicDataType[_AsPyTypeV]]] + | Scalar[MapType[_BasicDataType[_AsPyTypeK], Any]] ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]] | Iterator[tuple[Any, _AsPyTypeV]] | Iterator[tuple[_AsPyTypeK, Any]]: """ Iterate over this element's values. """ -class DictionaryScalar(Scalar[types.DictionaryType[types._IndexT, types._BasicValueT]]): +class DictionaryScalar(Scalar[DictionaryType[_IndexT, _BasicValueT]]): """ Concrete class for dictionary-encoded scalars. """ @property - def index(self) -> Scalar[types._IndexT]: + def index(self) -> Scalar[_IndexT]: """ Return this value's underlying index as a scalar. """ @property - def value(self) -> Scalar[types._BasicValueT]: + def value(self) -> Scalar[_BasicValueT]: """ Return the encoded value as a scalar. """ @property def dictionary(self) -> Array: ... -class RunEndEncodedScalar(Scalar[types.RunEndEncodedType[types._RunEndType, types._BasicValueT]]): +class RunEndEncodedScalar(Scalar[RunEndEncodedType[_RunEndType, _BasicValueT]]): """ Concrete class for RunEndEncoded scalars. """ @property - def value(self) -> tuple[int, types._BasicValueT] | None: + def value(self) -> tuple[int, _BasicValueT] | None: """ Return underlying value as a scalar. """ -class UnionScalar(Scalar[types.UnionType]): +class UnionScalar(Scalar[UnionType]): """ Concrete class for Union scalars. """ @@ -546,7 +555,7 @@ class UnionScalar(Scalar[types.UnionType]): Return the union type code for this scalar. """ -class ExtensionScalar(Scalar[types.ExtensionType]): +class ExtensionScalar(Scalar[ExtensionType]): """ Concrete class for Extension scalars. """ @@ -556,7 +565,7 @@ class ExtensionScalar(Scalar[types.ExtensionType]): Return storage value as a scalar. 
""" @staticmethod - def from_storage(typ: types.BaseExtensionType, value) -> ExtensionScalar: + def from_storage(typ: BaseExtensionType, value) -> ExtensionScalar: """ Construct ExtensionScalar from type and storage value. @@ -572,19 +581,19 @@ class ExtensionScalar(Scalar[types.ExtensionType]): ext_scalar : ExtensionScalar """ -class Bool8Scalar(Scalar[types.Bool8Type]): +class Bool8Scalar(Scalar[Bool8Type]): """ Concrete class for bool8 extension scalar. """ -class UuidScalar(Scalar[types.UuidType]): +class UuidScalar(Scalar[UuidType]): """ Concrete class for Uuid extension scalar. """ -class JsonScalar(Scalar[types.JsonType]): +class JsonScalar(Scalar[JsonType]): """ Concrete class for JSON extension scalar. """ -class OpaqueScalar(Scalar[types.OpaqueType]): +class OpaqueScalar(Scalar[OpaqueType]): """ Concrete class for opaque extension scalar. """ diff --git a/python/pyarrow-stubs/tensor.pyi b/python/pyarrow-stubs/tensor.pyi index ac34fa08ffc..7e9b86ea1cd 100644 --- a/python/pyarrow-stubs/tensor.pyi +++ b/python/pyarrow-stubs/tensor.pyi @@ -619,6 +619,7 @@ class SparseCSFTensor(_Weakrefable): indptr: np.ndarray, indices: np.ndarray, shape: tuple[int, ...], + axis_order: list[int] | None = None, dim_names: list[str] | None = None, ) -> Self: """ From 7a907f5ec4e3750a598e78578c3b97e84cf131b4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 01:43:36 +0200 Subject: [PATCH 13/26] work --- python/pyarrow-stubs/__init__.pyi | 183 +- python/pyarrow-stubs/_compute.pyi | 1721 +++++ python/pyarrow-stubs/_dataset.pyi | 2300 ++++++ python/pyarrow-stubs/_ipc.pyi | 709 ++ python/pyarrow-stubs/_types.pyi | 14 +- python/pyarrow-stubs/array.pyi | 1 + python/pyarrow-stubs/compute.pyi | 6168 +++++++++++++++++ python/pyarrow-stubs/config.pyi | 41 + python/pyarrow-stubs/dataset.pyi | 229 + python/pyarrow-stubs/device.pyi | 88 + python/pyarrow-stubs/error.pyi | 53 + python/pyarrow-stubs/interchange/__init__.pyi | 0 python/pyarrow-stubs/interchange/buffer.pyi | 58 + python/pyarrow-stubs/interchange/column.pyi | 252 + .../pyarrow-stubs/interchange/dataframe.pyi | 102 + .../interchange/from_dataframe.pyi | 244 + python/pyarrow-stubs/io.pyi | 57 +- python/pyarrow-stubs/ipc.pyi | 123 + python/pyarrow-stubs/lib.pyi | 10 +- python/pyarrow-stubs/table.pyi | 5154 ++++++++++++++ python/pyarrow-stubs/util.pyi | 27 + python/pyarrow/{ipc.py => _ipc.py} | 0 22 files changed, 17397 insertions(+), 137 deletions(-) create mode 100644 python/pyarrow-stubs/_compute.pyi create mode 100644 python/pyarrow-stubs/_dataset.pyi create mode 100644 python/pyarrow-stubs/_ipc.pyi create mode 100644 python/pyarrow-stubs/compute.pyi create mode 100644 python/pyarrow-stubs/config.pyi create mode 100644 python/pyarrow-stubs/dataset.pyi create mode 100644 python/pyarrow-stubs/device.pyi create mode 100644 python/pyarrow-stubs/error.pyi create mode 100644 python/pyarrow-stubs/interchange/__init__.pyi create mode 100644 python/pyarrow-stubs/interchange/buffer.pyi create mode 100644 python/pyarrow-stubs/interchange/column.pyi create mode 100644 python/pyarrow-stubs/interchange/dataframe.pyi create mode 100644 python/pyarrow-stubs/interchange/from_dataframe.pyi create mode 100644 python/pyarrow-stubs/ipc.pyi create mode 100644 python/pyarrow-stubs/table.pyi create mode 100644 python/pyarrow-stubs/util.pyi rename python/pyarrow/{ipc.py => _ipc.py} (100%) diff --git a/python/pyarrow-stubs/__init__.pyi b/python/pyarrow-stubs/__init__.pyi index d366d1793ff..3f5e3073fd8 100644 --- a/python/pyarrow-stubs/__init__.pyi +++ 
b/python/pyarrow-stubs/__init__.pyi @@ -22,20 +22,19 @@ import pyarrow.lib as _lib _gc_enabled: bool -# TODO from pyarrow.lib import ( - # BuildInfo, - # RuntimeInfo, - # set_timezone_db_path, + BuildInfo, + RuntimeInfo, + set_timezone_db_path, MonthDayNano, - # VersionInfo, - # cpp_build_info, - # cpp_version, - # cpp_version_info, - # runtime_info, + VersionInfo, + cpp_build_info, + cpp_version, + cpp_version_info, + runtime_info, cpu_count, set_cpu_count, - # enable_signal_handlers, + enable_signal_handlers, io_thread_count, set_io_thread_count, ) @@ -51,7 +50,6 @@ def show_info() -> None: def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... -# TODO from pyarrow.lib import ( null, bool_, @@ -144,8 +142,8 @@ from pyarrow.lib import ( Array, Tensor, array, - # chunked_array, - # record_batch, + chunked_array, + record_batch, nulls, repeat, SparseCOOTensor, @@ -257,7 +255,7 @@ from pyarrow.lib import ( ) # Buffers, allocation -# from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager +from pyarrow.lib import DeviceAllocationType, Device, MemoryManager, default_cpu_memory_manager from pyarrow.lib import ( Buffer, @@ -311,54 +309,52 @@ from pyarrow.lib import ( have_libhdfs, ) -# TODO from pyarrow.lib import ( - # ChunkedArray, - # RecordBatch, - # Table, - # table, + ChunkedArray, + RecordBatch, + Table, + table, concat_arrays, - # concat_tables, - # TableGroupBy, - # RecordBatchReader, + concat_tables, + TableGroupBy, + RecordBatchReader, ) # Exceptions -# from pyarrow.lib import ( -# ArrowCancelled, -# ArrowCapacityError, -# ArrowException, -# ArrowKeyError, -# ArrowIndexError, -# ArrowInvalid, -# ArrowIOError, -# ArrowMemoryError, -# ArrowNotImplementedError, -# ArrowTypeError, -# ArrowSerializationError, -# ) +from pyarrow.lib import ( + ArrowCancelled, + ArrowCapacityError, + ArrowException, + ArrowKeyError, + ArrowIndexError, + ArrowInvalid, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError, + ArrowTypeError, + ArrowSerializationError, +) -# TODO -# from ipc import serialize_pandas, deserialize_pandas -# import ipc as ipc +from .ipc import serialize_pandas, deserialize_pandas +# TODO? +# import _ipc as ipc import types as types # ---------------------------------------------------------------------- # Deprecations -# from util import _deprecate_api, _deprecate_class +from .util import _deprecate_api, _deprecate_class -# TODO -# from pyarrow.ipc import ( -# Message, -# MessageReader, -# MetadataVersion, -# RecordBatchFileReader, -# RecordBatchFileWriter, -# RecordBatchStreamReader, -# RecordBatchStreamWriter, -# ) +from pyarrow.ipc import ( + Message, + MessageReader, + MetadataVersion, + RecordBatchFileReader, + RecordBatchFileWriter, + RecordBatchStreamReader, + RecordBatchStreamWriter, +) # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. 
in @@ -401,18 +397,18 @@ __all__ = [ "__version__", "_lib", "_gc_enabled", - # "BuildInfo", - # "RuntimeInfo", - # "set_timezone_db_path", + "BuildInfo", + "RuntimeInfo", + "set_timezone_db_path", "MonthDayNano", - # "VersionInfo", - # "cpp_build_info", - # "cpp_version", - # "cpp_version_info", - # "runtime_info", + "VersionInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", "cpu_count", "set_cpu_count", - # "enable_signal_handlers", + "enable_signal_handlers", "io_thread_count", "set_io_thread_count", "show_versions", @@ -510,8 +506,8 @@ __all__ = [ "Array", "Tensor", "array", - # "chunked_array", - # "record_batch", + "chunked_array", + "record_batch", "nulls", "repeat", "SparseCOOTensor", @@ -620,10 +616,10 @@ __all__ = [ "UuidScalar", "JsonScalar", "OpaqueScalar", - # "DeviceAllocationType", - # "Device", - # "MemoryManager", - # "default_cpu_memory_manager", + "DeviceAllocationType", + "Device", + "MemoryManager", + "default_cpu_memory_manager", "Buffer", "ResizableBuffer", "foreign_buffer", @@ -666,38 +662,37 @@ __all__ = [ "input_stream", "output_stream", "have_libhdfs", - # "ChunkedArray", - # "RecordBatch", - # "Table", - # "table", + "ChunkedArray", + "RecordBatch", + "Table", + "table", "concat_arrays", - # "concat_tables", - # "TableGroupBy", - # "RecordBatchReader", - # "ArrowCancelled", - # "ArrowCapacityError", - # "ArrowException", - # "ArrowKeyError", - # "ArrowIndexError", - # "ArrowInvalid", - # "ArrowIOError", - # "ArrowMemoryError", - # "ArrowNotImplementedError", - # "ArrowTypeError", - # "ArrowSerializationError", - # "serialize_pandas", - # "deserialize_pandas", - # "ipc", + "concat_tables", + "TableGroupBy", + "RecordBatchReader", + "ArrowCancelled", + "ArrowCapacityError", + "ArrowException", + "ArrowKeyError", + "ArrowIndexError", + "ArrowInvalid", + "ArrowIOError", + "ArrowMemoryError", + "ArrowNotImplementedError", + "ArrowTypeError", + "ArrowSerializationError", + "serialize_pandas", + "deserialize_pandas", "types", - # "_deprecate_api", - # "_deprecate_class", - # "Message", - # "MessageReader", - # "MetadataVersion", - # "RecordBatchFileReader", - # "RecordBatchFileWriter", - # "RecordBatchStreamReader", - # "RecordBatchStreamWriter", + "_deprecate_api", + "_deprecate_class", + "Message", + "MessageReader", + "MetadataVersion", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", "get_include", "_get_pkg_config_executable", "_has_pkg_config", diff --git a/python/pyarrow-stubs/_compute.pyi b/python/pyarrow-stubs/_compute.pyi new file mode 100644 index 00000000000..3d61ae42787 --- /dev/null +++ b/python/pyarrow-stubs/_compute.pyi @@ -0,0 +1,1721 @@ +from typing import ( + Any, + Callable, + Iterable, + Literal, + Sequence, + TypeAlias, + TypedDict, + overload, +) + +from . import lib + +_Order: TypeAlias = Literal["ascending", "descending"] +_Placement: TypeAlias = Literal["at_start", "at_end"] + +class Kernel(lib._Weakrefable): + """ + A kernel object. + + Kernels handle the execution of a Function for a certain signature. + """ + +class Function(lib._Weakrefable): + """ + A compute function. + + A function implements a certain logical computation over a range of + possible input signatures. Each signature accepts a range of input + types and is implemented by a given Kernel. + + Functions can be of different kinds: + + * "scalar" functions apply an item-wise computation over all items + of their inputs. 
Each item in the output only depends on the values + of the inputs at the same position. Examples: addition, comparisons, + string predicates... + + * "vector" functions apply a collection-wise computation, such that + each item in the output may depend on the values of several items + in each input. Examples: dictionary encoding, sorting, extracting + unique values... + + * "scalar_aggregate" functions reduce the dimensionality of the inputs by + applying a reduction function. Examples: sum, min_max, mode... + + * "hash_aggregate" functions apply a reduction function to an input + subdivided by grouping criteria. They may not be directly called. + Examples: hash_sum, hash_min_max... + + * "meta" functions dispatch to other functions. + """ + @property + def arity(self) -> int: + """ + The function arity. + + If Ellipsis (i.e. `...`) is returned, the function takes a variable + number of arguments. + """ + @property + def kind( + self, + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: + """ + The function kind. + """ + @property + def name(self) -> str: + """ + The function name. + """ + @property + def num_kernels(self) -> int: + """ + The number of kernels implementing this function. + """ + def call( + self, + args: Iterable, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, + ) -> Any: + """ + Call the function on the given arguments. + + Parameters + ---------- + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. + options : FunctionOptions, optional + Options instance for executing this function. This should have + the right concrete options type. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If + not passed, will be inferred from passed data. + """ + +class FunctionOptions(lib._Weakrefable): + def serialize(self) -> lib.Buffer: ... + @classmethod + def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + +class FunctionRegistry(lib._Weakrefable): + def get_function(self, name: str) -> Function: + """ + Look up a function by name in the registry. + + Parameters + ---------- + name : str + The name of the function to lookup + """ + + def list_functions(self) -> list[str]: + """ + Return all function names in the registry. + """ + +class HashAggregateFunction(Function): ... +class HashAggregateKernel(Kernel): ... +class ScalarAggregateFunction(Function): ... +class ScalarAggregateKernel(Kernel): ... +class ScalarFunction(Function): ... +class ScalarKernel(Kernel): ... +class VectorFunction(Function): ... +class VectorKernel(Kernel): ... + +# ==================== _compute.pyx Option classes ==================== +class ArraySortOptions(FunctionOptions): + """ + Options for the `array_sort_indices` function. + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + null_placement : str, default "at_end" + Where nulls in the input should be sorted. + Accepted values are "at_start", "at_end". + """ + def __init__( + self, + order: _Order = "ascending", + null_placement: _Placement = "at_end", + ) -> None: ... + +class AssumeTimezoneOptions(FunctionOptions): + """ + Options for the `assume_timezone` function. + + Parameters + ---------- + timezone : str + Timezone to assume for the input. 
+ ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + """ + + def __init__( + self, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + ) -> None: ... + +class CastOptions(FunctionOptions): + """ + Options for the `cast` function. + + Parameters + ---------- + target_type : DataType, optional + The PyArrow type to cast to. + allow_int_overflow : bool, default False + Whether integer overflow is allowed when casting. + allow_time_truncate : bool, default False + Whether time precision truncation is allowed when casting. + allow_time_overflow : bool, default False + Whether date/time range overflow is allowed when casting. + allow_decimal_truncate : bool, default False + Whether decimal precision truncation is allowed when casting. + allow_float_truncate : bool, default False + Whether floating-point precision truncation is allowed when casting. + allow_invalid_utf8 : bool, default False + Whether producing invalid utf8 data is allowed when casting. + """ + + allow_int_overflow: bool + allow_time_truncate: bool + allow_time_overflow: bool + allow_decimal_truncate: bool + allow_float_truncate: bool + allow_invalid_utf8: bool + + def __init__( + self, + target_type: lib.DataType | None = None, + *, + allow_int_overflow: bool | None = None, + allow_time_truncate: bool | None = None, + allow_time_overflow: bool | None = None, + allow_decimal_truncate: bool | None = None, + allow_float_truncate: bool | None = None, + allow_invalid_utf8: bool | None = None, + ) -> None: ... + @staticmethod + def safe(target_type: lib.DataType | None = None) -> CastOptions: ... + @staticmethod + def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... + def is_safe(self) -> bool: ... + +class CountOptions(FunctionOptions): + """ + Options for the `count` function. + + Parameters + ---------- + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + """ + def __init__(self, mode: Literal["only_valid", "only_null", "all"] = "only_valid") -> None: ... + +class CumulativeOptions(FunctionOptions): + """ + Options for `cumulative_*` functions. + + - cumulative_sum + - cumulative_sum_checked + - cumulative_prod + - cumulative_prod_checked + - cumulative_max + - cumulative_min + + Parameters + ---------- + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class CumulativeSumOptions(FunctionOptions): + """ + Options for `cumulative_sum` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for sum computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + +class DayOfWeekOptions(FunctionOptions): + """ + Options for the `day_of_week` function. 
+ + Parameters + ---------- + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + """ + + def __init__(self, *, count_from_zero: bool = True, week_start: int = 1) -> None: ... + +class DictionaryEncodeOptions(FunctionOptions): + """ + Options for dictionary encoding. + + Parameters + ---------- + null_encoding : str, default "mask" + How to encode nulls in the input. + Accepted values are "mask" (null inputs emit a null in the indices + array), "encode" (null inputs emit a non-null index pointing to + a null value in the dictionary array). + """ + def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... + +class RunEndEncodeOptions(FunctionOptions): + """ + Options for run-end encoding. + + Parameters + ---------- + run_end_type : DataType, default pyarrow.int32() + The data type of the run_ends array. + + Accepted values are pyarrow.{int16(), int32(), int64()}. + """ + # TODO: default is DataType(int32) + def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + +class ElementWiseAggregateOptions(FunctionOptions): + """ + Options for element-wise aggregate functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + """ + def __init__(self, *, skip_nulls: bool = True) -> None: ... + +class ExtractRegexOptions(FunctionOptions): + """ + Options for the `extract_regex` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class ExtractRegexSpanOptions(FunctionOptions): + """ + Options for the `extract_regex_span` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. + """ + def __init__(self, pattern: str) -> None: ... + +class FilterOptions(FunctionOptions): + """ + Options for selecting with a boolean filter. + + Parameters + ---------- + null_selection_behavior : str, default "drop" + How to handle nulls in the selection filter. + Accepted values are "drop", "emit_null". + """ + + def __init__(self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... + +class IndexOptions(FunctionOptions): + """ + Options for the `index` function. + + Parameters + ---------- + value : Scalar + The value to search for. + """ + def __init__(self, value: lib.Scalar) -> None: ... + +class JoinOptions(FunctionOptions): + """ + Options for the `binary_join_element_wise` function. + + Parameters + ---------- + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + """ + @overload + def __init__(self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + @overload + def __init__(self, null_handling: Literal["replace"], null_replacement: str = "") -> None: ... + +class ListSliceOptions(FunctionOptions): + """ + Options for list array slicing. + + Parameters + ---------- + start : int + Index to start slicing inner list elements (inclusive). + stop : Optional[int], default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. 
(NotImplemented) + step : int, default 1 + Slice step. + return_fixed_size_list : Optional[bool], default None + Whether to return a FixedSizeListArray. If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + """ + def __init__( + self, + start: int, + stop: int | None = None, + step: int = 1, + return_fixed_size_list: bool | None = None, + ) -> None: ... + +class ListFlattenOptions(FunctionOptions): + """ + Options for `list_flatten` function + + Parameters + ---------- + recursive : bool, default False + When True, the list array is flattened recursively until an array + of non-list values is formed. + """ + def __init__(self, recursive: bool = False) -> None: ... + +class MakeStructOptions(FunctionOptions): + """ + Options for the `make_struct` function. + + Parameters + ---------- + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + """ + def __init__( + self, + field_names: Sequence[str] = (), + *, + field_nullability: Sequence[bool] | None = None, + field_metadata: Sequence[lib.KeyValueMetadata] | None = None, + ) -> None: ... + +class MapLookupOptions(FunctionOptions): + """ + Options for the `map_lookup` function. + + Parameters + ---------- + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + """ + # TODO: query_key: Scalar or Object can be converted to Scalar + def __init__( + self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] + ) -> None: ... + +class MatchSubstringOptions(FunctionOptions): + """ + Options for looking for a substring. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + """ + + def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + +class ModeOptions(FunctionOptions): + """ + Options for the `mode` function. + + Parameters + ---------- + n : int, default 1 + Number of distinct most-common values to return. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, n: int = 1, *, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class NullOptions(FunctionOptions): + """ + Options for the `is_null` function. + + Parameters + ---------- + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + """ + def __init__(self, *, nan_is_null: bool = False) -> None: ... + +class PadOptions(FunctionOptions): + """ + Options for padding strings. + + Parameters + ---------- + width : int + Desired string length. + padding : str, default " " + What to pad the string with. Should be one byte or codepoint. + lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. 
adding + the extra padding character on the right). + """ + def __init__( + self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True + ) -> None: ... + +class PairwiseOptions(FunctionOptions): + """ + Options for `pairwise` functions. + + Parameters + ---------- + period : int, default 1 + Period for applying the period function. + """ + def __init__(self, period: int = 1) -> None: ... + +class PartitionNthOptions(FunctionOptions): + """ + Options for the `partition_nth_indices` function. + + Parameters + ---------- + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + """ + def __init__(self, pivot: int, *, null_placement: _Placement = "at_end") -> None: ... + +class WinsorizeOptions(FunctionOptions): + """ + Options for the `winsorize` function. + + Parameters + ---------- + lower_limit : float, between 0 and 1 + The quantile below which all values are replaced with the quantile's value. + upper_limit : float, between 0 and 1 + The quantile above which all values are replaced with the quantile's value. + """ + def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + +class QuantileOptions(FunctionOptions): + """ + Options for the `quantile` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, + q: float | Sequence[float], + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class RandomOptions(FunctionOptions): + """ + Options for random generation. + + Parameters + ---------- + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + """ + def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + +class RankOptions(FunctionOptions): + """ + Options for the `rank` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. 
+ null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + tiebreaker : str, default "first" + Configure how ties between equal values are handled. + Accepted values are: + + - "min": Ties get the smallest possible rank in sorted order. + - "max": Ties get the largest possible rank in sorted order. + - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + """ + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + tiebreaker: Literal["min", "max", "first", "dense"] = "first", + ) -> None: ... + +class RankQuantileOptions(FunctionOptions): + """ + Options for the `rank_quantile` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples or str, default "ascending" + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + Alternatively, one can simply pass "ascending" or "descending" as a string + if the input is array-like. + null_placement : str, default "at_end" + Where nulls in input should be sorted. + Accepted values are "at_start", "at_end". + """ + + def __init__( + self, + sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", + *, + null_placement: _Placement = "at_end", + ) -> None: ... + +class PivotWiderOptions(FunctionOptions): + """ + Options for the `pivot_wider` function. + + Parameters + ---------- + key_names : sequence of str + The pivot key names expected in the pivot key column. + For each entry in `key_names`, a column with the same name is emitted + in the struct output. + unexpected_key_behavior : str, default "ignore" + The behavior when pivot keys not in `key_names` are encountered. + Accepted values are "ignore", "raise". + If "ignore", unexpected keys are silently ignored. + If "raise", unexpected keys raise a KeyError. + """ + def __init__( + self, + key_names: Sequence[str], + *, + unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", + ) -> None: ... + +class ReplaceSliceOptions(FunctionOptions): + """ + Options for replacing slices. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + """ + def __init__(self, start: int, stop: int, replacement: str) -> None: ... + +class ReplaceSubstringOptions(FunctionOptions): + """ + Options for replacing matched substrings. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + """ + def __init__( + self, pattern: str, replacement: str, *, max_replacements: int | None = None + ) -> None: ... 
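+# Editorial usage sketch (not part of the original stub): assuming the runtime
+# pyarrow.compute kernels match the option classes above, each FunctionOptions
+# subclass can either be passed explicitly via ``options=`` or spelled out as
+# the equivalent keyword arguments.
+#
+#     import pyarrow as pa
+#     import pyarrow.compute as pc
+#
+#     s = pa.array(["Hello", "WORLD", None])
+#     pc.match_substring(s, pattern="o", ignore_case=True)
+#     pc.match_substring(s, options=pc.MatchSubstringOptions("o", ignore_case=True))
+#
+#     x = pa.array([1.0, 2.0, 2.0, None])
+#     pc.quantile(x, q=[0.25, 0.75], interpolation="nearest")
+#     pc.mode(x, n=2, skip_nulls=True)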
+ +_RoundMode: TypeAlias = Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", +] + +class RoundBinaryOptions(FunctionOptions): + """ + Options for rounding numbers when ndigits is provided by a second array + + Parameters + ---------- + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +class RoundOptions(FunctionOptions): + """ + Options for rounding numbers. + + Parameters + ---------- + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__( + self, + ndigits: int = 0, + round_mode: _RoundMode = "half_to_even", + ) -> None: ... + +_DateTimeUint: TypeAlias = Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", +] + +class RoundTemporalOptions(FunctionOptions): + """ + Options for rounding temporal values. + + Parameters + ---------- + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. 
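+
+ A usage sketch (editorial addition, not from the upstream docstring; it
+ assumes the runtime ``floor_temporal``/``round_temporal`` kernels match this
+ stub):
+
+     from datetime import datetime
+     import pyarrow as pa
+     import pyarrow.compute as pc
+
+     ts = pa.array([datetime(2022, 5, 18, 14, 47)])
+     pc.floor_temporal(ts, multiple=15, unit="minute")
+     pc.round_temporal(ts, multiple=1, unit="hour", calendar_based_origin=True)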
+ """ + def __init__( + self, + multiple: int = 1, + unit: _DateTimeUint = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + ) -> None: ... + +class RoundToMultipleOptions(FunctionOptions): + """ + Options for rounding numbers to a multiple. + + Parameters + ---------- + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__(self, multiple: float = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... + +class ScalarAggregateOptions(FunctionOptions): + """ + Options for scalar aggregations. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... + +class SelectKOptions(FunctionOptions): + """ + Options for top/bottom k-selection. + + Parameters + ---------- + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + """ + + def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + +class SetLookupOptions(FunctionOptions): + """ + Options for the `is_in` and `index_in` functions. + + Parameters + ---------- + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + """ + def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... + +class SliceOptions(FunctionOptions): + """ + Options for slicing. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + """ + + def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ... + +class SortOptions(FunctionOptions): + """ + Options for the `sort_indices` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". 
+ """ + def __init__( + self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end" + ) -> None: ... + +class SplitOptions(FunctionOptions): + """ + Options for splitting on whitespace. + + Parameters + ---------- + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + + def __init__(self, *, max_splits: int | None = None, reverse: bool = False) -> None: ... + +class SplitPatternOptions(FunctionOptions): + """ + Options for splitting on a string pattern. + + Parameters + ---------- + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + def __init__( + self, pattern: str, *, max_splits: int | None = None, reverse: bool = False + ) -> None: ... + +class StrftimeOptions(FunctionOptions): + """ + Options for the `strftime` function. + + Parameters + ---------- + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + """ + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C") -> None: ... + +class StrptimeOptions(FunctionOptions): + """ + Options for the `strptime` function. + + Parameters + ---------- + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + """ + def __init__( + self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False + ) -> None: ... + +class StructFieldOptions(FunctionOptions): + """ + Options for the `struct_field` function. + + Parameters + ---------- + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + """ + def __init__( + self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int + ) -> None: ... + +class TakeOptions(FunctionOptions): + """ + Options for the `take` and `array_take` functions. + + Parameters + ---------- + boundscheck : boolean, default True + Whether to check indices are within bounds. If False and an + index is out of bounds, behavior is undefined (the process + may crash). + """ + def __init__(self, boundscheck: bool = True) -> None: ... + +class TDigestOptions(FunctionOptions): + """ + Options for the `tdigest` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. 
+ If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, + q: float | Sequence[float] = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + ) -> None: ... + +class TrimOptions(FunctionOptions): + """ + Options for trimming characters from strings. + + Parameters + ---------- + characters : str + Individual characters to be trimmed from the string. + """ + def __init__(self, characters: str) -> None: ... + +class Utf8NormalizeOptions(FunctionOptions): + """ + Options for the `utf8_normalize` function. + + Parameters + ---------- + form : str + Unicode normalization form. + Accepted values are "NFC", "NFKC", "NFD", "NFKD". + """ + + def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + +class VarianceOptions(FunctionOptions): + """ + Options for the `variance` and `stddev` functions. + + Parameters + ---------- + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, min_count: int = 0) -> None: ... + +class SkewOptions(FunctionOptions): + """ + Options for the `skew` and `kurtosis` functions. + + Parameters + ---------- + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + biased : bool, default True + Whether the calculated value is biased. + If False, the value computed includes a correction factor to reduce bias. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + """ + def __init__( + self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 + ) -> None: ... + +class WeekOptions(FunctionOptions): + """ + Options for the `week` function. + + Parameters + ---------- + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). + first_week_is_fully_in_year : bool, default False + If True, week number 0 is fully in January. + If False, a week that begins on December 29, 30 or 31 is considered + to be week number 0 of the following year. + """ + def __init__( + self, + *, + week_starts_monday: bool = True, + count_from_zero: bool = False, + first_week_is_fully_in_year: bool = False, + ) -> None: ... + +# ==================== _compute.pyx Functions ==================== + +def call_function( + name: str, + args: list, + options: FunctionOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + length: int | None = None, +) -> Any: + """ + Call a named function. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to call. + args : list + The arguments to the function.
+ options : optional + options provided to the function. + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. + length : int, optional + Batch size for execution, for nullary (no argument) functions. If not + passed, inferred from data. + """ + +def function_registry() -> FunctionRegistry: ... +def get_function(name: str) -> Function: + """ + Get a function by name. + + The function is looked up in the global registry + (as returned by `function_registry()`). + + Parameters + ---------- + name : str + The name of the function to lookup + """ + +def list_functions() -> list[str]: + """ + Return all function names in the global registry. + """ + +# ==================== _compute.pyx Udf ==================== + +def call_tabular_function( + function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None +) -> lib.RecordBatchReader: + """ + Get a record batch iterator from a tabular function. + + Parameters + ---------- + function_name : str + Name of the function. + args : iterable + The arguments to pass to the function. Accepted types depend + on the specific function. Currently, only an empty args is supported. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + """ + +class _FunctionDoc(TypedDict): + summary: str + description: str + +def register_scalar_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined scalar function. + + This API is EXPERIMENTAL. + + A scalar function is a function that executes elementwise + operations on arrays or scalars, i.e. a scalar function must + be computed row-by-row with no state where each output row + is computed only from its corresponding input row. + In other words, all argument arrays have the same length, + and the output array is of the same length as the arguments. + Scalar functions are the only functions allowed in query engine + expressions. + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return an Array or Scalar + matching the out_type. It must return a Scalar if + all arguments are scalar, else it must return an Array. + + To define a varargs function, pass a callable that takes + *args. The last in_type will be the type of all varargs + arguments. + function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. 
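+
+ A varargs variant (editorial sketch; the function name and doc values below
+ are illustrative, and the call assumes the varargs behaviour described
+ above):
+
+     import pyarrow as pa
+     import pyarrow.compute as pc
+
+     def add_all(ctx, *arrays):
+         # Sum an arbitrary number of equally long int64 arrays.
+         result = arrays[0]
+         for a in arrays[1:]:
+             result = pc.add(result, a, memory_pool=ctx.memory_pool)
+         return result
+
+     pc.register_scalar_function(
+         add_all,
+         "py_add_all",  # hypothetical name
+         {"summary": "varargs udf", "description": "sum all inputs"},
+         {"arrays": pa.int64()},  # the last in_type is the varargs type
+         pa.int64(),
+     )
+     pc.call_function("py_add_all", [pa.array([1, 2]), pa.array([10, 20])])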
+ + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "simple udf" + >>> func_doc["description"] = "add a constant to a scalar" + >>> + >>> def add_constant(ctx, array): + ... return pc.add(array, 1, memory_pool=ctx.memory_pool) + >>> + >>> func_name = "py_add_func" + >>> in_types = {"array": pa.int64()} + >>> out_type = pa.int64() + >>> pc.register_scalar_function(add_constant, func_name, func_doc, in_types, out_type) + >>> + >>> func = pc.get_function(func_name) + >>> func.name + 'py_add_func' + >>> answer = pc.call_function(func_name, [pa.array([20])]) + >>> answer + <pyarrow.lib.Int64Array object at ...> + [ + 21 + ] + """ + +def register_tabular_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined tabular function. + + This API is EXPERIMENTAL. + + A tabular function is one accepting a context argument of type + UdfContext and returning a generator of struct arrays. + The in_types argument must be empty and the out_type argument + specifies a schema. Each struct array must have field types + corresponding to the schema. + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The only argument is the context argument of type + UdfContext. It must return a callable that + returns on each invocation a StructArray matching + the out_type, where an empty array indicates end. + function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + Must be an empty dictionary (reserved for future use). + out_type : Union[Schema, DataType] + Schema of the function's output, or a corresponding flat struct type. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + """ + +def register_aggregate_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined non-decomposable aggregate function. + + This API is EXPERIMENTAL. + + A non-decomposable aggregation function is a function that executes + aggregate operations on the whole data that it is aggregating. + In other words, a non-decomposable aggregate function cannot be + split into consume/merge/finalize steps. + + This is often used with ordered or segmented aggregation where groups + can be emitted before accumulating all of the input data. + + Note that currently the size of any input column cannot exceed 2 GB + for a single segment (all groups combined). + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return a Scalar matching the + out_type. + To define a varargs function, pass a callable that takes + *args. The in_type needs to match the type of the inputs when + the function gets called. + function_name : str + Name of the function. This name must be unique, i.e., + there should only be one function registered with + this name in the function registry.
+ function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + + Examples + -------- + >>> import numpy as np + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "simple median udf" + >>> func_doc["description"] = "compute median" + >>> + >>> def compute_median(ctx, array): + ... return pa.scalar(np.median(array)) + >>> + >>> func_name = "py_compute_median" + >>> in_types = {"array": pa.int64()} + >>> out_type = pa.float64() + >>> pc.register_aggregate_function(compute_median, func_name, func_doc, in_types, out_type) + >>> + >>> func = pc.get_function(func_name) + >>> func.name + 'py_compute_median' + >>> answer = pc.call_function(func_name, [pa.array([20, 40])]) + >>> answer + <pyarrow.DoubleScalar: 30.0> + >>> table = pa.table([pa.array([1, 1, 2, 2]), pa.array([10, 20, 30, 40])], names=["k", "v"]) + >>> result = table.group_by("k").aggregate([("v", "py_compute_median")]) + >>> result + pyarrow.Table + k: int64 + v_py_compute_median: double + ---- + k: [[1,2]] + v_py_compute_median: [[15,35]] + """ + +def register_vector_function( + func: Callable, + function_name: str, + function_doc: _FunctionDoc, + in_types: dict[str, lib.DataType], + out_type: lib.DataType, + func_registry: FunctionRegistry | None = None, +) -> None: + """ + Register a user-defined vector function. + + This API is EXPERIMENTAL. + + A vector function is a function that executes vector + operations on arrays. A vector function is often used + when the computation doesn't fit other more specific types of + functions (e.g., scalar and aggregate). + + Parameters + ---------- + func : callable + A callable implementing the user-defined function. + The first argument is the context argument of type + UdfContext. + Then, it must take arguments equal to the number of + in_types defined. It must return an Array or Scalar + matching the out_type. It must return a Scalar if + all arguments are scalar, else it must return an Array. + + To define a varargs function, pass a callable that takes + *args. The last in_type will be the type of all varargs + arguments. + function_name : str + Name of the function. There should only be one function + registered with this name in the function registry. + function_doc : dict + A dictionary object with keys "summary" (str), + and "description" (str). + in_types : Dict[str, DataType] + A dictionary mapping function argument names to + their respective DataType. + The argument names will be used to generate + documentation for the function. The number of + arguments specified here determines the function + arity. + out_type : DataType + Output type of the function. + func_registry : FunctionRegistry + Optional function registry to use instead of the default global one. + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> + >>> func_doc = {} + >>> func_doc["summary"] = "percent rank" + >>> func_doc["description"] = "compute percent rank" + >>> + >>> def list_flatten_udf(ctx, x): + ...
return pc.list_flatten(x) + >>> + >>> func_name = "list_flatten_udf" + >>> in_types = {"array": pa.list_(pa.int64())} + >>> out_type = pa.int64() + >>> pc.register_vector_function(list_flatten_udf, func_name, func_doc, in_types, out_type) + >>> + >>> answer = pc.call_function(func_name, [pa.array([[1, 2], [3, 4]])]) + >>> answer + <pyarrow.lib.Int64Array object at ...> + [ + 1, + 2, + 3, + 4 + ] + """ + +class UdfContext: + """ + Per-invocation function context/state. + + This object will always be the first argument to a user-defined + function. It should not be used outside of a call to the function. + """ + + @property + def batch_length(self) -> int: + """ + The common length of all input arguments (int). + + In the case that all arguments are scalars, this value + is used to pass the "actual length" of the arguments, + e.g. because the scalar values are encoding a column + with a constant value. + """ + @property + def memory_pool(self) -> lib.MemoryPool: + """ + A memory pool for allocations (:class:`MemoryPool`). + + This is the memory pool supplied by the user when they invoked + the function and it should be used in any calls to arrow that the + UDF makes if that call accepts a memory_pool. + """ + +# ==================== _compute.pyx Expression ==================== +class Expression(lib._Weakrefable): + """ + A logical expression to be evaluated against some input. + + To create an expression: + + - Use the factory function ``pyarrow.compute.scalar()`` to create a + scalar (not necessary when combined, see example below). + - Use the factory function ``pyarrow.compute.field()`` to reference + a field (column in table). + - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``. + - Combine expressions using python operators ``&`` (logical and), + ``|`` (logical or) and ``~`` (logical not). + Note: python keywords ``and``, ``or`` and ``not`` cannot be used + to combine expressions. + - Create expression predicates using Expression methods such as + ``pyarrow.compute.Expression.isin()``. + + Examples + -------- + + >>> import pyarrow.compute as pc + >>> (pc.field("a") < pc.scalar(3)) | (pc.field("b") > 7) + <pyarrow.compute.Expression ((a < 3) or (b > 7))> + >>> pc.field("a") != 3 + <pyarrow.compute.Expression (a != 3)> + >>> pc.field("a").isin([1, 2, 3]) + <pyarrow.compute.Expression is_in(a, {value_set=int64:[ + 1, + 2, + 3 + ], null_matching_behavior=MATCH})> + """ + + @staticmethod + def from_substrait(buffer: bytes | lib.Buffer) -> Expression: + """ + Deserialize an expression from Substrait + + The serialized message must be an ExtendedExpression message that has + only a single expression. The name of the expression and the schema + the expression was bound to will be ignored. Use + pyarrow.substrait.deserialize_expressions if this information is needed + or if the message might contain multiple expressions. + + Parameters + ---------- + message : bytes or Buffer or a protobuf Message + The Substrait message to deserialize + + Returns + ------- + Expression + The deserialized expression + """ + def to_substrait(self, schema: lib.Schema, allow_arrow_extensions: bool = False) -> lib.Buffer: + """ + Serialize the expression using Substrait + + The expression will be serialized as an ExtendedExpression message that has a + single expression named "expression" + + Parameters + ---------- + schema : Schema + The input schema the expression will be bound to + allow_arrow_extensions : bool, default False + If False then only functions that are part of the core Substrait function + definitions will be allowed. Set this to True to allow pyarrow-specific functions + but the result may not be accepted by other compute libraries.
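+
+ A round-trip sketch (editorial addition; it assumes a pyarrow build with
+ Substrait support):
+
+     import pyarrow as pa
+     import pyarrow.compute as pc
+
+     expr = pc.field("a") > 1
+     buf = expr.to_substrait(pa.schema([("a", pa.int64())]))
+     restored = pc.Expression.from_substrait(buf)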
+ + Returns + ------- + Buffer + A buffer containing the serialized Protobuf plan. + """ + def __invert__(self) -> Expression: ... + def __and__(self, other) -> Expression: ... + def __or__(self, other) -> Expression: ... + def __add__(self, other) -> Expression: ... + def __mul__(self, other) -> Expression: ... + def __sub__(self, other) -> Expression: ... + def __eq__(self, value: object) -> Expression: ... # type: ignore[override] + def __ne__(self, value: object) -> Expression: ... # type: ignore[override] + def __gt__(self, value: object) -> Expression: ... # type: ignore[override] + def __lt__(self, value: object) -> Expression: ... # type: ignore[override] + def __ge__(self, value: object) -> Expression: ... # type: ignore[override] + def __le__(self, value: object) -> Expression: ... # type: ignore[override] + def __truediv__(self, other) -> Expression: ... + def is_valid(self) -> bool: + """ + Check whether the expression is not-null (valid). + + This creates a new expression equivalent to calling the + `is_valid` compute function on this expression. + + Returns + ------- + is_valid : Expression + """ + def is_null(self, nan_is_null: bool = False) -> Expression: + """ + Check whether the expression is null. + + This creates a new expression equivalent to calling the + `is_null` compute function on this expression. + + Parameters + ---------- + nan_is_null : boolean, default False + Whether floating-point NaNs are considered null. + + Returns + ------- + is_null : Expression + """ + def is_nan(self) -> Expression: + """ + Check whether the expression is NaN. + + This creates a new expression equivalent to calling the + `is_nan` compute function on this expression. + + Returns + ------- + is_nan : Expression + """ + def cast( + self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None + ) -> Expression: + """ + Explicitly set or change the expression's data type. + + This creates a new expression equivalent to calling the + `cast` compute function on this expression. + + Parameters + ---------- + type : DataType, default None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Expression + """ + def isin(self, values: lib.Array | Iterable) -> Expression: + """ + Check whether the expression is contained in values. + + This creates a new expression equivalent to calling the + `is_in` compute function on this expression. + + Parameters + ---------- + values : Array or iterable + The values to check for. + + Returns + ------- + isin : Expression + A new expression that, when evaluated, checks whether + this expression's value is contained in `values`. + """ + +# ==================== _compute.py ==================== diff --git a/python/pyarrow-stubs/_dataset.pyi b/python/pyarrow-stubs/_dataset.pyi new file mode 100644 index 00000000000..03e7762b6df --- /dev/null +++ b/python/pyarrow-stubs/_dataset.pyi @@ -0,0 +1,2300 @@ +# import sys +# +# if sys.version_info >= (3, 11): +# from typing import Self +# else: +# from typing_extensions import Self +# from typing import ( +# IO, +# Any, +# Callable, +# Generic, +# Iterator, +# Literal, +# NamedTuple, +# TypeVar, +# overload, +# ) +# +# from _typeshed import StrPath +# +# from . 
import _csv, _json, _parquet, lib +# from ._fs import FileSelector, FileSystem, SupportedFileSystem +# from ._stubs_typing import Indices, JoinType, Order +# from .acero import ExecNodeOptions +# from .compute import Expression +# from .ipc import IpcWriteOptions, RecordBatchReader +# +# class Dataset(lib._Weakrefable): +# """ +# Collection of data fragments and potentially child datasets. +# +# Arrow Datasets allow you to query against data that has been split across +# multiple files. This sharding of data may indicate partitioning, which +# can accelerate queries that only touch some partitions (files). +# """ +# +# @property +# def partition_expression(self) -> Expression: +# """ +# An Expression which evaluates to true for all data viewed by this +# Dataset. +# """ +# def replace_schema(self, schema: lib.Schema) -> None: +# """ +# Return a copy of this Dataset with a different schema. +# +# The copy will view the same Fragments. If the new schema is not +# compatible with the original dataset's schema then an error will +# be raised. +# +# Parameters +# ---------- +# schema : Schema +# The new dataset schema. +# """ +# def get_fragments(self, filter: Expression | None = None): +# """Returns an iterator over the fragments in this dataset. +# +# Parameters +# ---------- +# filter : Expression, default None +# Return fragments matching the optional filter, either using the +# partition_expression or internal information like Parquet's +# statistics. +# +# Returns +# ------- +# fragments : iterator of Fragment +# """ +# def scanner( +# self, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: +# """ +# Build a scan operation against the dataset. +# +# Data is not loaded immediately. Instead, this produces a Scanner, +# which exposes further operations (e.g. loading all data as a +# table, counting rows). +# +# See the :meth:`Scanner.from_dataset` method for further information. +# +# Parameters +# ---------- +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. 
+# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# scanner : Scanner +# +# Examples +# -------- +# >>> import pyarrow as pa +# >>> table = pa.table( +# ... { +# ... "year": [2020, 2022, 2021, 2022, 2019, 2021], +# ... "n_legs": [2, 2, 4, 4, 5, 100], +# ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], +# ... } +# ... ) +# >>> +# >>> import pyarrow.parquet as pq +# >>> pq.write_table(table, "dataset_scanner.parquet") +# +# >>> import pyarrow.dataset as ds +# >>> dataset = ds.dataset("dataset_scanner.parquet") +# +# Selecting a subset of the columns: +# +# >>> dataset.scanner(columns=["year", "n_legs"]).to_table() +# pyarrow.Table +# year: int64 +# n_legs: int64 +# ---- +# year: [[2020,2022,2021,2022,2019,2021]] +# n_legs: [[2,2,4,4,5,100]] +# +# Projecting selected columns using an expression: +# +# >>> dataset.scanner( +# ... columns={ +# ... "n_legs_uint": ds.field("n_legs").cast("uint8"), +# ... } +# ... ).to_table() +# pyarrow.Table +# n_legs_uint: uint8 +# ---- +# n_legs_uint: [[2,2,4,4,5,100]] +# +# Filtering rows while scanning: +# +# >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() +# pyarrow.Table +# year: int64 +# n_legs: int64 +# animal: string +# ---- +# year: [[2022,2021,2022,2021]] +# n_legs: [[2,4,4,100]] +# animal: [["Parrot","Dog","Horse","Centipede"]] +# """ +# def to_batches( +# self, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Iterator[lib.RecordBatch]: +# """ +# Read the dataset as materialized record batches. +# +# Parameters +# ---------- +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. 
+# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# record_batches : iterator of RecordBatch +# """ +# def to_table( +# self, +# columns: list[str] | dict[str, Expression] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Read the dataset to an Arrow table. +# +# Note that this method reads all the selected data from the dataset +# into memory. +# +# Parameters +# ---------- +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). 
+# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# table : Table +# """ +# def take( +# self, +# indices: Indices, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Select rows of data by index. +# +# Parameters +# ---------- +# indices : Array or array-like +# indices of rows to select in the dataset. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. 
+# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# table : Table +# """ +# def head( +# self, +# num_rows: int, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Load the first N rows of the dataset. +# +# Parameters +# ---------- +# num_rows : int +# The number of rows to load. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. 
This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# table : Table +# """ +# def count_rows( +# self, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> int: +# """ +# Count rows matching the scanner filter. +# +# Parameters +# ---------- +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# count : int +# """ +# @property +# def schema(self) -> lib.Schema: +# """The common schema of the full Dataset""" +# def filter(self, expression: Expression) -> Self: +# """ +# Apply a row filter to the dataset. +# +# Parameters +# ---------- +# expression : Expression +# The filter that should be applied to the dataset. +# +# Returns +# ------- +# Dataset +# """ +# def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: +# """ +# Sort the Dataset by one or multiple columns. 
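+# (Editorial usage sketch, not part of the original stub: the scan helpers
+# documented above, assuming pyarrow.dataset's runtime behaviour.)
+#
+#     import pyarrow as pa
+#     import pyarrow.dataset as ds
+#
+#     dataset = ds.dataset(pa.table({"year": [2019, 2021, 2022], "n": [5, 100, 2]}))
+#     dataset.count_rows(filter=ds.field("year") > 2020)
+#     dataset.head(2, columns=["year"])
+#     dataset.filter(ds.field("n") < 50).to_table()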
+# +# Parameters +# ---------- +# sorting : str or list[tuple(name, order)] +# Name of the column to use to sort (ascending), or +# a list of multiple sorting conditions where +# each entry is a tuple with column name +# and sorting order ("ascending" or "descending") +# **kwargs : dict, optional +# Additional sorting options. +# As allowed by :class:`SortOptions` +# +# Returns +# ------- +# InMemoryDataset +# A new dataset sorted according to the sort keys. +# """ +# def join( +# self, +# right_dataset: Dataset, +# keys: str | list[str], +# right_keys: str | list[str] | None = None, +# join_type: JoinType = "left outer", +# left_suffix: str | None = None, +# right_suffix: str | None = None, +# coalesce_keys: bool = True, +# use_threads: bool = True, +# ) -> InMemoryDataset: +# """ +# Perform a join between this dataset and another one. +# +# Result of the join will be a new dataset, where further +# operations can be applied. +# +# Parameters +# ---------- +# right_dataset : dataset +# The dataset to join to the current one, acting as the right dataset +# in the join operation. +# keys : str or list[str] +# The columns from current dataset that should be used as keys +# of the join operation left side. +# right_keys : str or list[str], default None +# The columns from the right_dataset that should be used as keys +# on the join operation right side. +# When ``None`` use the same key names as the left dataset. +# join_type : str, default "left outer" +# The kind of join that should be performed, one of +# ("left semi", "right semi", "left anti", "right anti", +# "inner", "left outer", "right outer", "full outer") +# left_suffix : str, default None +# Which suffix to add to right column names. This prevents confusion +# when the columns in left and right datasets have colliding names. +# right_suffix : str, default None +# Which suffix to add to the left column names. This prevents confusion +# when the columns in left and right datasets have colliding names. +# coalesce_keys : bool, default True +# If the duplicated keys should be omitted from one of the sides +# in the join result. +# use_threads : bool, default True +# Whenever to use multithreading or not. +# +# Returns +# ------- +# InMemoryDataset +# """ +# def join_asof( +# self, +# right_dataset: Dataset, +# on: str, +# by: str | list[str], +# tolerance: int, +# right_on: str | list[str] | None = None, +# right_by: str | list[str] | None = None, +# ) -> InMemoryDataset: +# """ +# Perform an asof join between this dataset and another one. +# +# This is similar to a left-join except that we match on nearest key rather +# than equal keys. Both datasets must be sorted by the key. This type of join +# is most useful for time series data that are not perfectly aligned. +# +# Optionally match on equivalent keys with "by" before searching with "on". +# +# Result of the join will be a new Dataset, where further +# operations can be applied. +# +# Parameters +# ---------- +# right_dataset : dataset +# The dataset to join to the current one, acting as the right dataset +# in the join operation. +# on : str +# The column from current dataset that should be used as the "on" key +# of the join operation left side. +# +# An inexact match is used on the "on" key, i.e. a row is considered a +# match if and only if left_on - tolerance <= right_on <= left_on. +# +# The input table must be sorted by the "on" key. Must be a single +# field of a common type. +# +# Currently, the "on" key must be an integer, date, or timestamp type. 
+# by : str or list[str] +# The columns from current dataset that should be used as the keys +# of the join operation left side. The join operation is then done +# only for the matches in these columns. +# tolerance : int +# The tolerance for inexact "on" key matching. A right row is considered +# a match with the left row `right.on - left.on <= tolerance`. The +# `tolerance` may be: +# +# - negative, in which case a past-as-of-join occurs; +# - or positive, in which case a future-as-of-join occurs; +# - or zero, in which case an exact-as-of-join occurs. +# +# The tolerance is interpreted in the same units as the "on" key. +# right_on : str or list[str], default None +# The columns from the right_dataset that should be used as the on key +# on the join operation right side. +# When ``None`` use the same key name as the left dataset. +# right_by : str or list[str], default None +# The columns from the right_dataset that should be used as by keys +# on the join operation right side. +# When ``None`` use the same key names as the left dataset. +# +# Returns +# ------- +# InMemoryDataset +# """ +# +# class InMemoryDataset(Dataset): +# """ +# A Dataset wrapping in-memory data. +# +# Parameters +# ---------- +# source : RecordBatch, Table, list, tuple +# The data for this dataset. Can be a RecordBatch, Table, list of +# RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader +# If an iterable is provided, the schema must also be provided. +# schema : Schema, optional +# Only required if passing an iterable as the source +# """ +# +# class UnionDataset(Dataset): +# """ +# A Dataset wrapping child datasets. +# +# Children's schemas must agree with the provided schema. +# +# Parameters +# ---------- +# schema : Schema +# A known schema to conform to. +# children : list of Dataset +# One or more input children +# """ +# +# @property +# def children(self) -> list[Dataset]: ... +# +# class FileSystemDataset(Dataset): +# """ +# A Dataset of file fragments. +# +# A FileSystemDataset is composed of one or more FileFragment. +# +# Parameters +# ---------- +# fragments : list[Fragments] +# List of fragments to consume. +# schema : Schema +# The top-level schema of the Dataset. +# format : FileFormat +# File format of the fragments, currently only ParquetFileFormat, +# IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. +# filesystem : FileSystem +# FileSystem of the fragments. +# root_partition : Expression, optional +# The top-level partition of the DataDataset. +# """ +# +# def __init__( +# self, +# fragments: list[Fragment], +# schema: lib.Schema, +# format: FileFormat, +# filesystem: SupportedFileSystem | None = None, +# root_partition: Expression | None = None, +# ) -> None: ... +# @classmethod +# def from_paths( +# cls, +# paths: list[str], +# schema: lib.Schema | None = None, +# format: FileFormat | None = None, +# filesystem: SupportedFileSystem | None = None, +# partitions: list[Expression] | None = None, +# root_partition: Expression | None = None, +# ) -> FileSystemDataset: +# """ +# A Dataset created from a list of paths on a particular filesystem. +# +# Parameters +# ---------- +# paths : list of str +# List of file paths to create the fragments from. +# schema : Schema +# The top-level schema of the DataDataset. +# format : FileFormat +# File format to create fragments from, currently only +# ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. +# filesystem : FileSystem +# The filesystem which files are from. 
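+# For example, a ``pyarrow.fs.LocalFileSystem`` or ``pyarrow.fs.S3FileSystem``
+# instance (illustrative choices; any supported FileSystem works).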
+# partitions : list[Expression], optional +# Attach additional partition information for the file paths. +# root_partition : Expression, optional +# The top-level partition of the DataDataset. +# """ +# @property +# def filesystem(self) -> FileSystem: ... +# @property +# def partitioning(self) -> Partitioning | None: +# """ +# The partitioning of the Dataset source, if discovered. +# +# If the FileSystemDataset is created using the ``dataset()`` factory +# function with a partitioning specified, this will return the +# finalized Partitioning object from the dataset discovery. In all +# other cases, this returns None. +# """ +# @property +# def files(self) -> list[str]: +# """List of the files""" +# @property +# def format(self) -> FileFormat: +# """The FileFormat of this source.""" +# +# class FileWriteOptions(lib._Weakrefable): +# @property +# def format(self) -> FileFormat: ... +# +# class FileFormat(lib._Weakrefable): +# def inspect( +# self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None +# ) -> lib.Schema: +# """ +# Infer the schema of a file. +# +# Parameters +# ---------- +# file : file-like object, path-like or str +# The file or file path to infer a schema from. +# filesystem : Filesystem, optional +# If `filesystem` is given, `file` must be a string and specifies +# the path of the file to read from the filesystem. +# +# Returns +# ------- +# schema : Schema +# The schema inferred from the file +# """ +# def make_fragment( +# self, +# file: StrPath | IO, +# filesystem: SupportedFileSystem | None = None, +# partition_expression: Expression | None = None, +# *, +# file_size: int | None = None, +# ) -> Fragment: +# """ +# Make a FileFragment from a given file. +# +# Parameters +# ---------- +# file : file-like object, path-like or str +# The file or file path to make a fragment from. +# filesystem : Filesystem, optional +# If `filesystem` is given, `file` must be a string and specifies +# the path of the file to read from the filesystem. +# partition_expression : Expression, optional +# An expression that is guaranteed true for all rows in the fragment. Allows +# fragment to be potentially skipped while scanning with a filter. +# file_size : int, optional +# The size of the file in bytes. Can improve performance with high-latency filesystems +# when file size needs to be known before reading. +# +# Returns +# ------- +# fragment : Fragment +# The file fragment +# """ +# def make_write_options(self) -> FileWriteOptions: ... +# @property +# def default_extname(self) -> str: ... +# @property +# def default_fragment_scan_options(self) -> FragmentScanOptions: ... +# @default_fragment_scan_options.setter +# def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... +# +# class Fragment(lib._Weakrefable): +# """Fragment of data from a Dataset.""" +# @property +# def physical_schema(self) -> lib.Schema: +# """Return the physical schema of this Fragment. This schema can be +# different from the dataset read schema.""" +# @property +# def partition_expression(self) -> Expression: +# """An Expression which evaluates to true for all data viewed by this +# Fragment. 
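+#
+# For example, a file fragment discovered under a Hive-style path such as
+# ``year=2020/part-0.parquet`` (an illustrative path) typically carries the
+# expression ``field("year") == 2020``.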
+# """ +# def scanner( +# self, +# schema: lib.Schema | None = None, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: +# """ +# Build a scan operation against the fragment. +# +# Data is not loaded immediately. Instead, this produces a Scanner, +# which exposes further operations (e.g. loading all data as a +# table, counting rows). +# +# Parameters +# ---------- +# schema : Schema +# Schema to use for scanning. This is used to unify a Fragment to +# its Dataset's schema. If not specified this will use the +# Fragment's physical schema which might differ for each Fragment. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. 
+# +# Returns +# ------- +# scanner : Scanner +# """ +# def to_batches( +# self, +# schema: lib.Schema | None = None, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Iterator[lib.RecordBatch]: +# """ +# Read the fragment as materialized record batches. +# +# Parameters +# ---------- +# schema : Schema, optional +# Concrete schema to use for scanning. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. 
+# +# Returns +# ------- +# record_batches : iterator of RecordBatch +# """ +# def to_table( +# self, +# schema: lib.Schema | None = None, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Convert this Fragment into a Table. +# +# Use this convenience utility with care. This will serially materialize +# the Scan result in memory before creating the Table. +# +# Parameters +# ---------- +# schema : Schema, optional +# Concrete schema to use for scanning. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. 
+# +# Returns +# ------- +# table : Table +# """ +# def take( +# self, +# indices: Indices, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Select rows of data by index. +# +# Parameters +# ---------- +# indices : Array or array-like +# The indices of row to select in the dataset. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# Table +# """ +# def head( +# self, +# num_rows: int, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> lib.Table: +# """ +# Load the first N rows of the fragment. 
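+#
+# For example, ``fragment.head(5)`` (an illustrative call) materializes at
+# most the first five rows of this fragment as a Table.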
+# +# Parameters +# ---------- +# num_rows : int +# The number of rows to load. +# columns : list of str, default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# Table +# """ +# def count_rows( +# self, +# columns: list[str] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> int: +# """ +# Count rows matching the scanner filter. +# +# Parameters +# ---------- +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. 
+# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# +# Returns +# ------- +# count : int +# """ +# +# class FileFragment(Fragment): +# """A Fragment representing a data file.""" +# +# def open(self) -> lib.NativeFile: +# """ +# Open a NativeFile of the buffer or file viewed by this fragment. +# """ +# @property +# def path(self) -> str: +# """ +# The path of the data file viewed by this fragment, if it views a +# file. If instead it views a buffer, this will be "". +# """ +# @property +# def filesystem(self) -> FileSystem: +# """ +# The FileSystem containing the data file viewed by this fragment, if +# it views a file. If instead it views a buffer, this will be None. +# """ +# @property +# def buffer(self) -> lib.Buffer: +# """ +# The buffer viewed by this fragment, if it views a buffer. If +# instead it views a file, this will be None. +# """ +# @property +# def format(self) -> FileFormat: +# """ +# The format of the data file viewed by this fragment. +# """ +# +# class FragmentScanOptions(lib._Weakrefable): +# """Scan options specific to a particular fragment and scan operation.""" +# +# @property +# def type_name(self) -> str: ... +# +# class IpcFileWriteOptions(FileWriteOptions): +# @property +# def write_options(self) -> IpcWriteOptions: ... +# @write_options.setter +# def write_options(self, write_options: IpcWriteOptions) -> None: ... +# +# class IpcFileFormat(FileFormat): +# def equals(self, other: IpcFileFormat) -> bool: ... +# def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... +# @property +# def default_extname(self) -> str: ... +# +# class FeatherFileFormat(IpcFileFormat): ... +# +# class CsvFileFormat(FileFormat): +# """ +# FileFormat for CSV files. +# +# Parameters +# ---------- +# parse_options : pyarrow.csv.ParseOptions +# Options regarding CSV parsing. +# default_fragment_scan_options : CsvFragmentScanOptions +# Default options for fragments scan. +# convert_options : pyarrow.csv.ConvertOptions +# Options regarding value conversion. +# read_options : pyarrow.csv.ReadOptions +# General read options. +# """ +# def __init__( +# self, +# parse_options: _csv.ParseOptions | None = None, +# default_fragment_scan_options: CsvFragmentScanOptions | None = None, +# convert_options: _csv.ConvertOptions | None = None, +# read_options: _csv.ReadOptions | None = None, +# ) -> None: ... +# def make_write_options(self) -> _csv.WriteOptions: ... # type: ignore[override] +# @property +# def parse_options(self) -> _csv.ParseOptions: ... +# @parse_options.setter +# def parse_options(self, parse_options: _csv.ParseOptions) -> None: ... 
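+# # A minimal usage sketch of this format class (the ``data/`` directory and
+# # the ``;`` delimiter are illustrative assumptions, not part of this API):
+# #
+# #     import pyarrow.csv as csv
+# #     import pyarrow.dataset as ds
+# #
+# #     fmt = ds.CsvFileFormat(parse_options=csv.ParseOptions(delimiter=";"))
+# #     dataset = ds.dataset("data/", format=fmt)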
+# def equals(self, other: CsvFileFormat) -> bool: ... +# +# class CsvFragmentScanOptions(FragmentScanOptions): +# """ +# Scan-specific options for CSV fragments. +# +# Parameters +# ---------- +# convert_options : pyarrow.csv.ConvertOptions +# Options regarding value conversion. +# read_options : pyarrow.csv.ReadOptions +# General read options. +# """ +# +# convert_options: _csv.ConvertOptions +# read_options: _csv.ReadOptions +# +# def __init__( +# self, convert_options: _csv.ConvertOptions, read_options: _csv.ReadOptions +# ) -> None: ... +# def equals(self, other: CsvFragmentScanOptions) -> bool: ... +# +# class CsvFileWriteOptions(FileWriteOptions): +# write_options: _csv.WriteOptions +# +# class JsonFileFormat(FileFormat): +# """ +# FileFormat for JSON files. +# +# Parameters +# ---------- +# default_fragment_scan_options : JsonFragmentScanOptions +# Default options for fragments scan. +# parse_options : pyarrow.json.ParseOptions +# Options regarding json parsing. +# read_options : pyarrow.json.ReadOptions +# General read options. +# """ +# def __init__( +# self, +# default_fragment_scan_options: JsonFragmentScanOptions | None = None, +# parse_options: _json.ParseOptions | None = None, +# read_options: _json.ReadOptions | None = None, +# ) -> None: ... +# def equals(self, other: JsonFileFormat) -> bool: ... +# +# class JsonFragmentScanOptions(FragmentScanOptions): +# """ +# Scan-specific options for JSON fragments. +# +# Parameters +# ---------- +# parse_options : pyarrow.json.ParseOptions +# Options regarding JSON parsing. +# read_options : pyarrow.json.ReadOptions +# General read options. +# """ +# +# parse_options: _json.ParseOptions +# read_options: _json.ReadOptions +# def __init__( +# self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions +# ) -> None: ... +# def equals(self, other: JsonFragmentScanOptions) -> bool: ... +# +# class Partitioning(lib._Weakrefable): +# def parse(self, path: str) -> Expression: +# """ +# Parse a path into a partition expression. +# +# Parameters +# ---------- +# path : str +# +# Returns +# ------- +# pyarrow.dataset.Expression +# """ +# def format(self, expr: Expression) -> tuple[str, str]: +# """ +# Convert a filter expression into a tuple of (directory, filename) using +# the current partitioning scheme +# +# Parameters +# ---------- +# expr : pyarrow.dataset.Expression +# +# Returns +# ------- +# tuple[str, str] +# +# Examples +# -------- +# +# Specify the Schema for paths like "/2009/June": +# +# >>> import pyarrow as pa +# >>> import pyarrow.dataset as ds +# >>> import pyarrow.compute as pc +# >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) +# >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) +# ('1862/Jan', '') +# """ +# @property +# def schema(self) -> lib.Schema: +# """The arrow Schema attached to the partitioning.""" +# +# class PartitioningFactory(lib._Weakrefable): +# @property +# def type_name(self) -> str: ... +# +# class KeyValuePartitioning(Partitioning): +# @property +# def dictionaries(self) -> list[lib.Array | None]: +# """ +# The unique values for each partition field, if available. +# +# Those values are only available if the Partitioning object was +# created through dataset discovery from a PartitioningFactory, or +# if the dictionaries were manually specified in the constructor. +# If no dictionary field is available, this returns an empty list. 
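+#
+# For example, a partitioning discovered with ``infer_dictionary=True`` over
+# directories named ``year=2020`` and ``year=2021`` (illustrative paths) would
+# typically expose a dictionary array holding those two year values for the
+# ``year`` field.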
+# """ +# +# class DirectoryPartitioning(KeyValuePartitioning): +# """ +# A Partitioning based on a specified Schema. +# +# The DirectoryPartitioning expects one segment in the file path for each +# field in the schema (all fields are required to be present). +# For example given schema the path "/2009/11" would +# be parsed to ("year"_ == 2009 and "month"_ == 11). +# +# Parameters +# ---------- +# schema : Schema +# The schema that describes the partitions present in the file path. +# dictionaries : dict[str, Array] +# If the type of any field of `schema` is a dictionary type, the +# corresponding entry of `dictionaries` must be an array containing +# every value which may be taken by the corresponding column or an +# error will be raised in parsing. +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# DirectoryPartitioning +# +# Examples +# -------- +# >>> from pyarrow.dataset import DirectoryPartitioning +# >>> partitioning = DirectoryPartitioning( +# ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) +# ... ) +# >>> print(partitioning.parse("/2009/11/")) +# ((year == 2009) and (month == 11)) +# """ +# +# @staticmethod +# def discover( +# field_names: list[str] | None = None, +# infer_dictionary: bool = False, +# max_partition_dictionary_size: int = 0, +# schema: lib.Schema | None = None, +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> PartitioningFactory: +# """ +# Discover a DirectoryPartitioning. +# +# Parameters +# ---------- +# field_names : list of str +# The names to associate with the values from the subdirectory names. +# If schema is given, will be populated from the schema. +# infer_dictionary : bool, default False +# When inferring a schema for partition fields, yield dictionary +# encoded types instead of plain types. This can be more efficient +# when materializing virtual columns, and Expressions parsed by the +# finished Partitioning will include dictionaries of all unique +# inspected values for each field. +# max_partition_dictionary_size : int, default 0 +# Synonymous with infer_dictionary for backwards compatibility with +# 1.0: setting this to -1 or None is equivalent to passing +# infer_dictionary=True. +# schema : Schema, default None +# Use this schema instead of inferring a schema from partition +# values. Partition values will be validated against this schema +# before accumulation into the Partitioning's dictionary. +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# PartitioningFactory +# To be used in the FileSystemFactoryOptions. +# """ +# def __init__( +# self, +# schema: lib.Schema, +# dictionaries: dict[str, lib.Array] | None = None, +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> None: ... +# +# class HivePartitioning(KeyValuePartitioning): +# """ +# A Partitioning for "/$key=$value/" nested directories as found in +# Apache Hive. +# +# Multi-level, directory based partitioning scheme originating from +# Apache Hive with all data files stored in the leaf directories. Data is +# partitioned by static values of a particular column in the schema. +# Partition keys are represented in the form $key=$value in directory names. +# Field order is ignored, as are missing or unrecognized field names. 
+# +# For example, given schema, a possible +# path would be "/year=2009/month=11/day=15". +# +# Parameters +# ---------- +# schema : Schema +# The schema that describes the partitions present in the file path. +# dictionaries : dict[str, Array] +# If the type of any field of `schema` is a dictionary type, the +# corresponding entry of `dictionaries` must be an array containing +# every value which may be taken by the corresponding column or an +# error will be raised in parsing. +# null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" +# If any field is None then this fallback will be used as a label +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# HivePartitioning +# +# Examples +# -------- +# >>> from pyarrow.dataset import HivePartitioning +# >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) +# >>> print(partitioning.parse("/year=2009/month=11/")) +# ((year == 2009) and (month == 11)) +# +# """ +# def __init__( +# self, +# schema: lib.Schema, +# dictionaries: dict[str, lib.Array] | None = None, +# null_fallback: str = "__HIVE_DEFAULT_PARTITION__", +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> None: ... +# @staticmethod +# def discover( +# infer_dictionary: bool = False, +# max_partition_dictionary_size: int = 0, +# null_fallback="__HIVE_DEFAULT_PARTITION__", +# schema: lib.Schema | None = None, +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> PartitioningFactory: +# """ +# Discover a HivePartitioning. +# +# Parameters +# ---------- +# infer_dictionary : bool, default False +# When inferring a schema for partition fields, yield dictionary +# encoded types instead of plain. This can be more efficient when +# materializing virtual columns, and Expressions parsed by the +# finished Partitioning will include dictionaries of all unique +# inspected values for each field. +# max_partition_dictionary_size : int, default 0 +# Synonymous with infer_dictionary for backwards compatibility with +# 1.0: setting this to -1 or None is equivalent to passing +# infer_dictionary=True. +# null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" +# When inferring a schema for partition fields this value will be +# replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ +# for compatibility with Spark +# schema : Schema, default None +# Use this schema instead of inferring a schema from partition +# values. Partition values will be validated against this schema +# before accumulation into the Partitioning's dictionary. +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# PartitioningFactory +# To be used in the FileSystemFactoryOptions. +# """ +# +# class FilenamePartitioning(KeyValuePartitioning): +# """ +# A Partitioning based on a specified Schema. +# +# The FilenamePartitioning expects one segment in the file name for each +# field in the schema (all fields are required to be present) separated +# by '_'. For example given schema the name +# ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). +# +# Parameters +# ---------- +# schema : Schema +# The schema that describes the partitions present in the file path. 
+# dictionaries : dict[str, Array] +# If the type of any field of `schema` is a dictionary type, the +# corresponding entry of `dictionaries` must be an array containing +# every value which may be taken by the corresponding column or an +# error will be raised in parsing. +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# FilenamePartitioning +# +# Examples +# -------- +# >>> from pyarrow.dataset import FilenamePartitioning +# >>> partitioning = FilenamePartitioning( +# ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) +# ... ) +# >>> print(partitioning.parse("2009_11_data.parquet")) +# ((year == 2009) and (month == 11)) +# """ +# +# def __init__( +# self, +# schema: lib.Schema, +# dictionaries: dict[str, lib.Array] | None = None, +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> None: ... +# @staticmethod +# def discover( +# field_names: list[str] | None = None, +# infer_dictionary: bool = False, +# schema: lib.Schema | None = None, +# segment_encoding: Literal["uri", "none"] = "uri", +# ) -> PartitioningFactory: +# """ +# Discover a FilenamePartitioning. +# +# Parameters +# ---------- +# field_names : list of str +# The names to associate with the values from the subdirectory names. +# If schema is given, will be populated from the schema. +# infer_dictionary : bool, default False +# When inferring a schema for partition fields, yield dictionary +# encoded types instead of plain types. This can be more efficient +# when materializing virtual columns, and Expressions parsed by the +# finished Partitioning will include dictionaries of all unique +# inspected values for each field. +# schema : Schema, default None +# Use this schema instead of inferring a schema from partition +# values. Partition values will be validated against this schema +# before accumulation into the Partitioning's dictionary. +# segment_encoding : str, default "uri" +# After splitting paths into segments, decode the segments. Valid +# values are "uri" (URI-decode segments) and "none" (leave as-is). +# +# Returns +# ------- +# PartitioningFactory +# To be used in the FileSystemFactoryOptions. +# """ +# +# class DatasetFactory(lib._Weakrefable): +# """ +# DatasetFactory is used to create a Dataset, inspect the Schema +# of the fragments contained in it, and declare a partitioning. +# """ +# +# root_partition: Expression +# def finish(self, schema: lib.Schema | None = None) -> Dataset: +# """ +# Create a Dataset using the inspected schema or an explicit schema +# (if given). +# +# Parameters +# ---------- +# schema : Schema, default None +# The schema to conform the source to. If None, the inspected +# schema is used. +# +# Returns +# ------- +# Dataset +# """ +# def inspect(self) -> lib.Schema: +# """ +# Inspect all data fragments and return a common Schema. +# +# Returns +# ------- +# Schema +# """ +# def inspect_schemas(self) -> list[lib.Schema]: ... +# +# class FileSystemFactoryOptions(lib._Weakrefable): +# """ +# Influences the discovery of filesystem paths. +# +# Parameters +# ---------- +# partition_base_dir : str, optional +# For the purposes of applying the partitioning, paths will be +# stripped of the partition_base_dir. Files not matching the +# partition_base_dir prefix will be skipped for partitioning discovery. +# The ignored files will still be part of the Dataset, but will not +# have partition information. 
+# partitioning : Partitioning/PartitioningFactory, optional +# Apply the Partitioning to every discovered Fragment. See Partitioning or +# PartitioningFactory documentation. +# exclude_invalid_files : bool, optional (default True) +# If True, invalid files will be excluded (file format specific check). +# This will incur IO for each files in a serial and single threaded +# fashion. Disabling this feature will skip the IO, but unsupported +# files may be present in the Dataset (resulting in an error at scan +# time). +# selector_ignore_prefixes : list, optional +# When discovering from a Selector (and not from an explicit file list), +# ignore files and directories matching any of these prefixes. +# By default this is ['.', '_']. +# """ +# +# partitioning: Partitioning +# partitioning_factory: PartitioningFactory +# partition_base_dir: str +# exclude_invalid_files: bool +# selector_ignore_prefixes: list[str] +# +# def __init__( +# self, +# artition_base_dir: str | None = None, +# partitioning: Partitioning | PartitioningFactory | None = None, +# exclude_invalid_files: bool = True, +# selector_ignore_prefixes: list[str] | None = None, +# ) -> None: ... +# +# class FileSystemDatasetFactory(DatasetFactory): +# """ +# Create a DatasetFactory from a list of paths with schema inspection. +# +# Parameters +# ---------- +# filesystem : pyarrow.fs.FileSystem +# Filesystem to discover. +# paths_or_selector : pyarrow.fs.FileSelector or list of path-likes +# Either a Selector object or a list of path-like objects. +# format : FileFormat +# Currently only ParquetFileFormat and IpcFileFormat are supported. +# options : FileSystemFactoryOptions, optional +# Various flags influencing the discovery of filesystem paths. +# """ +# +# def __init__( +# self, +# filesystem: SupportedFileSystem, +# paths_or_selector: FileSelector, +# format: FileFormat, +# options: FileSystemFactoryOptions | None = None, +# ) -> None: ... +# +# class UnionDatasetFactory(DatasetFactory): +# """ +# Provides a way to inspect/discover a Dataset's expected schema before +# materialization. +# +# Parameters +# ---------- +# factories : list of DatasetFactory +# """ +# def __init__(self, factories: list[DatasetFactory]) -> None: ... +# +# _RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) +# +# class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): +# """An iterator over a sequence of record batches.""" +# def __iter__(self) -> Self: ... +# def __next__(self) -> _RecordBatchT: ... +# +# class TaggedRecordBatch(NamedTuple): +# """ +# A combination of a record batch and the fragment it came from. +# +# Parameters +# ---------- +# record_batch : RecordBatch +# The record batch. +# fragment : Fragment +# Fragment of the record batch. +# """ +# +# record_batch: lib.RecordBatch +# fragment: Fragment +# +# class TaggedRecordBatchIterator(lib._Weakrefable): +# """An iterator over a sequence of record batches with fragments.""" +# def __iter__(self) -> Self: ... +# def __next__(self) -> TaggedRecordBatch: ... +# +# class Scanner(lib._Weakrefable): +# """A materialized scan operation with context and options bound. +# +# A scanner is the class that glues the scan tasks, data fragments and data +# sources together. 
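+#
+# A minimal usage sketch (the ``"data/"`` path and the ``year`` column are
+# illustrative assumptions, not part of this API)::
+#
+#     import pyarrow.compute as pc
+#     import pyarrow.dataset as ds
+#
+#     dataset = ds.dataset("data/", format="parquet")
+#     scanner = ds.Scanner.from_dataset(
+#         dataset, columns=["year"], filter=pc.field("year") > 2000
+#     )
+#     table = scanner.to_table()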
+# """ +# @staticmethod +# def from_dataset( +# dataset: Dataset, +# *, +# columns: list[str] | dict[str, Expression] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: +# """ +# Create Scanner from Dataset, +# +# Parameters +# ---------- +# dataset : Dataset +# Dataset to scan. +# columns : list[str] or dict[str, Expression], default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. 
+# """ +# @staticmethod +# def from_fragment( +# fragment: Fragment, +# *, +# schema: lib.Schema | None = None, +# columns: list[str] | dict[str, Expression] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: +# """ +# Create Scanner from Fragment, +# +# Parameters +# ---------- +# fragment : Fragment +# fragment to scan. +# schema : Schema, optional +# The schema of the fragment. +# columns : list[str] or dict[str, Expression], default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. 
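+#
+# Examples
+# --------
+# A sketch only (assumes ``dataset`` is an already constructed
+# FileSystemDataset; the ``year`` column is illustrative)::
+#
+#     fragment = next(iter(dataset.get_fragments()))
+#     scanner = Scanner.from_fragment(fragment, columns=["year"])
+#     table = scanner.to_table()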
+# """ +# @overload +# @staticmethod +# def from_batches( +# source: Iterator[lib.RecordBatch], +# *, +# schema: lib.Schema, +# columns: list[str] | dict[str, Expression] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: ... +# @overload +# @staticmethod +# def from_batches( +# source: RecordBatchReader, +# *, +# columns: list[str] | dict[str, Expression] | None = None, +# filter: Expression | None = None, +# batch_size: int = ..., +# batch_readahead: int = 16, +# fragment_readahead: int = 4, +# fragment_scan_options: FragmentScanOptions | None = None, +# use_threads: bool = True, +# cache_metadata: bool = True, +# memory_pool: lib.MemoryPool | None = None, +# ) -> Scanner: ... +# @staticmethod +# def from_batches(*args, **kwargs): +# """ +# Create a Scanner from an iterator of batches. +# +# This creates a scanner which can be used only once. It is +# intended to support writing a dataset (which takes a scanner) +# from a source which can be read only once (e.g. a +# RecordBatchReader or generator). +# +# Parameters +# ---------- +# source : Iterator or Arrow-compatible stream object +# The iterator of Batches. This can be a pyarrow RecordBatchReader, +# any object that implements the Arrow PyCapsule Protocol for +# streams, or an actual Python iterator of RecordBatches. +# schema : Schema +# The schema of the batches (required when passing a Python +# iterator). +# columns : list[str] or dict[str, Expression], default None +# The columns to project. This can be a list of column names to +# include (order and duplicates will be preserved), or a dictionary +# with {new_column_name: expression} values for more advanced +# projections. +# +# The list of columns or expressions may use the special fields +# `__batch_index` (the index of the batch within the fragment), +# `__fragment_index` (the index of the fragment within the dataset), +# `__last_in_fragment` (whether the batch is last in fragment), and +# `__filename` (the name of the source file or a description of the +# source fragment). +# +# The columns will be passed down to Datasets and corresponding data +# fragments to avoid loading, copying, and deserializing columns +# that will not be required further down the compute chain. +# By default all of the available columns are projected. Raises +# an exception if any of the referenced column names does not exist +# in the dataset's Schema. +# filter : Expression, default None +# Scan will return only the rows matching the filter. +# If possible the predicate will be pushed down to exploit the +# partition information or internal metadata found in the data +# source, e.g. Parquet statistics. Otherwise filters the loaded +# RecordBatches before yielding them. +# batch_size : int, default 131_072 +# The maximum row count for scanned record batches. If scanned +# record batches are overflowing memory then this method can be +# called to reduce their size. +# batch_readahead : int, default 16 +# The number of batches to read ahead in a file. This might not work +# for all file formats. Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_readahead : int, default 4 +# The number of files to read ahead. 
Increasing this number will increase +# RAM usage but could also improve IO utilization. +# fragment_scan_options : FragmentScanOptions, default None +# Options specific to a particular scan and fragment type, which +# can change between different scans of the same dataset. +# use_threads : bool, default True +# If enabled, then maximum parallelism will be used determined by +# the number of available CPU cores. +# cache_metadata : bool, default True +# If enabled, metadata may be cached when scanning to speed up +# repeated scans. +# memory_pool : MemoryPool, default None +# For memory allocations, if required. If not specified, uses the +# default pool. +# """ +# @property +# def dataset_schema(self) -> lib.Schema: +# """The schema with which batches will be read from fragments.""" +# @property +# def projected_schema(self) -> lib.Schema: +# """ +# The materialized schema of the data, accounting for projections. +# +# This is the schema of any data returned from the scanner. +# """ +# def to_batches(self) -> Iterator[lib.RecordBatch]: +# """ +# Consume a Scanner in record batches. +# +# Returns +# ------- +# record_batches : iterator of RecordBatch +# """ +# def scan_batches(self) -> TaggedRecordBatchIterator: +# """ +# Consume a Scanner in record batches with corresponding fragments. +# +# Returns +# ------- +# record_batches : iterator of TaggedRecordBatch +# """ +# def to_table(self) -> lib.Table: +# """ +# Convert a Scanner into a Table. +# +# Use this convenience utility with care. This will serially materialize +# the Scan result in memory before creating the Table. +# +# Returns +# ------- +# Table +# """ +# def take(self, indices: Indices) -> lib.Table: +# """ +# Select rows of data by index. +# +# Will only consume as many batches of the underlying dataset as +# needed. Otherwise, this is equivalent to +# ``to_table().take(indices)``. +# +# Parameters +# ---------- +# indices : Array or array-like +# indices of rows to select in the dataset. +# +# Returns +# ------- +# Table +# """ +# def head(self, num_rows: int) -> lib.Table: +# """ +# Load the first N rows of the dataset. +# +# Parameters +# ---------- +# num_rows : int +# The number of rows to load. +# +# Returns +# ------- +# Table +# """ +# def count_rows(self) -> int: +# """ +# Count rows matching the scanner filter. +# +# Returns +# ------- +# count : int +# """ +# def to_reader(self) -> RecordBatchReader: +# """Consume this scanner as a RecordBatchReader. +# +# Returns +# ------- +# RecordBatchReader +# """ +# +# def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: +# """ +# Extract partition keys (equality constraints between a field and a scalar) +# from an expression as a dict mapping the field's name to its value. +# +# NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning +# will be conjunctions of equality conditions and are accessible through this +# function. Other subexpressions will be ignored. +# +# Parameters +# ---------- +# partition_expression : pyarrow.dataset.Expression +# +# Returns +# ------- +# dict +# +# Examples +# -------- +# +# For example, an expression of +# +# is converted to {'part': 'A', 'year': 2016} +# """ +# +# class WrittenFile(lib._Weakrefable): +# """ +# Metadata information about files written as +# part of a dataset write operation +# +# Parameters +# ---------- +# path : str +# Path to the file. +# metadata : pyarrow.parquet.FileMetaData, optional +# For Parquet files, the Parquet file metadata. 
+# size : int +# The size of the file in bytes. +# """ +# def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ... +# +# def _filesystemdataset_write( +# data: Scanner, +# base_dir: StrPath, +# basename_template: str, +# filesystem: SupportedFileSystem, +# partitioning: Partitioning, +# file_options: FileWriteOptions, +# max_partitions: int, +# file_visitor: Callable[[str], None], +# existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"], +# max_open_files: int, +# max_rows_per_file: int, +# min_rows_per_group: int, +# max_rows_per_group: int, +# create_dir: bool, +# ): ... +# +# class _ScanNodeOptions(ExecNodeOptions): +# def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ... +# +# class ScanNodeOptions(_ScanNodeOptions): +# """ +# A Source node which yields batches from a Dataset scan. +# +# This is the option class for the "scan" node factory. +# +# This node is capable of applying pushdown projections or filters +# to the file readers which reduce the amount of data that needs to +# be read (if supported by the file format). But note that this does not +# construct associated filter or project nodes to perform the final +# filtering or projection. Rather, you may supply the same filter +# expression or projection to the scan node that you also supply +# to the filter or project node. +# +# Yielded batches will be augmented with fragment/batch indices when +# implicit_ordering=True to enable stable ordering for simple ExecPlans. +# +# Parameters +# ---------- +# dataset : pyarrow.dataset.Dataset +# The table which acts as the data source. +# **kwargs : dict, optional +# Scan options. See `Scanner.from_dataset` for possible arguments. +# require_sequenced_output : bool, default False +# Batches are yielded sequentially, like single-threaded +# implicit_ordering : bool, default False +# Preserve implicit ordering of data. +# """ +# +# def __init__( +# self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs +# ) -> None: ... diff --git a/python/pyarrow-stubs/_ipc.pyi b/python/pyarrow-stubs/_ipc.pyi new file mode 100644 index 00000000000..fc48cae3c04 --- /dev/null +++ b/python/pyarrow-stubs/_ipc.pyi @@ -0,0 +1,709 @@ +import enum +import sys + +from io import IOBase + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Iterable, Iterator, Literal, Mapping, NamedTuple + +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowStream, SupportPyBuffer +from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakrefable + +from .io import Buffer, Codec, NativeFile +from ._types import DictionaryMemo, KeyValueMetadata + +class MetadataVersion(enum.IntEnum): + V1 = enum.auto() + V2 = enum.auto() + V3 = enum.auto() + V4 = enum.auto() + V5 = enum.auto() + +class WriteStats(NamedTuple): + """IPC write statistics + + Parameters + ---------- + num_messages : int + Number of messages. + num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class ReadStats(NamedTuple): + """IPC read statistics + + Parameters + ---------- + num_messages : int + Number of messages. 
+ num_record_batches : int + Number of record batches. + num_dictionary_batches : int + Number of dictionary batches. + num_dictionary_deltas : int + Delta of dictionaries. + num_replaced_dictionaries : int + Number of replaced dictionaries. + """ + + num_messages: int + num_record_batches: int + num_dictionary_batches: int + num_dictionary_deltas: int + num_replaced_dictionaries: int + +class IpcReadOptions(_Weakrefable): + """ + Serialization options for reading IPC format. + + Parameters + ---------- + ensure_native_endian : bool, default True + Whether to convert incoming data to platform-native endianness. + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like decompression + included_fields : list + If empty (the default), return all deserialized fields. + If non-empty, the values are the indices of fields to read on + the top-level schema + """ + + ensure_native_endian: bool + use_threads: bool + included_fields: list[int] + def __init__( + self, + *, + ensure_native_endian: bool = True, + use_threads: bool = True, + included_fields: list[int] | None = None, + ) -> None: ... + +class IpcWriteOptions(_Weakrefable): + """ + Serialization options for the IPC format. + + Parameters + ---------- + metadata_version : MetadataVersion, default MetadataVersion.V5 + The metadata version to write. V5 is the current and latest, + V4 is the pre-1.0 metadata version (with incompatible Union layout). + allow_64bit : bool, default False + If true, allow field lengths that don't fit in a signed 32-bit int. + use_legacy_format : bool, default False + Whether to use the pre-Arrow 0.15 IPC format. + compression : str, Codec, or None + compression codec to use for record batch buffers. + If None then batch buffers will be uncompressed. + Must be "lz4", "zstd" or None. + To specify a compression_level use `pyarrow.Codec` + use_threads : bool + Whether to use the global CPU thread pool to parallelize any + computational tasks like compression. + emit_dictionary_deltas : bool + Whether to emit dictionary deltas. Default is false for maximum + stream compatibility. + unify_dictionaries : bool + If true then calls to write_table will attempt to unify dictionaries + across all batches in the table. This can help avoid the need for + replacement dictionaries (which the file format does not support) + but requires computing the unified dictionary and then remapping + the indices arrays. + + This parameter is ignored when writing to the IPC stream format as + the IPC stream format can support replacement dictionaries. + """ + + metadata_version: MetadataVersion + allow_64bit: bool + use_legacy_format: bool + compression: Codec | Literal["lz4", "zstd"] | None + use_threads: bool + emit_dictionary_deltas: bool + unify_dictionaries: bool + def __init__( + self, + *, + metadata_version: MetadataVersion = MetadataVersion.V5, + allow_64bit: bool = False, + use_legacy_format: bool = False, + compression: Codec | Literal["lz4", "zstd"] | None = None, + use_threads: bool = True, + emit_dictionary_deltas: bool = False, + unify_dictionaries: bool = False, + ) -> None: ... + +class Message(_Weakrefable): + """ + Container for an Arrow IPC message with metadata and optional body + """ + + @property + def type(self) -> str: ... + @property + def metadata(self) -> Buffer: ... + @property + def metadata_version(self) -> MetadataVersion: ... + @property + def body(self) -> Buffer | None: ... + def equals(self, other: Message) -> bool: ... 
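+    # A minimal usage sketch, assuming ``buf`` is a Buffer holding one
+    # encapsulated IPC message (for example produced by ``serialize()``):
+    #
+    #   import pyarrow as pa
+    #   msg = pa.ipc.read_message(buf)   # -> Message
+    #   msg.type                         # e.g. "record batch"
+    #   msg.serialize()                  # round-trip back to a Buffer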
+ def serialize_to( + self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None + ): + """ + Write message to generic OutputStream + + Parameters + ---------- + sink : NativeFile + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + """ + def serialize(self, alignment: int = 8, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write message as encapsulated IPC message + + Parameters + ---------- + alignment : int, default 8 + Byte alignment for metadata and body + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + """ + +class MessageReader(_Weakrefable): + """ + Interface for reading Message objects from some source (like an + InputStream) + """ + @classmethod + def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: + """ + Open stream from source, if you want to use memory map use + MemoryMappedFile as source. + + Parameters + ---------- + source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object + A readable source, like an InputStream + """ + def __iter__(self) -> Self: ... + def read_next_message(self) -> Message: + """ + Read next Message from the stream. + + Raises + ------ + StopIteration + At end of stream + """ + __next__ = read_next_message + +# ---------------------------------------------------------------------- +# File and stream readers and writers + +class _CRecordBatchWriter(_Weakrefable): + """The base RecordBatchWriter wrapper. + + Provides common implementations of convenience methods. Should not + be instantiated directly by user code. + """ + def write(self, table_or_batch: Table | RecordBatch): + """ + Write RecordBatch or Table to stream. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + """ + def write_batch( + self, + batch: RecordBatch, + custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None, + ): + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + custom_metadata : mapping or KeyValueMetadata + Keys and values must be string-like / coercible to bytes + """ + def write_table(self, table: Table, max_chunksize: int | None = None) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def stats(self) -> WriteStats: + """ + Current IPC write statistics. + """ + +class _RecordBatchStreamWriter(_CRecordBatchWriter): + @property + def _use_legacy_format(self) -> bool: ... + @property + def _metadata_version(self) -> MetadataVersion: ... + def _open(self, sink, schema: Schema, options: IpcWriteOptions = IpcWriteOptions()): ... + + +class _ReadPandasMixin: + def read_pandas(self, **options) -> pd.DataFrame: + """ + Read contents of stream to a pandas.DataFrame. + + Read all record batches as a pyarrow.Table then convert it to a + pandas.DataFrame using Table.to_pandas. + + Parameters + ---------- + **options + Arguments to forward to :meth:`Table.to_pandas`. 
+ + Returns + ------- + df : pandas.DataFrame + """ + +class RecordBatchReader(_Weakrefable): + """Base class for reading stream of record batches. + + Record batch readers function as iterators of record batches that also + provide the schema (without the need to get any batches). + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatchReader.from_*`` functions instead. + + Notes + ----- + To import and export using the Arrow C stream interface, use the + ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this + interface is intended for expert users. + + Examples + -------- + >>> import pyarrow as pa + >>> schema = pa.schema([("x", pa.int64())]) + >>> def iter_record_batches(): + ... for i in range(2): + ... yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema) + >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches()) + >>> print(reader.schema) + x: int64 + >>> for batch in reader: + ... print(batch) + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + pyarrow.RecordBatch + x: int64 + ---- + x: [1,2,3] + """ + + def __iter__(self) -> Self: ... + def read_next_batch(self) -> RecordBatch: + """ + Read next RecordBatch from the stream. + + Raises + ------ + StopIteration: + At end of stream. + + Returns + ------- + RecordBatch + """ + __next__ = read_next_batch + @property + def schema(self) -> Schema: + """ + Shared schema of the record batches in the stream. + + Returns + ------- + Schema + """ + def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: + """ + Read next RecordBatch from the stream along with its custom metadata. + + Raises + ------ + StopIteration: + At end of stream. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def iter_batches_with_custom_metadata( + self, + ) -> Iterator[RecordBatchWithMetadata]: + """ + Iterate over record batches from the stream along with their custom + metadata. + + Yields + ------ + RecordBatchWithMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table. + + Returns + ------- + Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def close(self) -> None: + """ + Release any resources associated with the reader. + """ + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + def cast(self, target_schema: Schema) -> Self: + """ + Wrap this reader with one that casts each batch lazily as it is pulled. + Currently only a safe cast to target_schema is implemented. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + + Returns + ------- + RecordBatchReader + """ + def _export_to_c(self, out_ptr: int) -> None: + """ + Export to a C ArrowArrayStream struct, given its pointer. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + Be careful: if you don't pass the ArrowArrayStream struct to a + consumer, array memory will leak. This is a low-level function + intended for expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream struct, + given its pointer. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArrayStream struct. + + This is a low-level function intended for expert users. 
+ """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export to a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import RecordBatchReader from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_stream(cls, data: SupportArrowStream, schema: Schema | None = None) -> Self: + """ + Create RecordBatchReader from a Arrow-compatible stream object. + + This accepts objects implementing the Arrow PyCapsule Protocol for + streams, i.e. objects that have a ``__arrow_c_stream__`` method. + + Parameters + ---------- + data : Arrow-compatible stream object + Any object that implements the Arrow PyCapsule Protocol for + streams. + schema : Schema, default None + The schema to which the stream should be casted, if supported + by the stream object. + + Returns + ------- + RecordBatchReader + """ + @classmethod + def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: + """ + Create RecordBatchReader from an iterable of batches. + + Parameters + ---------- + schema : Schema + The shared schema of the record batches + batches : Iterable[RecordBatch] + The batches that this reader will return. + + Returns + ------- + reader : RecordBatchReader + """ + +class _RecordBatchStreamReader(RecordBatchReader): + @property + def stats(self) -> ReadStats: + """ + Current IPC read statistics. + """ + +class _RecordBatchFileWriter(_RecordBatchStreamWriter): ... + +class RecordBatchWithMetadata(NamedTuple): + """RecordBatch with its custom metadata + + Parameters + ---------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + + batch: RecordBatch + custom_metadata: KeyValueMetadata + +class _RecordBatchFileReader(_Weakrefable): + @property + def num_record_batches(self) -> int: + """ + The number of record batches in the IPC file. + """ + def get_batch(self, i: int) -> RecordBatch: + """ + Read the record batch with the given index. + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + """ + get_record_batch = get_batch + def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: + """ + Read the record batch with the given index along with + its custom metadata + + Parameters + ---------- + i : int + The index of the record batch in the IPC file. + + Returns + ------- + batch : RecordBatch + custom_metadata : KeyValueMetadata + """ + def read_all(self) -> Table: + """ + Read all record batches as a pyarrow.Table + """ + read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_val, exc_tb): ... + @property + def schema(self) -> Schema: ... + @property + def stats(self) -> ReadStats: ... + +def get_tensor_size(tensor: Tensor) -> int: + """ + Return total size of serialized Tensor including metadata and padding. + + Parameters + ---------- + tensor : Tensor + The tensor for which we want to known the size. 
+ """ + +def get_record_batch_size(batch: RecordBatch) -> int: + """ + Return total size of serialized RecordBatch including metadata and padding. + + Parameters + ---------- + batch : RecordBatch + The recordbatch for which we want to know the size. + """ + +def write_tensor(tensor: Tensor, dest: NativeFile) -> int: + """ + Write pyarrow.Tensor to pyarrow.NativeFile object its current position. + + Parameters + ---------- + tensor : pyarrow.Tensor + dest : pyarrow.NativeFile + + Returns + ------- + bytes_written : int + Total number of bytes written to the file + """ + +def read_tensor(source: NativeFile) -> Tensor: + """Read pyarrow.Tensor from pyarrow.NativeFile object from current + position. If the file source supports zero copy (e.g. a memory map), then + this operation does not allocate any memory. This function not assume that + the stream is aligned + + Parameters + ---------- + source : pyarrow.NativeFile + + Returns + ------- + tensor : Tensor + + """ + +def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: + """ + Read length-prefixed message from file or buffer-like object + + Parameters + ---------- + source : pyarrow.NativeFile, file-like object, or buffer-like object + + Returns + ------- + message : Message + """ + +def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | None = None) -> Schema: + """ + Read Schema from message or buffer + + Parameters + ---------- + obj : buffer or Message + dictionary_memo : DictionaryMemo, optional + Needed to be able to reconstruct dictionary-encoded fields + with read_record_batch + + Returns + ------- + schema : Schema + """ + +def read_record_batch( + obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None +) -> RecordBatch: + """ + Read RecordBatch from message, given a known schema. If reading data from a + complete IPC stream, use ipc.open_stream instead + + Parameters + ---------- + obj : Message or Buffer-like + schema : Schema + dictionary_memo : DictionaryMemo, optional + If message contains dictionaries, must pass a populated + DictionaryMemo + + Returns + ------- + batch : RecordBatch + """ + +__all__ = [ + "MetadataVersion", + "WriteStats", + "ReadStats", + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "_CRecordBatchWriter", + "_RecordBatchStreamWriter", + "_ReadPandasMixin", + "RecordBatchReader", + "_RecordBatchStreamReader", + "_RecordBatchFileWriter", + "RecordBatchWithMetadata", + "_RecordBatchFileReader", + "get_tensor_size", + "get_record_batch_size", + "write_tensor", + "read_tensor", + "read_message", + "read_schema", + "read_record_batch", +] diff --git a/python/pyarrow-stubs/_types.pyi b/python/pyarrow-stubs/_types.pyi index 32543d4b04b..6596fb3e1d1 100644 --- a/python/pyarrow-stubs/_types.pyi +++ b/python/pyarrow-stubs/_types.pyi @@ -32,14 +32,13 @@ import numpy as np import pandas as pd from pyarrow._stubs_typing import SupportArrowSchema -# TODO from pyarrow.lib import ( Array, - # ChunkedArray, + ChunkedArray, ExtensionArray, MemoryPool, MonthDayNano, - # Table, + Table, ) from typing_extensions import TypeVar, deprecated @@ -1151,9 +1150,7 @@ class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): @property def value_type(self) -> _BasicValueT: ... 
-# TODO: replace below with: -# _StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) -_StorageT = TypeVar("_StorageT", bound=Array | Any) +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) class BaseExtensionType(DataType): """ @@ -1438,6 +1435,7 @@ class OpaqueType(BaseExtensionType): The name of the external system. """ +# TODO # @deprecated( # "This class is deprecated and its deserialization is disabled by default. " # ":class:`ExtensionType` is recommended instead." @@ -2063,9 +2061,7 @@ class Schema(_Weakrefable): >>> schema.metadata {b'n_legs': b'Number of legs per animal'} """ - # TODO: replace below with: - # def empty_table(self) -> Table: - def empty_table(self) -> Any: + def empty_table(self) -> Table: """ Provide an empty table according to the schema. diff --git a/python/pyarrow-stubs/array.pyi b/python/pyarrow-stubs/array.pyi index fcd9ec8f135..3027d689372 100644 --- a/python/pyarrow-stubs/array.pyi +++ b/python/pyarrow-stubs/array.pyi @@ -3310,4 +3310,5 @@ __all__ = [ "FixedShapeTensorArray", "concat_arrays", "_empty_array", + "_CastAs", ] diff --git a/python/pyarrow-stubs/compute.pyi b/python/pyarrow-stubs/compute.pyi new file mode 100644 index 00000000000..5c816773c62 --- /dev/null +++ b/python/pyarrow-stubs/compute.pyi @@ -0,0 +1,6168 @@ +# ruff: noqa: I001 +from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence +from collections.abc import Callable + +# Option classes +from pyarrow._compute import ArraySortOptions as ArraySortOptions +from pyarrow._compute import AssumeTimezoneOptions as AssumeTimezoneOptions +from pyarrow._compute import CastOptions as CastOptions +from pyarrow._compute import CountOptions as CountOptions +from pyarrow._compute import CumulativeOptions as CumulativeOptions +from pyarrow._compute import CumulativeSumOptions as CumulativeSumOptions +from pyarrow._compute import DayOfWeekOptions as DayOfWeekOptions +from pyarrow._compute import DictionaryEncodeOptions as DictionaryEncodeOptions +from pyarrow._compute import ElementWiseAggregateOptions as ElementWiseAggregateOptions + +# Expressions +from pyarrow._compute import Expression as Expression +from pyarrow._compute import ExtractRegexOptions as ExtractRegexOptions +from pyarrow._compute import ExtractRegexSpanOptions as ExtractRegexSpanOptions +from pyarrow._compute import FilterOptions as FilterOptions +from pyarrow._compute import Function as Function +from pyarrow._compute import FunctionOptions as FunctionOptions +from pyarrow._compute import FunctionRegistry as FunctionRegistry +from pyarrow._compute import HashAggregateFunction as HashAggregateFunction +from pyarrow._compute import HashAggregateKernel as HashAggregateKernel +from pyarrow._compute import IndexOptions as IndexOptions +from pyarrow._compute import JoinOptions as JoinOptions +from pyarrow._compute import Kernel as Kernel +from pyarrow._compute import ListFlattenOptions as ListFlattenOptions +from pyarrow._compute import ListSliceOptions as ListSliceOptions +from pyarrow._compute import MakeStructOptions as MakeStructOptions +from pyarrow._compute import MapLookupOptions as MapLookupOptions +from pyarrow._compute import MatchSubstringOptions as MatchSubstringOptions +from pyarrow._compute import ModeOptions as ModeOptions +from pyarrow._compute import NullOptions as NullOptions +from pyarrow._compute import PadOptions as PadOptions +from pyarrow._compute import PairwiseOptions as PairwiseOptions +from pyarrow._compute import PartitionNthOptions as PartitionNthOptions 
+from pyarrow._compute import PivotWiderOptions as PivotWiderOptions +from pyarrow._compute import QuantileOptions as QuantileOptions +from pyarrow._compute import RandomOptions as RandomOptions +from pyarrow._compute import RankOptions as RankOptions +from pyarrow._compute import RankQuantileOptions as RankQuantileOptions +from pyarrow._compute import ReplaceSliceOptions as ReplaceSliceOptions +from pyarrow._compute import ReplaceSubstringOptions as ReplaceSubstringOptions +from pyarrow._compute import RoundBinaryOptions as RoundBinaryOptions +from pyarrow._compute import RoundOptions as RoundOptions +from pyarrow._compute import RoundTemporalOptions as RoundTemporalOptions +from pyarrow._compute import RoundToMultipleOptions as RoundToMultipleOptions +from pyarrow._compute import RunEndEncodeOptions as RunEndEncodeOptions +from pyarrow._compute import ScalarAggregateFunction as ScalarAggregateFunction +from pyarrow._compute import ScalarAggregateKernel as ScalarAggregateKernel +from pyarrow._compute import ScalarAggregateOptions as ScalarAggregateOptions +from pyarrow._compute import ScalarFunction as ScalarFunction +from pyarrow._compute import ScalarKernel as ScalarKernel +from pyarrow._compute import SelectKOptions as SelectKOptions +from pyarrow._compute import SetLookupOptions as SetLookupOptions +from pyarrow._compute import SkewOptions as SkewOptions +from pyarrow._compute import SliceOptions as SliceOptions +from pyarrow._compute import SortOptions as SortOptions +from pyarrow._compute import SplitOptions as SplitOptions +from pyarrow._compute import SplitPatternOptions as SplitPatternOptions +from pyarrow._compute import StrftimeOptions as StrftimeOptions +from pyarrow._compute import StrptimeOptions as StrptimeOptions +from pyarrow._compute import StructFieldOptions as StructFieldOptions +from pyarrow._compute import TakeOptions as TakeOptions +from pyarrow._compute import TDigestOptions as TDigestOptions +from pyarrow._compute import TrimOptions as TrimOptions +from pyarrow._compute import UdfContext as UdfContext +from pyarrow._compute import Utf8NormalizeOptions as Utf8NormalizeOptions +from pyarrow._compute import VarianceOptions as VarianceOptions +from pyarrow._compute import VectorFunction as VectorFunction +from pyarrow._compute import VectorKernel as VectorKernel +from pyarrow._compute import WeekOptions as WeekOptions +from pyarrow._compute import WinsorizeOptions as WinsorizeOptions + +# Functions +from pyarrow._compute import call_function as call_function + +# Udf +from pyarrow._compute import call_tabular_function as call_tabular_function +from pyarrow._compute import function_registry as function_registry +from pyarrow._compute import get_function as get_function +from pyarrow._compute import list_functions as list_functions +from pyarrow._compute import register_aggregate_function as register_aggregate_function +from pyarrow._compute import register_scalar_function as register_scalar_function +from pyarrow._compute import register_tabular_function as register_tabular_function +from pyarrow._compute import register_vector_function as register_vector_function + +from pyarrow._compute import _Order, _Placement +from pyarrow._stubs_typing import ArrayLike, ScalarLike +from . import lib + +_P = ParamSpec("_P") +_R = TypeVar("_R") + +def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: + """Reference a column of the dataset. + + Stores only the field's name. 
Type and other information is known only when
+    the expression is bound to a dataset having an explicit schema.
+
+    Nested references are allowed by passing multiple names or a tuple of
+    names. For example ``('foo', 'bar')`` references the field named "bar"
+    inside the field named "foo".
+
+    Parameters
+    ----------
+    *name_or_index : string, multiple strings, tuple or int
+        The name or index of the (possibly nested) field the expression
+        references to.
+
+    Returns
+    -------
+    field_expr : Expression
+        Reference to the given field
+
+    Examples
+    --------
+    >>> import pyarrow.compute as pc
+    >>> pc.field("a")
+    >>> pc.field(1)
+    >>> pc.field(("a", "b"))
+    >>> pc.field("a", "b")
+    """
+
+def scalar(value: bool | float | str) -> Expression:
+    """Expression representing a scalar value.
+
+    Creates an Expression object representing a scalar value that can be used
+    in compute expressions and predicates.
+
+    Parameters
+    ----------
+    value : bool, int, float or string
+        Python value of the scalar. This function accepts any value that can be
+        converted to a ``pyarrow.Scalar`` using ``pa.scalar()``.
+
+    Notes
+    -----
+    This function differs from ``pyarrow.scalar()`` in the following way:
+
+    * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents
+      a single value in Arrow's memory model.
+    * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing
+      a scalar value that can be used in compute expressions, predicates, and
+      dataset filtering operations.
+
+    Returns
+    -------
+    scalar_expr : Expression
+        An Expression representing the scalar value
+    """
+
+def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ...
+
+# ============= compute functions =============
+_DataTypeT = TypeVar("_DataTypeT", bound=lib.DataType)
+_Scalar_CoT = TypeVar("_Scalar_CoT", bound=lib.Scalar, covariant=True)
+_ScalarT = TypeVar("_ScalarT", bound=lib.Scalar)
+_ArrayT = TypeVar("_ArrayT", bound=lib.Array | lib.ChunkedArray)
+_ScalarOrArrayT = TypeVar("_ScalarOrArrayT", bound=lib.Array | lib.Scalar | lib.ChunkedArray)
+ArrayOrChunkedArray: TypeAlias = lib.Array[_Scalar_CoT] | lib.ChunkedArray[_Scalar_CoT]
+ScalarOrArray: TypeAlias = ArrayOrChunkedArray[_Scalar_CoT] | _Scalar_CoT
+
+SignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.Int8Type]
+    | lib.Scalar[lib.Int16Type]
+    | lib.Scalar[lib.Int32Type]
+    | lib.Scalar[lib.Int64Type]
+)
+UnsignedIntegerScalar: TypeAlias = (
+    lib.Scalar[lib.UInt8Type]
+    | lib.Scalar[lib.UInt16Type]
+    | lib.Scalar[lib.UInt32Type]
+    | lib.Scalar[lib.UInt64Type]
+)
+IntegerScalar: TypeAlias = SignedIntegerScalar | UnsignedIntegerScalar
+FloatScalar: TypeAlias = (
+    lib.Scalar[lib.Float16Type] | lib.Scalar[lib.Float32Type] | lib.Scalar[lib.Float64Type]
+)
+DecimalScalar: TypeAlias = (
+    lib.Scalar[lib.Decimal32Type]
+    | lib.Scalar[lib.Decimal64Type]
+    | lib.Scalar[lib.Decimal128Type]
+    | lib.Scalar[lib.Decimal256Type]
+)
+NonFloatNumericScalar: TypeAlias = IntegerScalar | DecimalScalar
+NumericScalar: TypeAlias = IntegerScalar | FloatScalar | DecimalScalar
+BinaryScalar: TypeAlias = (
+    lib.Scalar[lib.BinaryType]
+    | lib.Scalar[lib.LargeBinaryType]
+    | lib.Scalar[lib.FixedSizeBinaryType]
+)
+StringScalar: TypeAlias = lib.Scalar[lib.StringType] | lib.Scalar[lib.LargeStringType]
+StringOrBinaryScalar: TypeAlias = StringScalar | BinaryScalar
+_ListScalar: TypeAlias = lib.ListViewScalar[_DataTypeT] | lib.FixedSizeListScalar[_DataTypeT, Any]
+_LargeListScalar: TypeAlias = lib.LargeListScalar[_DataTypeT] | lib.LargeListViewScalar[_DataTypeT]
+ListScalar: TypeAlias = (
+    lib.ListScalar[_DataTypeT] |
_ListScalar[_DataTypeT] | _LargeListScalar[_DataTypeT] +) +TemporalScalar: TypeAlias = ( + lib.Date32Scalar + | lib.Date64Scalar + | lib.Time32Scalar[Any] + | lib.Time64Scalar[Any] + | lib.TimestampScalar[Any] + | lib.DurationScalar[Any] + | lib.MonthDayNanoIntervalScalar +) +NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar +NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar + +_NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] +_NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) +_NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) +NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] +_NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) +NumericOrTemporalArray: TypeAlias = ArrayOrChunkedArray[_NumericOrTemporalScalarT] +_NumericOrTemporalArrayT = TypeVar("_NumericOrTemporalArrayT", bound=NumericOrTemporalArray) +BooleanArray: TypeAlias = ArrayOrChunkedArray[lib.BooleanScalar] +_BooleanArrayT = TypeVar("_BooleanArrayT", bound=BooleanArray) +IntegerArray: TypeAlias = ArrayOrChunkedArray[IntegerScalar] +_FloatScalarT = TypeVar("_FloatScalarT", bound=FloatScalar) +FloatArray: TypeAlias = ArrayOrChunkedArray[FloatScalar] +_FloatArrayT = TypeVar("_FloatArrayT", bound=FloatArray) +_StringScalarT = TypeVar("_StringScalarT", bound=StringScalar) +StringArray: TypeAlias = ArrayOrChunkedArray[StringScalar] +_StringArrayT = TypeVar("_StringArrayT", bound=StringArray) +_BinaryScalarT = TypeVar("_BinaryScalarT", bound=BinaryScalar) +BinaryArray: TypeAlias = ArrayOrChunkedArray[BinaryScalar] +_BinaryArrayT = TypeVar("_BinaryArrayT", bound=BinaryArray) +_StringOrBinaryScalarT = TypeVar("_StringOrBinaryScalarT", bound=StringOrBinaryScalar) +StringOrBinaryArray: TypeAlias = StringArray | BinaryArray +_StringOrBinaryArrayT = TypeVar("_StringOrBinaryArrayT", bound=StringOrBinaryArray) +_TemporalScalarT = TypeVar("_TemporalScalarT", bound=TemporalScalar) +TemporalArray: TypeAlias = ArrayOrChunkedArray[TemporalScalar] +_TemporalArrayT = TypeVar("_TemporalArrayT", bound=TemporalArray) +_ListArray: TypeAlias = ArrayOrChunkedArray[_ListScalar[_DataTypeT]] +_LargeListArray: TypeAlias = ArrayOrChunkedArray[_LargeListScalar[_DataTypeT]] +ListArray: TypeAlias = ArrayOrChunkedArray[ListScalar[_DataTypeT]] +# =============================== 1. Aggregation =============================== + +# ========================= 1.1 functions ========================= + +def all( + array: lib.BooleanScalar | BooleanArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar: + """ + Test whether all elements in a boolean array evaluate to true. + + Null values are ignored by default. + If the `skip_nulls` option is set to false, then Kleene logic is used. + See "kleene_and" for more details on Kleene logic. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. 
+ options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +any = _clone_signature(all) +""" +Test whether any element in a boolean array evaluates to true. + +Null values are ignored by default. +If the `skip_nulls` option is set to false, then Kleene logic is used. +See "kleene_or" for more details on Kleene logic. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def approximate_median( + array: NumericScalar | NumericArray, + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Approximate median of a numeric array with T-Digest algorithm. + + Nulls and NaNs are ignored. + A null scalar is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of null / non-null values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + options : pyarrow.compute.CountOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def count_distinct( + array: lib.Array | lib.ChunkedArray, + /, + mode: Literal["only_valid", "only_null", "all"] = "only_valid", + *, + options: CountOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar: + """ + Count the number of unique values. + + By default, only non-null values are counted. + This can be changed through CountOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + options : pyarrow.compute.CountOptions, optional + Alternative way of passing options. 
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def first(
+    array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _ScalarT:
+    """
+    Compute the first value in each group.
+
+    Null values are ignored by default.
+    If skip_nulls = false, then this will return the first and last values
+    regardless if it is null
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    skip_nulls : bool, default True
+        Whether to skip (ignore) nulls in the input.
+        If False, any null in the input forces the output to null.
+    min_count : int, default 1
+        Minimum number of non-null values in the input. If the number
+        of non-null values is below `min_count`, the output is null.
+    options : pyarrow.compute.ScalarAggregateOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def first_last(
+    array: lib.Array[Any] | lib.ChunkedArray[Any],
+    /,
+    *,
+    skip_nulls: bool = True,
+    min_count: int = 1,
+    options: ScalarAggregateOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.StructScalar:
+    """
+    Compute the first and last values of an array.
+
+    Null values are ignored by default.
+    If skip_nulls = false, then this will return the first and last values
+    regardless if it is null
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    skip_nulls : bool, default True
+        Whether to skip (ignore) nulls in the input.
+        If False, any null in the input forces the output to null.
+    min_count : int, default 1
+        Minimum number of non-null values in the input. If the number
+        of non-null values is below `min_count`, the output is null.
+    options : pyarrow.compute.ScalarAggregateOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def index(
+    data: lib.Array[Any] | lib.ChunkedArray[Any],
+    value,
+    start: int | None = None,
+    end: int | None = None,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar:
+    """
+    Find the index of the first occurrence of a given value.
+
+    Parameters
+    ----------
+    data : Array-like
+    value : Scalar-like object
+        The value to search for.
+    start : int, optional
+    end : int, optional
+    memory_pool : MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+
+    Returns
+    -------
+    index : int
+        the index, or -1 if not found
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"])
+    >>> pc.index(arr, "ipsum")
+    <pyarrow.Int64Scalar: 1>
+    >>> pc.index(arr, "ipsum", start=2)
+    <pyarrow.Int64Scalar: 5>
+    >>> pc.index(arr, "amet")
+    <pyarrow.Int64Scalar: -1>
+    """
+
+last = _clone_signature(first)
+"""
+Compute the first value in each group.
+
+Null values are ignored by default.
+If skip_nulls = false, then this will return the first and last values +regardless if it is null + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +max = _clone_signature(first) +""" +Compute the minimum or maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +min = _clone_signature(first) +""" +Compute the minimum or maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +min_max = _clone_signature(first_last) +""" +Compute the minimum and maximum values of a numeric array. + +Null values are ignored by default. +This can be changed through ScalarAggregateOptions. + +Parameters +---------- +array : Array-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def mean( + array: FloatScalar | FloatArray + | lib.NumericArray[lib.Scalar[Any]] + | lib.ChunkedArray[lib.Scalar[Any]] + | lib.Scalar[Any], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Scalar[Any]: + """ + Compute the mean of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. 
+ This can be changed through ScalarAggregateOptions. + The result is a double for integer and floating point arguments, + and a decimal with the same bit-width/precision/scale for decimal arguments. + For integers and floats, NaN is returned if min_count = 0 and + there are no values. For decimals, null is returned instead. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def mode( + array: NumericScalar | NumericArray, + /, + n: int = 1, + *, + skip_nulls: bool = True, + min_count: int = 0, + options: ModeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructArray: + """ + Compute the modal (most common) values of a numeric array. + + Compute the n most common values and their respective occurrence counts. + The output has type `struct`, where T is the + input type. + The results are ordered by descending `count` first, and ascending `mode` + when breaking ties. + Nulls are ignored. If there are no non-null values in the array, + an empty array is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + n : int, default 1 + Number of distinct most-common values to return. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ModeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) + >>> modes = pc.mode(arr, 2) + >>> modes[0] + + >>> modes[1] + + """ + +def product( + array: _ScalarT | lib.NumericArray[_ScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarT: + """ + Compute the product of values in a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. + + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
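+
+    Examples
+    --------
+    A minimal example, assuming default type inference (int64):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> pc.product(pa.array([1, 2, 3, 4]))
+    <pyarrow.Int64Scalar: 24>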
+ """ + +def quantile( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + skip_nulls: bool = True, + min_count: int = 0, + options: QuantileOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Compute an array of quantiles of a numeric array or chunked array. + + By default, 0.5 quantile (median) is returned. + If quantile lies between two data points, an interpolated value is + returned based on selected interpolation method. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to compute. All values must be in + [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. + Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.QuantileOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def stddev( + array: NumericScalar | NumericArray, + /, + *, + ddof: float = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the standard deviation of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. + By default (`ddof` = 0), the population standard deviation is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def sum( + array: _NumericScalarT | NumericArray[_NumericScalarT], + /, + *, + skip_nulls: bool = True, + min_count: int = 1, + options: ScalarAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT: + """ + Compute the sum of a numeric array. + + Null values are ignored by default. Minimum count of non-null + values can be set and null is returned if too few are present. + This can be changed through ScalarAggregateOptions. 
+ + Parameters + ---------- + array : Array-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.ScalarAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def tdigest( + array: NumericScalar | NumericArray, + /, + q: float = 0.5, + *, + delta: int = 100, + buffer_size: int = 500, + skip_nulls: bool = True, + min_count: int = 0, + options: TDigestOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Approximate quantiles of a numeric array with T-Digest algorithm. + + By default, 0.5 quantile (median) is returned. + Nulls and NaNs are ignored. + An array of nulls is returned if there is no valid data point. + + Parameters + ---------- + array : Array-like + Argument to compute function. + q : double or sequence of double, default 0.5 + Probability levels of the quantiles to approximate. All values must be + in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.TDigestOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +def variance( + array: NumericScalar | NumericArray, + /, + *, + ddof: int = 0, + skip_nulls: bool = True, + min_count: int = 0, + options: VarianceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleScalar: + """ + Calculate the variance of a numeric array. + + The number of degrees of freedom can be controlled using VarianceOptions. + By default (`ddof` = 0), the population variance is calculated. + Nulls are ignored. If there are not enough non-null values in the array + to satisfy `ddof`, null is returned. + + Parameters + ---------- + array : Array-like + Argument to compute function. + ddof : int, default 0 + Number of degrees of freedom. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + min_count : int, default 0 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. + options : pyarrow.compute.VarianceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def top_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the top-k ordered elements from array- or table-like + data. + + This is a specialization for :func:`select_k_unstable`. 
Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get top indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array + Indices of the top-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.top_k_unstable(arr, k=3) + + [ + 5, + 4, + 2 + ] + """ + +def bottom_k_unstable( + values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, + k: int, + sort_keys: list | None = None, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Array: + """ + Select the indices of the bottom-k ordered elements from + array- or table-like data. + + This is a specialization for :func:`select_k_unstable`. Output is not + guaranteed to be stable. + + Parameters + ---------- + values : Array, ChunkedArray, RecordBatch, or Table + Data to sort and get bottom indices from. + k : int + The number of `k` elements to keep. + sort_keys : List-like + Column key names to order by when input is table-like data. + memory_pool : MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Returns + ------- + result : Array of indices + Indices of the bottom-k ordered elements + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) + >>> pc.bottom_k_unstable(arr, k=3) + + [ + 0, + 1, + 2 + ] + """ + +# ========================= 2. Element-wise (“scalar”) functions ========================= + +# ========================= 2.1 Arithmetic ========================= +def abs( + x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: + """ + Calculate the absolute value of the argument element-wise. + + Results will wrap around on integer overflow. + Use function "abs_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +abs_checked = _clone_signature(abs) +""" +Calculate the absolute value of the argument element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "abs". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def add( + x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + y: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: + """ + Add the arguments element-wise. + + Results will wrap around on integer overflow. + Use function "add_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. 
+ y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +add_checked = _clone_signature(add) +""" +Add the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "add". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + +""" + +def divide( + x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + y: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: + """ + Divide the arguments element-wise. + + Integer division by zero returns an error. However, integer overflow + wraps around, and floating-point division by zero returns an infinite. + Use function "divide_checked" if you want to get an error + in all the aforementioned cases. + + Parameters + ---------- + dividend : Array-like or scalar-like + Argument to compute function. + divisor : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +divide_checked = _clone_signature(divide) +""" +Divide the arguments element-wise. + +An error is returned when trying to divide by zero, or when +integer overflow is encountered. + +Parameters +---------- +dividend : Array-like or scalar-like + Argument to compute function. +divisor : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def exp( + exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT | NonFloatNumericScalar | lib.DoubleScalar, + /, *, memory_pool: lib.MemoryPool | None = None +) -> _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression: + """ + Compute Euler's number raised to the power of specified exponent, element-wise. + + If exponent is null the result will be null. + + Parameters + ---------- + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +multiply = _clone_signature(add) +""" +Multiply the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "multiply_checked" if you want overflow +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +multiply_checked = _clone_signature(add) +""" +Multiply the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "multiply". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def negate( + x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: + """ + Negate the argument element-wise. + + Results will wrap around on integer overflow. + Use function "negate_checked" if you want overflow + to return an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +negate_checked = _clone_signature(negate) +""" +Negate the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "negate". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def power( + base: _NumericScalarT | _NumericArrayT | Expression | _NumericArrayT | NumericScalar, + exponent: _NumericScalarT | _NumericArrayT | Expression | _NumericArrayT | NumericScalar, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT | _NumericArrayT | Expression: + """ + Raise arguments to power element-wise. + + Integer to negative integer power returns an error. However, integer overflow + wraps around. If either base or exponent is null the result will be null. + + Parameters + ---------- + base : Array-like or scalar-like + Argument to compute function. + exponent : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +power_checked = _clone_signature(power) +""" +Raise arguments to power element-wise. + +An error is returned when integer to negative integer power is encountered, +or integer overflow is encountered. + +Parameters +---------- +base : Array-like or scalar-like + Argument to compute function. +exponent : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def sign( + x: NumericOrDurationArray | NumericOrDurationScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> ( + lib.NumericArray[lib.Int8Scalar] + | lib.NumericArray[lib.FloatScalar] + | lib.NumericArray[lib.DoubleScalar] + | lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar | Expression +): + """ + Get the signedness of the arguments element-wise. + + Output is any of (-1,1) for nonzero inputs and 0 for zero input. + NaN values return NaN. Integral values return signedness as Int8 and + floating-point values return it with the same type as the input values. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +def sqrt(x: NumericArray | NumericScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray | FloatScalar | Expression: + """ + Takes the square root of arguments element-wise. + + A negative argument returns a NaN. For a variant that returns an + error, use function "sqrt_checked". 
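+
+ For instance (an illustrative sketch, assuming a working pyarrow
+ installation; the sample value is arbitrary):
+
+ >>> import pyarrow.compute as pc
+ >>> pc.sqrt(4.0).as_py()
+ 2.0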
+ + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + """ + +sqrt_checked = _clone_signature(sqrt) +""" +Takes the square root of arguments element-wise. + +A negative argument returns an error. For a variant that returns a +NaN, use function "sqrt". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +subtract = _clone_signature(add) +""" +Subtract the arguments element-wise. + +Results will wrap around on integer overflow. +Use function "subtract_checked" if you want overflow +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +subtract_checked = _clone_signature(add) +""" +Subtract the arguments element-wise. + +This function returns an error on overflow. For a variant that +doesn't fail on overflow, use function "subtract". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.1 Bit-wise functions ========================= +def bit_wise_and( + x: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar], + y: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar], + /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT | _NumericArrayT | Expression: + """ + Bit-wise AND the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def bit_wise_not( + x: _NumericScalarT | _NumericArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _NumericScalarT | _NumericArrayT | Expression: + """ + Bit-wise negate the arguments element-wise. + + Null values return null. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +bit_wise_or = _clone_signature(bit_wise_and) +""" +Bit-wise OR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +bit_wise_xor = _clone_signature(bit_wise_and) +""" +Bit-wise XOR the arguments element-wise. + +Null values return null. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +`x` is returned if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +Use function "shift_left_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_left_checked = _clone_signature(bit_wise_and) +""" +Left shift `x` by `y`. + +The shift operates as if on the two's complement representation of the number. +In other words, this is equivalent to multiplying `x` by 2 to the power `y`, +even if overflow occurs. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_left" for a variant that doesn't fail for an invalid shift amount. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +`x` is returned if `y` (the amount to shift by) is: (1) negative or +(2) greater than or equal to the precision of `x`. +Use function "shift_right_checked" if you want an invalid shift amount +to return an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +shift_right_checked = _clone_signature(bit_wise_and) +""" +Right shift `x` by `y`. + +This is equivalent to dividing `x` by 2 to the power `y`. +An error is raised if `y` (the amount to shift by) is (1) negative or +(2) greater than or equal to the precision of `x`. +See "shift_right" for a variant that doesn't fail for an invalid shift amount + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.2 Rounding functions ========================= +def ceil(x: _FloatScalarT | _FloatArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT | _FloatArrayT | Expression: + """ + Round up to the nearest integer. + + Compute the smallest integer value not less in magnitude than `x`. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor = _clone_signature(ceil) +""" +Round down to the nearest integer. + +Compute the largest integer value not greater in magnitude than `x`. 
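+
+For instance (an illustrative sketch, assuming a working pyarrow
+installation; the sample value is arbitrary):
+
+>>> import pyarrow.compute as pc
+>>> pc.floor(2.7).as_py()
+2.0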
+ +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def round( + x: _NumericScalarT | _NumericArrayT | Expression, + /, + ndigits: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT | _NumericArrayT | Expression: + """ + Round to a given precision. + + Options are used to control the number of digits and rounding mode. + Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + ndigits : int, default 0 + Number of fractional digits to round to. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def round_to_multiple( + x: _NumericScalarT | _NumericArrayT | Expression, + /, + multiple: int = 0, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundToMultipleOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT | _NumericArrayT | Expression: + """ + Round to a given multiple. + + Options are used to control the rounding multiple and rounding mode. + Default behavior is to round to the nearest integer and + use half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundToMultipleOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def round_binary( + x: _NumericScalarT | _NumericArrayT | Expression, + s: int | lib.Int8Scalar | lib.Int16Scalar | lib.Int32Scalar | lib.Int64Scalar | Iterable, + /, + round_mode: Literal[ + "down", + "up", + "towards_zero", + "towards_infinity", + "half_down", + "half_up", + "half_towards_zero", + "half_towards_infinity", + "half_to_even", + "half_to_odd", + ] = "half_to_even", + *, + options: RoundBinaryOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT | Expression: + """ + Round to the given precision. + + Options are used to control the rounding mode. 
+ Default behavior is to use the half-to-even rule to break ties. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + s : Array-like or scalar-like + Argument to compute function. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + options : pyarrow.compute.RoundBinaryOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +trunc = _clone_signature(ceil) +""" +Compute the integral part. + +Compute the nearest integer not greater in magnitude than `x`. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.3 Logarithmic functions ========================= +def ln( + x: FloatScalar | FloatArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: + """ + Compute natural logarithm. + + Non-positive values return -inf or NaN. Null values return null. + Use function "ln_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ln_checked = _clone_signature(ln) +""" +Compute natural logarithm. + +Non-positive values raise an error. Null values return null. +Use function "ln" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10 = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log10_checked" if you want non-positive values +to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log10_checked = _clone_signature(ln) +""" +Compute base 10 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log10" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p = _clone_signature(ln) +""" +Compute natural log of (1+x). + +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p_checked" if you want invalid values to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log1p_checked = _clone_signature(ln) +""" +Compute natural log of (1+x). 
+ +Values <= -1 return -inf or NaN. Null values return null. +This function may be more precise than log(1 + x) for x close to zero. +Use function "log1p" if you want invalid values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2 = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values return -inf or NaN. Null values return null. +Use function "log2_checked" if you want non-positive values +to raise an error. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +log2_checked = _clone_signature(ln) +""" +Compute base 2 logarithm. + +Non-positive values raise an error. Null values return null. +Use function "log2" if you want non-positive values +to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def logb( + x: FloatScalar | FloatArray | Expression | Any, b: FloatScalar | FloatArray | Expression | Any, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression | Any: + """ + Compute base `b` logarithm. + + Values <= 0 return -inf or NaN. Null values return null. + Use function "logb_checked" if you want non-positive values to raise an error. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + b : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +logb_checked = _clone_signature(logb) +""" +Compute base `b` logarithm. + +Values <= 0 return -inf or NaN. Null values return null. +Use function "logb" if you want non-positive values to return -inf or NaN. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +b : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.4 Trigonometric functions ========================= +acos = _clone_signature(ln) +""" +Compute the inverse cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "acos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +acos_checked = _clone_signature(ln) +""" +Compute the inverse cosine. + +Invalid input values raise an error; +to return NaN instead, see "acos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin = _clone_signature(ln) +""" +Compute the inverse sine. + +NaN is returned for invalid input values; +to raise an error instead, see "asin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +asin_checked = _clone_signature(ln) +""" +Compute the inverse sine. + +Invalid input values raise an error; +to return NaN instead, see "asin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +atan = _clone_signature(ln) +""" +Compute the inverse tangent of x. + +The return value is in the range [-pi/2, pi/2]; +for a full return range [-pi, pi], see "atan2". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos = _clone_signature(ln) +""" +Compute the cosine. + +NaN is returned for invalid input values; +to raise an error instead, see "cos_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cos_checked = _clone_signature(ln) +""" +Compute the cosine. + +Infinite values raise an error; +to return NaN instead, see "cos". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin = _clone_signature(ln) +""" +Compute the sine. + +NaN is returned for invalid input values; +to raise an error instead, see "sin_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +sin_checked = _clone_signature(ln) +""" +Compute the sine. + +Invalid input values raise an error; +to return NaN instead, see "sin". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan = _clone_signature(ln) +""" +Compute the tangent. + +NaN is returned for invalid input values; +to raise an error instead, see "tan_checked". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +tan_checked = _clone_signature(ln) +""" +Compute the tangent. + +Infinite values raise an error; +to return NaN instead, see "tan". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def atan2( + y: FloatScalar | FloatArray | Expression | Any, x: FloatScalar | FloatArray | Expression | Any, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: + """ + Compute the inverse tangent of y/x. + + The return value is in the range [-pi, pi]. + + Parameters + ---------- + y : Array-like or scalar-like + Argument to compute function. + x : Array-like or scalar-like + Argument to compute function. 
+ memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.5 Comparisons functions ========================= +def equal( + x: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + y: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Compare values for equality (x == y). + + A null on either side emits a null comparison result. + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +greater = _clone_signature(equal) +""" +Compare values for ordered inequality (x > y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +greater_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x >= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less = _clone_signature(equal) +""" +Compare values for ordered inequality (x < y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +less_equal = _clone_signature(equal) +""" +Compare values for ordered inequality (x <= y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +not_equal = _clone_signature(equal) +""" +Compare values for inequality (x != y). + +A null on either side emits a null comparison result. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def max_element_wise( + *args: ScalarOrArray[_Scalar_CoT] | Expression, + skip_nulls: bool = True, + options: ElementWiseAggregateOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _Scalar_CoT | Expression: + """ + Find the element-wise maximum value. + + Nulls are ignored (by default) or propagated. + NaN is preferred over null, but not over any valid value. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. 
+ options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +min_element_wise = _clone_signature(max_element_wise) +""" +Find the element-wise minimum value. + +Nulls are ignored (by default) or propagated. +NaN is preferred over null, but not over any valid value. + +Parameters +---------- +*args : Array-like or scalar-like + Argument to compute function. +skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.6 Logical functions ========================= +def and_( + x: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], + y: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar]: + """ + Logical 'and' boolean values. + + When a null is encountered in either input, a null is output. + For a different null behavior, see function "and_kleene". + + Parameters + ---------- + x : Array-like or scalar-like + Argument to compute function. + y : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +and_kleene = _clone_signature(and_) +""" +Logical 'and' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and null = null +- null and true = null +- false and null = false +- null and false = false +- null and null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and' false is always false. +For a different null behavior, see function "and". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not = _clone_signature(and_) +""" +Logical 'and not' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "and_not_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +and_not_kleene = _clone_signature(and_) +""" +Logical 'and not' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true and not null = null +- null and not false = null +- false and not null = false +- null and not true = false +- null and not null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'and not' true is always false, as is false +'and not' an unknown value. +For a different null behavior, see function "and_not". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. 
+y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_ = _clone_signature(and_) +""" +Logical 'or' boolean values. + +When a null is encountered in either input, a null is output. +For a different null behavior, see function "or_kleene". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +or_kleene = _clone_signature(and_) +""" +Logical 'or' boolean values (Kleene logic). + +This function behaves as follows with nulls: + +- true or null = true +- null or true = true +- false or null = null +- null or false = null +- null or null = null + +In other words, in this context a null value really means "unknown", +and an unknown value 'or' true is always true. +For a different null behavior, see function "or". + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +xor = _clone_signature(and_) +""" +Logical 'xor' boolean values. + +When a null is encountered in either input, a null is output. + +Parameters +---------- +x : Array-like or scalar-like + Argument to compute function. +y : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def invert( + x: lib.BooleanScalar | _BooleanArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | _BooleanArrayT | Expression: + """ + Invert boolean values. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.10 String predicates ========================= +def ascii_is_alnum( + strings: StringScalar | StringArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Classify strings as ASCII alphanumeric. + + For each string in `strings`, emit true iff the string is non-empty + and consists only of alphanumeric ASCII characters. Null strings emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal ASCII characters. Null strings emit null. 
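+
+For instance (an illustrative sketch, assuming a working pyarrow
+installation; the sample values are arbitrary), "123" is classified as
+decimal while "1a" and the empty string are not:
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.ascii_is_decimal(pa.array(["123", "1a", ""])).to_pylist()
+[True, False, False]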
+ +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alnum = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphanumeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphanumeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_alpha = _clone_signature(ascii_is_alnum) +""" +Classify strings as alphabetic. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of alphabetic Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_decimal = _clone_signature(ascii_is_alnum) +""" +Classify strings as decimal. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of decimal Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_digit = _clone_signature(ascii_is_alnum) +""" +Classify strings as digits. 
+ +For each string in `strings`, emit true iff the string is non-empty +and consists only of Unicode digits. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_lower = _clone_signature(ascii_is_alnum) +""" +Classify strings as lowercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of lowercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_numeric = _clone_signature(ascii_is_alnum) +""" +Classify strings as numeric. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of numeric Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_printable = _clone_signature(ascii_is_alnum) +""" +Classify strings as printable. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of printable Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_space = _clone_signature(ascii_is_alnum) +""" +Classify strings as whitespace. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of whitespace Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_upper = _clone_signature(ascii_is_alnum) +""" +Classify strings as uppercase. + +For each string in `strings`, emit true iff the string is non-empty +and consists only of uppercase Unicode characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_is_title = _clone_signature(ascii_is_alnum) +""" +Classify strings as titlecase. + +For each string in `strings`, emit true iff the string is title-cased, +i.e. it has at least one cased character, each uppercase character +follows an uncased character, and each lowercase character follows +an uppercase character. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +string_is_ascii = _clone_signature(ascii_is_alnum) +""" +Classify strings as ASCII. + +For each string in `strings`, emit true iff the string consists only +of ASCII characters. Null strings emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.11 String transforms ========================= +def ascii_capitalize( + strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT | _StringArrayT | Expression: + """ + Capitalize the first character of ASCII input. + + For each string in `strings`, return a capitalized version. + + This function assumes the input is fully ASCII. If it may contain + non-ASCII characters, use "utf8_capitalize" instead. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_lower = _clone_signature(ascii_capitalize) +""" +Transform ASCII input to lowercase. + +For each string in `strings`, return a lowercase version. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_lower" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_reverse = _clone_signature(ascii_capitalize) +""" +Reverse ASCII input. + +For each ASCII string in `strings`, return a reversed version. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_reverse" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_swapcase = _clone_signature(ascii_capitalize) +""" +Transform ASCII input by inverting casing. + +For each string in `strings`, return a string with opposite casing. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_swapcase" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_title = _clone_signature(ascii_capitalize) +""" +Titlecase each word of ASCII input. + +For each string in `strings`, return a titlecased version. +Each word in the output will start with an uppercase character and its +remaining characters will be lowercase. + +This function assumes the input is fully ASCII. If it may contain +non-ASCII characters, use "utf8_title" instead. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_upper = _clone_signature(ascii_capitalize) +""" +Transform ASCII input to uppercase. + +For each string in `strings`, return an uppercase version. + +This function assumes the input is fully ASCII. 
If it may contain
+non-ASCII characters, use "utf8_upper" instead.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+ Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+"""
+
+def binary_length(
+ strings: lib.BinaryScalar | lib.StringScalar | lib.LargeBinaryScalar | lib.LargeStringScalar
+ | lib.BinaryArray | lib.StringArray
+ | lib.ChunkedArray[lib.BinaryScalar] | lib.ChunkedArray[lib.StringScalar]
+ | lib.LargeBinaryArray | lib.LargeStringArray
+ | lib.ChunkedArray[lib.LargeBinaryScalar] | lib.ChunkedArray[lib.LargeStringScalar]
+ | Expression,
+ /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression:
+ """
+ Compute string lengths.
+
+ For each string in `strings`, emit its length of bytes.
+ Null values emit null.
+
+ Parameters
+ ----------
+ strings : Array-like or scalar-like
+ Argument to compute function.
+ memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+ """
+
+def binary_repeat(
+ strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
+ num_repeats: int | list[int] | list[int | None],
+ /,
+ *,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT | Expression:
+ """
+ Repeat a binary string.
+
+ For each binary string in `strings`, return a replicated version.
+
+ Parameters
+ ----------
+ strings : Array-like or scalar-like
+ Argument to compute function.
+ num_repeats : Array-like or scalar-like
+ Argument to compute function.
+ memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+ """
+
+def binary_replace_slice(
+ strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression,
+ /,
+ start: int,
+ stop: int,
+ replacement: str | bytes,
+ *,
+ options: ReplaceSliceOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression:
+ """
+ Replace a slice of a binary string.
+
+ For each string in `strings`, replace a slice of the string defined by `start`
+ and `stop` indices with the given `replacement`. `start` is inclusive
+ and `stop` is exclusive, and both are measured in bytes.
+ Null values emit null.
+
+ Parameters
+ ----------
+ strings : Array-like or scalar-like
+ Argument to compute function.
+ start : int
+ Index to start slicing at (inclusive).
+ stop : int
+ Index to stop slicing at (exclusive).
+ replacement : str
+ What to replace the slice with.
+ options : pyarrow.compute.ReplaceSliceOptions, optional
+ Alternative way of passing options.
+ memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+ """
+
+def binary_reverse(
+ strings: _BinaryScalarT | _BinaryArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> _BinaryScalarT | _BinaryArrayT | Expression:
+ """
+ Reverse binary input.
+
+ For each binary string in `strings`, return a reversed version.
+
+ This function reverses the binary data at a byte-level.
+
+ Parameters
+ ----------
+ strings : Array-like or scalar-like
+ Argument to compute function.
+ memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
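+
+ Examples
+ --------
+ An illustrative sketch, assuming a working pyarrow installation
+ (the sample values are arbitrary):
+
+ >>> import pyarrow as pa
+ >>> import pyarrow.compute as pc
+ >>> pc.binary_reverse(pa.array([b"abc", b"de"]))[0].as_py()
+ b'cba'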
+ """ + +def replace_substring( + strings: _StringScalarT | _StringArrayT | Expression, + /, + pattern: str | bytes, + replacement: str | bytes, + *, + max_replacements: int | None = None, + options: ReplaceSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: + """ + Replace matching non-overlapping substrings with replacement. + + For each string in `strings`, replace non-overlapping substrings that match + the given literal `pattern` with the given `replacement`. + If `max_replacements` is given and not equal to -1, it limits the + maximum amount replacements per input, counted from the left. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + options : pyarrow.compute.ReplaceSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +replace_substring_regex = _clone_signature(replace_substring) +""" +Replace matching non-overlapping substrings with replacement. + +For each string in `strings`, replace non-overlapping substrings that match +the given regular expression `pattern` with the given `replacement`. +If `max_replacements` is given and not equal to -1, it limits the +maximum amount replacements per input, counted from the left. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +replacement : str + What to replace the pattern with. +max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). +options : pyarrow.compute.ReplaceSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def utf8_capitalize( + strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> _StringScalarT | _StringArrayT | Expression: + """ + Capitalize the first character of input. + + For each string in `strings`, return a capitalized version, + with the first character uppercased and the others lowercased. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def utf8_length( + strings: lib.StringScalar | lib.LargeStringScalar | lib.StringArray | lib.ChunkedArray[lib.StringScalar] + | lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: + """ + Compute UTF8 string lengths. + + For each string in `strings`, emit its length in UTF8 characters. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + +utf8_lower = _clone_signature(utf8_capitalize) +""" +Transform input to lowercase. + +For each string in `strings`, return a lowercase version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def utf8_replace_slice( + strings: _StringScalarT | _StringArrayT | Expression, + /, + start: int, + stop: int, + replacement: str | bytes, + *, + options: ReplaceSliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: + """ + Replace a slice of a string. + + For each string in `strings`, replace a slice of the string defined by `start` + and `stop` indices with the given `replacement`. `start` is inclusive + and `stop` is exclusive, and both are measured in UTF8 characters. + Null values emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + options : pyarrow.compute.ReplaceSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +utf8_reverse = _clone_signature(utf8_capitalize) +""" +Reverse input. + +For each string in `strings`, return a reversed version. + +This function operates on Unicode codepoints, not grapheme +clusters. Hence, it will not correctly reverse grapheme clusters +composed of multiple codepoints. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_swapcase = _clone_signature(utf8_capitalize) +""" +Transform input lowercase characters to uppercase and uppercase characters to lowercase. + +For each string in `strings`, return an opposite case version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_title = _clone_signature(utf8_capitalize) +""" +Titlecase each word of input. + +For each string in `strings`, return a titlecased version. +Each word in the output will start with an uppercase character and its +remaining characters will be lowercase. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_upper = _clone_signature(utf8_capitalize) +""" +Transform input to uppercase. + +For each string in `strings`, return an uppercase version. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+"""
+
+# ========================= 2.12 String padding =========================
+def ascii_center(
+ strings: _StringScalarT | _StringArrayT | Expression,
+ /,
+ width: int,
+ padding: str = " ",
+ lean_left_on_odd_padding: bool = True,
+ *,
+ options: PadOptions | None = None,
+ memory_pool: lib.MemoryPool | None = None,
+) -> _StringScalarT | _StringArrayT | Expression:
+ """
+ Center strings by padding with a given character.
+
+ For each string in `strings`, emit a centered string by padding both sides
+ with the given ASCII character.
+ Null values emit null.
+
+ Parameters
+ ----------
+ strings : Array-like or scalar-like
+ Argument to compute function.
+ width : int
+ Desired string length.
+ padding : str, default " "
+ What to pad the string with. Should be one byte or codepoint.
+ lean_left_on_odd_padding : bool, default True
+ What to do if there is an odd number of padding characters (in case
+ of centered padding). Defaults to aligning on the left (i.e. adding
+ the extra padding character on the right).
+ options : pyarrow.compute.PadOptions, optional
+ Alternative way of passing options.
+ memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+ """
+
+ascii_lpad = _clone_signature(ascii_center)
+"""
+Right-align strings by padding with a given character.
+
+For each string in `strings`, emit a right-aligned string by prepending
+the given ASCII character.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+ Argument to compute function.
+width : int
+ Desired string length.
+padding : str, default " "
+ What to pad the string with. Should be one byte or codepoint.
+lean_left_on_odd_padding : bool, default True
+ What to do if there is an odd number of padding characters (in case
+ of centered padding). Defaults to aligning on the left (i.e. adding
+ the extra padding character on the right).
+options : pyarrow.compute.PadOptions, optional
+ Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+"""
+ascii_rpad = _clone_signature(ascii_center)
+"""
+Left-align strings by padding with a given character.
+
+For each string in `strings`, emit a left-aligned string by appending
+the given ASCII character.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+ Argument to compute function.
+width : int
+ Desired string length.
+padding : str, default " "
+ What to pad the string with. Should be one byte or codepoint.
+lean_left_on_odd_padding : bool, default True
+ What to do if there is an odd number of padding characters (in case
+ of centered padding). Defaults to aligning on the left (i.e. adding
+ the extra padding character on the right).
+options : pyarrow.compute.PadOptions, optional
+ Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+ If not passed, will allocate memory from the default memory pool.
+"""
+utf8_center = _clone_signature(ascii_center)
+"""
+Center strings by padding with a given character.
+
+For each string in `strings`, emit a centered string by padding both sides
+with the given UTF8 codeunit.
+Null values emit null.
+
+Parameters
+----------
+strings : Array-like or scalar-like
+ Argument to compute function.
+width : int
+ Desired string length.
+padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_lpad = _clone_signature(ascii_center) +""" +Right-align strings by padding with a given character. + +For each string in `strings`, emit a right-aligned string by prepending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rpad = _clone_signature(ascii_center) +""" +Left-align strings by padding with a given character. + +For each string in `strings`, emit a left-aligned string by appending +the given UTF8 codeunit. +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +width : int + Desired string length. +padding : str, default " " + What to pad the string with. Should be one byte or codepoint. +lean_left_on_odd_padding : bool, default True + What to do if there is an odd number of padding characters (in case + of centered padding). Defaults to aligning on the left (i.e. adding + the extra padding character on the right). +options : pyarrow.compute.PadOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.13 String trimming ========================= +def ascii_ltrim( + strings: _StringScalarT | _StringArrayT | Expression, + /, + characters: str, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: + """ + Trim leading characters. + + For each string in `strings`, remove any leading characters + from the `characters` option (as given in TrimOptions). + Null values emit null. + Both the `strings` and the `characters` are interpreted as + ASCII; to trim non-ASCII characters, use `utf8_ltrim`. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + characters : str + Individual characters to be trimmed from the string. + options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. 
+Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_rtrim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. + +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. +Both the `strings` and the `characters` are interpreted as +ASCII; to trim non-ASCII characters, use `utf8_trim`. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim = _clone_signature(ascii_ltrim) +""" +Trim leading characters. + +For each string in `strings`, remove any leading characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim = _clone_signature(ascii_ltrim) +""" +Trim trailing characters. + +For each string in `strings`, remove any trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_trim = _clone_signature(ascii_ltrim) +""" +Trim leading and trailing characters. + +For each string in `strings`, remove any leading or trailing characters +from the `characters` option (as given in TrimOptions). +Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +characters : str + Individual characters to be trimmed from the string. +options : pyarrow.compute.TrimOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def ascii_ltrim_whitespace( + strings: _StringScalarT | _StringArrayT | Expression, + /, + *, + options: TrimOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: + """ + Trim leading ASCII whitespace characters. + + For each string in `strings`, emit a string with leading ASCII whitespace + characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode + whitespace characters. Null values emit null. 
+ + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with trailing ASCII whitespace +characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading and trailing ASCII whitespace characters. + +For each string in `strings`, emit a string with leading and trailing ASCII +whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode +whitespace characters. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading whitespace characters. + +For each string in `strings`, emit a string with leading whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim trailing whitespace characters. + +For each string in `strings`, emit a string with trailing whitespace +characters removed, where whitespace characters are defined by the Unicode +standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) +""" +Trim leading and trailing whitespace characters. + +For each string in `strings`, emit a string with leading and trailing +whitespace characters removed, where whitespace characters are defined +by the Unicode standard. Null values emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.14 String splitting ========================= +def ascii_split_whitespace( + strings: _StringScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression, + /, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: + """ + Split string according to any ASCII whitespace. + + Split each string according any non-zero length sequence of ASCII + whitespace characters. The output for each string input is a list + of strings. 
+ + The maximum number of splits and direction of splitting + (forward, reverse) can optionally be defined in SplitOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + options : pyarrow.compute.SplitOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def split_pattern( + strings: _StringOrBinaryScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression, + /, + pattern: str, + *, + max_splits: int | None = None, + reverse: bool = False, + options: SplitOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: + """ + Split string according to separator. + + Split each string according to the exact `pattern` defined in + SplitPatternOptions. The output for each string input is a list + of strings. + + The maximum number of splits and direction of splitting + (forward, reverse) can optionally be defined in SplitPatternOptions. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +split_pattern_regex = _clone_signature(split_pattern) +""" +Split string according to regex pattern. + +Split each string according to the regex `pattern` defined in +SplitPatternOptions. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitPatternOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + String pattern to split on. +max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. +options : pyarrow.compute.SplitPatternOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +utf8_split_whitespace = _clone_signature(ascii_split_whitespace) +""" +Split string according to any Unicode whitespace. + +Split each string according any non-zero length sequence of Unicode +whitespace characters. The output for each string input is a list +of strings. + +The maximum number of splits and direction of splitting +(forward, reverse) can optionally be defined in SplitOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. 
+max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). +reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. +options : pyarrow.compute.SplitOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.15 String component extraction ========================= +def extract_regex( + strings: StringOrBinaryScalar | StringOrBinaryArray | Expression, + /, + pattern: str, + *, + options: ExtractRegexOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar | lib.StructArray | Expression: + """ + Extract substrings captured by a regex pattern. + + For each string in `strings`, match the regular expression and, if + successful, emit a struct with field names and values coming from the + regular expression's named capture groups. If the input is null or the + regular expression fails matching, a null output value is emitted. + + Regular expression matching is done using the Google RE2 library. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Regular expression with named capture fields. + options : pyarrow.compute.ExtractRegexOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.16 String join ========================= +def binary_join( + strings, separator, /, *, memory_pool: lib.MemoryPool | None = None +) -> StringScalar | StringArray: + """ + Join a list of strings together with a separator. + + Concatenate the strings in `list`. The `separator` is inserted + between each given string. + Any null input and any null `list` element emits a null output. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + separator : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def binary_join_element_wise( + *strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, + null_handling: Literal["emit_null", "skip", "replace"] = "emit_null", + null_replacement: str = "", + options: JoinOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: + """ + Join string arguments together, with the last argument as separator. + + Concatenate the `strings` except for the last one. The last argument + in `strings` is inserted between each given string. + Any null separator element emits a null output. Null elements either + emit a null (the default), are skipped, or replaced with a given string. + + Parameters + ---------- + *strings : Array-like or scalar-like + Argument to compute function. + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + options : pyarrow.compute.JoinOptions, optional + Alternative way of passing options. 
+ memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.17 String Slicing ========================= +def binary_slice( + strings: _BinaryScalarT | _BinaryArrayT | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _BinaryScalarT | _BinaryArrayT | Expression: + """ + Slice binary string. + + For each binary string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + bytes. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def utf8_slice_codeunits( + strings: _StringScalarT | _StringArrayT | Expression, + /, + start: int, + stop: int | None = None, + step: int = 1, + *, + options: SliceOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _StringScalarT | _StringArrayT | Expression: + """ + Slice string. + + For each string in `strings`, emit the substring defined by + (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is + inclusive and `stop` is exclusive. All three values are measured in + UTF8 codeunits. + If `step` is negative, the string will be advanced in reversed order. + An error is raised if `step` is zero. + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + options : pyarrow.compute.SliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.18 Containment tests ========================= +def count_substring( + strings: lib.StringScalar | lib.BinaryScalar | lib.LargeStringScalar | lib.LargeBinaryScalar + | lib.StringArray | lib.BinaryArray + | lib.ChunkedArray[lib.StringScalar] | lib.ChunkedArray[lib.BinaryScalar] + | lib.LargeStringArray | lib.LargeBinaryArray + | lib.ChunkedArray[lib.LargeStringScalar] | lib.ChunkedArray[lib.LargeBinaryScalar] + | Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: + """ + Count occurrences of substring. + + For each string in `strings`, emit the number of occurrences of the given + literal pattern. + Null inputs emit null. The pattern must be given in MatchSubstringOptions. 
+ + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +count_substring_regex = _clone_signature(count_substring) +""" +Count occurrences of substring. + +For each string in `strings`, emit the number of occurrences of the given +regular expression pattern. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def ends_with( + strings: StringScalar | BinaryScalar | StringArray | BinaryArray | Expression, + /, + pattern: str, + *, + ignore_case: bool = False, + options: MatchSubstringOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Check if strings end with a literal pattern. + + For each string in `strings`, emit true iff it ends with a given pattern. + The pattern must be given in MatchSubstringOptions. + If ignore_case is set, only simple case folding is performed. + + Null inputs emit null. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +find_substring = _clone_signature(count_substring) +""" +Find first occurrence of substring. + +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +find_substring_regex = _clone_signature(count_substring) +""" +Find location of first match of regex pattern. + +For each string in `strings`, emit the index in bytes of the first occurrence +of the given literal pattern, or -1 if not found. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. 
+ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def index_in( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + value_set: lib.Array | lib.ChunkedArray | Expression, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Scalar | lib.Int32Array | Expression: + """ + Return index of each element in a set of values. + + For each element in `values`, return its index in a given set of + values, or null if it is not found there. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def is_in( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + value_set: lib.Array | lib.ChunkedArray | Expression, + *, + skip_nulls: bool = False, + options: SetLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray: + """ + Find each element in a set of values. + + For each element in `values`, return true if it is found in a given + set of values, false otherwise. + The set of values to look for must be given in SetLookupOptions. + By default, nulls are matched against the value set, this can be + changed in SetLookupOptions. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + options : pyarrow.compute.SetLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +match_like = _clone_signature(ends_with) +""" +Match strings against SQL-style LIKE pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. '%' will match any number of characters, '_' will +match exactly one character, and any other character matches itself. +To match a literal '%', '_', or '\', precede the character with a backslash. +Null inputs emit null. The pattern must be given in MatchSubstringOptions. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. 
+memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring = _clone_signature(ends_with) +""" +Match strings against literal pattern. + +For each string in `strings`, emit true iff it contains a given pattern. +Null inputs emit null. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +match_substring_regex = _clone_signature(ends_with) +""" +Match strings against regex pattern. + +For each string in `strings`, emit true iff it matches a given pattern +at any position. The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +starts_with = _clone_signature(ends_with) +""" +Check if strings start with a literal pattern. + +For each string in `strings`, emit true iff it starts with a given pattern. +The pattern must be given in MatchSubstringOptions. +If ignore_case is set, only simple case folding is performed. + +Null inputs emit null. + +Parameters +---------- +strings : Array-like or scalar-like + Argument to compute function. +pattern : str + Substring pattern to look for inside input values. +ignore_case : bool, default False + Whether to perform a case-insensitive match. +options : pyarrow.compute.MatchSubstringOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.19 Categorizations ========================= +def is_finite( + values: NumericScalar | lib.NullScalar | NumericArray | lib.NullArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Return true if value is finite. + + For each input value, emit true iff the value is finite + (i.e. neither NaN, inf, nor -inf). + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +is_inf = _clone_signature(is_finite) +""" +Return true if infinity. + +For each input value, emit true iff the value is infinite (inf or -inf). + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +is_nan = _clone_signature(is_finite) +""" +Return true if NaN. 
+ +For each input value, emit true iff the value is NaN. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def is_null( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, + *, + nan_is_null: bool = False, + options: NullOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Return true if null (and optionally NaN). + + For each input value, emit true iff the value is null. + True may also be emitted for NaN values by setting the `nan_is_null` flag. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + options : pyarrow.compute.NullOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def is_valid( + values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Return true if non-null. + + For each input value, emit true iff the value is valid (i.e. non-null). + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +true_unless_null = _clone_signature(is_valid) +""" +Return true if non-null, else return null. + +For each input value, emit true iff the value +is valid (non-null), otherwise emit null. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.20 Selecting / multiplexing ========================= +def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): + """ + Choose values based on multiple conditions. + + `cond` must be a struct of Boolean values. `cases` can be a mix + of scalar and array arguments (of any type, but all must be the + same type or castable to a common type), with either exactly one + datum per child of `cond`, or one more `cases` than children of + `cond` (in which case we have an "else" value). + + Each row of the output will be the corresponding value of the + first datum in `cases` for which the corresponding child of `cond` + is true, or otherwise the "else" value (if given), or null. + + Essentially, this implements a switch-case or if-else, if-else... statement. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + *cases : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): + """ + Choose values from several arrays. + + For each row, the value of the first argument is used as a 0-based index + into the list of `values` arrays (i.e. index 0 selects the first of the + `values` arrays). The output value is the corresponding value of the + selected argument. + + If an index is null, the output will be null. 
+ + Parameters + ---------- + indices : Array-like or scalar-like + Argument to compute function. + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def coalesce( + *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None +) -> _ScalarOrArrayT: + """ + Select the first non-null value. + + Each row of the output will be the value from the first corresponding input + for which the value is not null. If all inputs are null in a row, the output + will be null. + + Parameters + ---------- + *values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +fill_null = coalesce +"""Replace each null element in values with a corresponding +element from fill_value. + +If fill_value is scalar-like, then every null element in values +will be replaced with fill_value. If fill_value is array-like, +then the i-th element in values will be replaced with the i-th +element in fill_value. + +The fill_value's type must be the same as that of values, or it +must be able to be implicitly casted to the array's type. + +This is an alias for :func:`coalesce`. + +Parameters +---------- +values : Array, ChunkedArray, or Scalar-like object + Each null element is replaced with the corresponding value + from fill_value. +fill_value : Array, ChunkedArray, or Scalar-like object + If not same type as values, will attempt to cast. + +Returns +------- +result : depends on inputs + Values with all null elements replaced + +Examples +-------- +>>> import pyarrow as pa +>>> arr = pa.array([1, 2, None, 3], type=pa.int8()) +>>> fill_value = pa.scalar(5, type=pa.int8()) +>>> arr.fill_null(fill_value) + +[ + 1, + 2, + 5, + 3 +] +>>> arr = pa.array([1, 2, None, 4, None]) +>>> arr.fill_null(pa.array([10, 20, 30, 40, 50])) + +[ + 1, + 2, + 30, + 4, + 50 +] +""" + +def if_else( + cond: ArrayLike | ScalarLike, + left: ArrayLike | ScalarLike, + right: ArrayLike | ScalarLike, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> ArrayLike | ScalarLike: + """ + Choose values based on a condition. + + `cond` must be a Boolean scalar/ array. + `left` or `right` must be of the same type scalar/ array. + `null` values in `cond` will be promoted to the output. + + Parameters + ---------- + cond : Array-like or scalar-like + Argument to compute function. + left : Array-like or scalar-like + Argument to compute function. + right : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.21 Structural transforms ========================= + +def list_value_length( + lists: _ListArray[Any] | _LargeListArray[Any] | ListArray[Any] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int32Array | lib.Int64Array | Expression: + """ + Compute list lengths. + + `lists` must have a list-like type. + For each non-null value in `lists`, its length is emitted. + Null values emit a null in the output. + + Parameters + ---------- + lists : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
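+
+ Examples
+ --------
+ A minimal usage sketch (illustrative only; the exact repr of the result
+ may differ across pyarrow versions):
+
+ >>> import pyarrow as pa
+ >>> import pyarrow.compute as pc
+ >>> pc.list_value_length(pa.array([[1, 2, 3], None, []]))  # doctest: +SKIP
+
+ [
+   3,
+   null,
+   0
+ ]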
+ """ + +def make_struct( + *args: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, + field_names: list[str] | tuple[str, ...] = (), + field_nullability: bool | None = None, + field_metadata: list[lib.KeyValueMetadata] | None = None, + options: MakeStructOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StructScalar | lib.StructArray | Expression: + """ + Wrap Arrays into a StructArray. + + Names of the StructArray's fields are + specified through MakeStructOptions. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + options : pyarrow.compute.MakeStructOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.22 Conversions ========================= +def ceil_temporal( + timestamps: _TemporalScalarT | _TemporalArrayT | Expression, + /, + multiple: int = 1, + unit: Literal[ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + ] = "day", + *, + week_starts_monday: bool = True, + ceil_is_strictly_greater: bool = False, + calendar_based_origin: bool = False, + options: RoundTemporalOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _TemporalScalarT | _TemporalArrayT | Expression: + """ + Round temporal values up to nearest multiple of specified time unit. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + multiple : int, default 1 + Number of units to round to. + unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. 
+ Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. + options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +floor_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values down to nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". +week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +round_temporal = _clone_signature(ceil_temporal) +""" +Round temporal values to the nearest multiple of specified time unit. + +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +timestamps : Array-like or scalar-like + Argument to compute function. +multiple : int, default 1 + Number of units to round to. +unit : str, default "day" + The unit in which `multiple` is expressed. 
+ Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". +week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. +ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. +calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. +options : pyarrow.compute.RoundTemporalOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def cast( + arr: lib.Scalar | lib.Array | lib.ChunkedArray, + target_type: _DataTypeT, + safe: bool | None = None, + options: CastOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Scalar[_DataTypeT] | lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]: + """ + Cast array values to another data type. Can also be invoked as an array + instance method. + + Parameters + ---------- + arr : Array-like + target_type : DataType or str + Type to cast to + safe : bool, default True + Check for overflows or other unsafe conversions + options : CastOptions, default None + Additional checks pass by CastOptions + memory_pool : MemoryPool, optional + memory pool to use for allocations during function execution. 
+ + Examples + -------- + >>> from datetime import datetime + >>> import pyarrow as pa + >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) + >>> arr.type + TimestampType(timestamp[us]) + + You can use ``pyarrow.DataType`` objects to specify the target type: + + >>> cast(arr, pa.timestamp("ms")) + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + + >>> cast(arr, pa.timestamp("ms")).type + TimestampType(timestamp[ms]) + + Alternatively, it is also supported to use the string aliases for these + types: + + >>> arr.cast("timestamp[ms]") + + [ + 2010-01-01 00:00:00.000, + 2015-01-01 00:00:00.000 + ] + >>> arr.cast("timestamp[ms]").type + TimestampType(timestamp[ms]) + + Returns + ------- + casted : Array + The cast result as a new Array + """ + +def strftime( + timestamps: TemporalScalar | TemporalArray | Expression, + /, + format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C", + *, + options: StrftimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.StringScalar | lib.StringArray | Expression: + """ + Format temporal values according to a format string. + + For each input value, emit a formatted string. + The time format string and locale can be set using StrftimeOptions. + The output precision of the "%S" (seconds) format code depends on + the input time precision: it is an integer for timestamps with + second precision, a real number with the required number of fractional + digits for higher precisions. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database, or if the specified locale + does not exist on this system. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + options : pyarrow.compute.StrftimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def strptime( + strings: StringScalar | StringArray | Expression, + /, + format: str, + unit: Literal["s", "ms", "us", "ns"], + error_is_null: bool = False, + *, + options: StrptimeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar | lib.TimestampArray | Expression: + """ + Parse timestamps. + + For each string in `strings`, parse it as a timestamp. + The timestamp unit and the expected string pattern must be given + in StrptimeOptions. Null inputs emit null. If a non-null string + fails parsing, an error is returned by default. + + Parameters + ---------- + strings : Array-like or scalar-like + Argument to compute function. + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + Note that the semantics of the format follow the C/C++ strptime, not the Python one. + There are differences in behavior, for example how the "%y" placeholder + handles years with less than four digits. + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + error_is_null : boolean, default False + Return null on parsing errors if true or raise if false. + options : pyarrow.compute.StrptimeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
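+
+ Examples
+ --------
+ A minimal usage sketch (illustrative only; repr formatting may vary by
+ pyarrow version):
+
+ >>> import pyarrow as pa
+ >>> import pyarrow.compute as pc
+ >>> pc.strptime(pa.array(["2023-01-02"]), format="%Y-%m-%d", unit="s")  # doctest: +SKIP
+
+ [
+   2023-01-02 00:00:00
+ ]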
+ """ + +# ========================= 2.23 Temporal component extraction ========================= +def day( + values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array | Expression: + """ + Extract day number. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def day_of_week( + values: TemporalScalar | TemporalArray | Expression, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: + """ + Extract day of the week number. + + By default, the week starts on Monday represented by 0 and ends on Sunday + represented by 6. + `DayOfWeekOptions.week_start` can be used to set another starting day using + the ISO numbering convention (1=start week on Monday, 7=start week on Sunday). + Day numbers can start at 0 or 1 based on `DayOfWeekOptions.count_from_zero`. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +day_of_year = _clone_signature(day) +""" +Extract day of year number. + +January 1st maps to day number 1, February 1st to 32, etc. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def hour( + values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any] + | lib.TimestampArray[Any] | lib.Time32Array[Any] | lib.Time64Array[Any] + | lib.ChunkedArray[lib.TimestampScalar[Any]] + | lib.ChunkedArray[lib.Time32Scalar[Any]] + | lib.ChunkedArray[lib.Time64Scalar[Any]] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array | Expression: + """ + Extract hour value. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
+ """ + +def is_dst( + values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Extracts if currently observing daylight savings. + + IsDaylightSavings returns true if a timestamp has a daylight saving + offset in the given timezone. + Null values emit null. + An error is returned if the values do not have a defined timezone. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def iso_week( + values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.Int64Scalar | lib.Int64Array | Expression: + """ + Extract ISO week of year number. + + First ISO week has the majority (4 or more) of its days in January. + ISO week starts on Monday. The week number starts with 1 and can run + up to 53. + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +iso_year = _clone_signature(iso_week) +""" +Extract ISO year number. + +First week of an ISO year has the majority (4 or more) of its days in January. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def is_leap_year( + values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar | lib.TimestampArray + | lib.Date32Array + | lib.Date64Array + | lib.ChunkedArray[lib.TimestampScalar] + | lib.ChunkedArray[lib.Date32Scalar] + | lib.ChunkedArray[lib.Date64Scalar] | Expression, + /, + *, + memory_pool: lib.MemoryPool | None = None, +) -> lib.BooleanScalar | lib.BooleanArray | Expression: + """ + Extract if year is a leap year. + + Null values emit null. + An error is returned if the values have a defined timezone but it + cannot be found in the timezone database. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +microsecond = _clone_signature(iso_week) +""" +Extract microsecond values. + +Microsecond returns number of microseconds since the last full millisecond. +Null values emit null. +An error is returned if the values have a defined timezone but it +cannot be found in the timezone database. + +Parameters +---------- +values : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +millisecond = _clone_signature(iso_week) +""" +Extract millisecond values. + +Millisecond returns number of milliseconds since the last full second. +Null values emit null. 
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+minute = _clone_signature(iso_week)
+"""
+Extract minute values.
+
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+month = _clone_signature(day_of_week)
+"""
+Extract month number.
+
+Month is encoded as January=1, December=12.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+nanosecond = _clone_signature(hour)
+"""
+Extract nanosecond values.
+
+Nanosecond returns number of nanoseconds since the last full microsecond.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+quarter = _clone_signature(day_of_week)
+"""
+Extract quarter of year number.
+
+First quarter maps to 1 and fourth quarter maps to 4.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+second = _clone_signature(hour)
+"""
+Extract second values.
+
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+subsecond = _clone_signature(hour)
+"""
+Extract subsecond values.
+
+Subsecond returns the fraction of a second since the last full second.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+us_week = _clone_signature(iso_week)
+"""
+Extract US week of year number.
+
+First US week has the majority (4 or more) of its days in January.
+US week starts on Monday. The week number starts with 1 and can run
+up to 53.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+us_year = _clone_signature(iso_week)
+"""
+Extract US epidemiological year number.
+
+First week of US epidemiological year has the majority (4 or more) of
+its days in January. Last week of US epidemiological year has the
+year's last Wednesday in it. US epidemiological week starts on Sunday.
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+year = _clone_signature(iso_week)
+"""
+Extract year number.
+
+Null values emit null.
+An error is returned if the values have a defined timezone but it
+cannot be found in the timezone database.
+
+Parameters
+----------
+values : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+
+def week(
+    values: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression,
+    /,
+    *,
+    week_starts_monday: bool = True,
+    count_from_zero: bool = False,
+    first_week_is_fully_in_year: bool = False,
+    options: WeekOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.Int64Scalar | lib.Int64Array | Expression:
+    """
+    Extract week of year number.
+
+    First week has the majority (4 or more) of its days in January.
+    Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using
+    DayOfWeekOptions.count_from_zero.
+    An error is returned if the values have a defined timezone but it
+    cannot be found in the timezone database.
+
+    Parameters
+    ----------
+    values : Array-like or scalar-like
+        Argument to compute function.
+    week_starts_monday : bool, default True
+        If True, weeks start on Monday; if False, on Sunday.
+    count_from_zero : bool, default False
+        If True, dates at the start of a year that fall into the last week
+        of the previous year emit 0.
+        If False, they emit 52 or 53 (the week number of the last week
+        of the previous year).
+    first_week_is_fully_in_year : bool, default False
+        If True, week number 0 is fully in January.
+        If False, a week that begins on December 29, 30 or 31 is considered
+        to be week number 0 of the following year.
+    options : pyarrow.compute.WeekOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def year_month_day(
+    values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.StructScalar | lib.StructArray | Expression:
+    """
+    Extract (year, month, day) struct.
+
+    Null values emit null.
+    An error is returned if the values have a defined timezone but it
+    cannot be found in the timezone database.
+
+    Parameters
+    ----------
+    values : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 2.24 Temporal difference =========================
+def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None):
+    """
+    Compute the number of days and milliseconds between two timestamps.
+
+    Returns the number of days and milliseconds from `start` to `end`.
+    That is, first the difference in days is computed as if both
+    timestamps were truncated to the day, then the difference between the times
+    of the two timestamps is computed as if both times were truncated to the
+    millisecond.
+    Null values return null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def days_between(
+    start, end, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Scalar | lib.Int64Array:
+    """
+    Compute the number of days between two timestamps.
+
+    Returns the number of day boundaries crossed from `start` to `end`.
+    That is, the difference is calculated as if the timestamps were
+    truncated to the day.
+    Null values emit null.
+
+    Parameters
+    ----------
+    start : Array-like or scalar-like
+        Argument to compute function.
+    end : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+hours_between = _clone_signature(days_between)
+"""
+Compute the number of hours between two timestamps.
+
+Returns the number of hour boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the hour.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+microseconds_between = _clone_signature(days_between)
+"""
+Compute the number of microseconds between two timestamps.
+
+Returns the number of microsecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the microsecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+milliseconds_between = _clone_signature(days_between)
+"""
+Compute the number of millisecond boundaries between two timestamps.
+
+Returns the number of millisecond boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were
+truncated to the millisecond.
+Null values emit null.
+
+Parameters
+----------
+start : Array-like or scalar-like
+    Argument to compute function.
+end : Array-like or scalar-like
+    Argument to compute function.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+"""
+minutes_between = _clone_signature(days_between)
+"""
+Compute the number of minutes between two timestamps.
+
+Returns the number of minute boundaries crossed from `start` to `end`.
+That is, the difference is calculated as if the timestamps were +truncated to the minute. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def month_day_nano_interval_between( + start, end, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: + """ + Compute the number of months, days and nanoseconds between two timestamps. + + Returns the number of months, days, and nanoseconds from `start` to `end`. + That is, first the difference in months is computed as if both timestamps + were truncated to the months, then the difference between the days + is computed, and finally the difference between the times of the two + timestamps is computed as if both times were truncated to the nanosecond. + Null values return null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Compute the number of months between two timestamps. + + Returns the number of month boundaries crossed from `start` to `end`. + That is, the difference is calculated as if the timestamps were + truncated to the month. + Null values emit null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +nanoseconds_between = _clone_signature(days_between) +""" +Compute the number of nanoseconds between two timestamps. + +Returns the number of nanosecond boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the nanosecond. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +quarters_between = _clone_signature(days_between) +""" +Compute the number of quarters between two timestamps. + +Returns the number of quarter start boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the quarter. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +seconds_between = _clone_signature(days_between) +""" +Compute the number of seconds between two timestamps. + +Returns the number of second boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the second. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. 
+end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +def weeks_between( + start, + end, + /, + *, + count_from_zero: bool = True, + week_start: int = 1, + options: DayOfWeekOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Int64Scalar | lib.Int64Array: + """ + Compute the number of weeks between two timestamps. + + Returns the number of week boundaries crossed from `start` to `end`. + That is, the difference is calculated as if the timestamps were + truncated to the week. + Null values emit null. + + Parameters + ---------- + start : Array-like or scalar-like + Argument to compute function. + end : Array-like or scalar-like + Argument to compute function. + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + options : pyarrow.compute.DayOfWeekOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +years_between = _clone_signature(days_between) +""" +Compute the number of years between two timestamps. + +Returns the number of year boundaries crossed from `start` to `end`. +That is, the difference is calculated as if the timestamps were +truncated to the year. +Null values emit null. + +Parameters +---------- +start : Array-like or scalar-like + Argument to compute function. +end : Array-like or scalar-like + Argument to compute function. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" + +# ========================= 2.25 Timezone handling ========================= +def assume_timezone( + timestamps: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, + timezone: str, + *, + ambiguous: Literal["raise", "earliest", "latest"] = "raise", + nonexistent: Literal["raise", "earliest", "latest"] = "raise", + options: AssumeTimezoneOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression: + """ + Convert naive timestamp to timezone-aware timestamp. + + Input timestamps are assumed to be relative to the timezone given in the + `timezone` option. They are converted to UTC-relative timestamps and + the output type has its timezone set to the value of the `timezone` + option. Null values emit null. + This function is meant to be used when an external system produces + "timezone-naive" timestamps which need to be converted to + "timezone-aware" timestamps. An error is returned if the timestamps + already have a defined timezone. + + Parameters + ---------- + timestamps : Array-like or scalar-like + Argument to compute function. + timezone : str + Timezone to assume for the input. + ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + options : pyarrow.compute.AssumeTimezoneOptions, optional + Alternative way of passing options. 
+ memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def local_timestamp( + timestamps: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, + /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.TimestampScalar | lib.TimestampArray | Expression: + """ + Convert timestamp to a timezone-naive local time timestamp. + + LocalTimestamp converts timezone-aware timestamp to local timestamp + of the given timestamp's timezone and removes timezone metadata. + Alternative name for this timestamp is also wall clock time. + If input is in UTC or without timezone, then unchanged input values + without timezone metadata are returned. + Null values emit null. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 2.26 Random number generation ========================= +def random( + n: int, + *, + initializer: Literal["system"] | int = "system", + options: RandomOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.DoubleArray: + """ + Generate numbers in the range [0, 1). + + Generated values are uniformly-distributed, double-precision + in range [0, 1). Algorithm and seed can be changed via RandomOptions. + + Parameters + ---------- + n : int + Number of values to generate, must be greater than or equal to 0 + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + options : pyarrow.compute.RandomOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +# ========================= 3. Array-wise (“vector”) functions ========================= + +# ========================= 3.1 Cumulative Functions ========================= +def cumulative_sum( + values: _NumericArrayT | Expression, + /, + start: lib.Scalar | None = None, + *, + skip_nulls: bool = False, + options: CumulativeSumOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _NumericArrayT | Expression: + """ + Compute the cumulative sum over a numeric input. + + `values` must be numeric. Return an array/chunked array which is the + cumulative sum computed over `values`. Results will wrap around on + integer overflow. Use function "cumulative_sum_checked" if you want + overflow to return an error. The default start is 0. + + Parameters + ---------- + values : Array-like + Argument to compute function. + start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. + skip_nulls : bool, default False + When false, the first encountered null is propagated. + options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +cumulative_sum_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative sum over a numeric input. + +`values` must be numeric. 
Return an array/chunked array which is the +cumulative sum computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_sum". The default start is 0. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_prod = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. Results will wrap around on +integer overflow. Use function "cumulative_prod_checked" if you want +overflow to return an error. The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_prod_checked = _clone_signature(cumulative_sum) +""" +Compute the cumulative product over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative product computed over `values`. This function returns an error +on overflow. For a variant that doesn't fail on overflow, use +function "cumulative_prod". The default start is 1. + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_max = _clone_signature(cumulative_sum) +""" +Compute the cumulative max over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative max computed over `values`. The default start is the minimum +value of input type (so that any other value will replace the +start as the new maximum). + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
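+
+Examples
+--------
+A small illustrative sketch (object-repr line omitted; assumes a pyarrow
+version that provides the cumulative kernels declared in this stub):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.cumulative_max(pa.array([1, 3, 2, 5, 4]))
+[
+  1,
+  3,
+  3,
+  5,
+  5
+]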
+""" +cumulative_min = _clone_signature(cumulative_sum) +""" +Compute the cumulative min over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative min computed over `values`. The default start is the maximum +value of input type (so that any other value will replace the +start as the new minimum). + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +cumulative_mean = _clone_signature(cumulative_sum) +""" +Compute the cumulative max over a numeric input. + +`values` must be numeric. Return an array/chunked array which is the +cumulative max computed over `values`. The default start is the minimum +value of input type (so that any other value will replace the +start as the new maximum). + +Parameters +---------- +values : Array-like + Argument to compute function. +start : Scalar, default None + Starting value for the cumulative operation. If none is given, + a default value depending on the operation and input type is used. +skip_nulls : bool, default False + When false, the first encountered null is propagated. +options : pyarrow.compute.CumulativeOptions, optional + Alternative way of passing options. +memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. +""" +# ========================= 3.2 Associative transforms ========================= + +def dictionary_encode( + array: _ScalarOrArrayT | Expression, + /, + null_encoding: Literal["mask", "encode"] = "mask", + *, + options=None, + memory_pool: lib.MemoryPool | None = None, +) -> _ScalarOrArrayT | Expression: ... +def unique(array: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT | Expression: ... +def value_counts( + array: lib.Array | lib.ChunkedArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None +) -> lib.StructArray | Expression: ... + +# ========================= 3.3 Selections ========================= +@overload +def array_filter( + array: _ArrayT, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... +@overload +def array_filter( + array: Expression, + selection_filter: list[bool] | list[bool | None] | BooleanArray, + /, + null_selection_behavior: Literal["drop", "emit_null"] = "drop", + *, + options: FilterOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> Expression: ... +@overload +def array_take( + array: _ArrayT, + indices: list[int] + | list[int | None] + | lib.Int16Array + | lib.Int32Array + | lib.Int64Array + | lib.ChunkedArray[lib.Int16Scalar] + | lib.ChunkedArray[lib.Int32Scalar] + | lib.ChunkedArray[lib.Int64Scalar], + /, + *, + boundscheck: bool = True, + options: TakeOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> _ArrayT: ... 
+@overload
+def array_take(
+    array: Expression,
+    indices: list[int]
+    | list[int | None]
+    | lib.Int16Array
+    | lib.Int32Array
+    | lib.Int64Array
+    | lib.ChunkedArray[lib.Int16Scalar]
+    | lib.ChunkedArray[lib.Int32Scalar]
+    | lib.ChunkedArray[lib.Int64Scalar],
+    /,
+    *,
+    boundscheck: bool = True,
+    options: TakeOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> Expression: ...
+@overload
+def drop_null(input: _ArrayT, /, *, memory_pool: lib.MemoryPool | None = None) -> _ArrayT: ...
+@overload
+def drop_null(
+    input: Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> Expression: ...
+
+filter = array_filter
+take = array_take
+"""
+Select values (or records) from array- or table-like data given integer
+selection indices.
+
+The result will be of the same type(s) as the input, with elements taken
+from the input array (or record batch / table fields) at the given
+indices. If an index is null then the corresponding value in the output
+will be null.
+
+Parameters
+----------
+data : Array, ChunkedArray, RecordBatch, or Table
+indices : Array, ChunkedArray
+    Must be of integer type
+boundscheck : boolean, default True
+    Whether to boundscheck the indices. If False and there is an out of
+    bounds index, will likely cause the process to crash.
+memory_pool : MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
+
+Returns
+-------
+result : depends on inputs
+    Selected values for the given indices
+
+Examples
+--------
+>>> import pyarrow as pa
+>>> arr = pa.array(["a", "b", "c", None, "e", "f"])
+>>> indices = pa.array([0, None, 4, 3])
+>>> arr.take(indices)
+
+[
+  "a",
+  null,
+  "e",
+  null
+]
+"""
+
+# ========================= 3.4 Containment tests =========================
+def indices_nonzero(
+    values: lib.BooleanArray
+    | lib.NullArray
+    | NumericArray
+    | lib.Decimal128Array
+    | lib.Decimal256Array | Expression,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression:
+    """
+    Return the indices of the values in the array that are non-zero.
+
+    For each input value, check if it's zero, false or null. Emit the index
+    of the value in the array if it's none of those.
+
+    Parameters
+    ----------
+    values : Array-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 3.5 Sorts and partitions =========================
+def array_sort_indices(
+    array: lib.Array | lib.ChunkedArray | Expression,
+    /,
+    order: _Order = "ascending",
+    *,
+    null_placement: _Placement = "at_end",
+    options: ArraySortOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression:
+    """
+    Return the indices that would sort an array.
+
+    This function computes an array of indices that define a stable sort
+    of the input array. By default, null values are considered greater
+    than any other value and are therefore sorted at the end of the array.
+    For floating-point types, NaNs are considered greater than any
+    other non-null value, but smaller than null values.
+
+    The handling of nulls and NaNs can be changed in ArraySortOptions.
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    order : str, default "ascending"
+        Which order to sort values in.
+        Accepted values are "ascending", "descending".
+    null_placement : str, default "at_end"
+        Where nulls in the input should be sorted.
+        Accepted values are "at_start", "at_end".
+    options : pyarrow.compute.ArraySortOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def partition_nth_indices(
+    array: lib.Array | lib.ChunkedArray | Expression,
+    /,
+    pivot: int,
+    *,
+    null_placement: _Placement = "at_end",
+    options: PartitionNthOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array | Expression:
+    """
+    Return the indices that would partition an array around a pivot.
+
+    This function computes an array of indices that define a non-stable
+    partial sort of the input array.
+
+    The output is such that the `N`'th index points to the `N`'th element
+    of the input in sorted order, and all indices before the `N`'th point
+    to elements in the input less or equal to elements at or after the `N`'th.
+
+    By default, null values are considered greater than any other value
+    and are therefore partitioned towards the end of the array.
+    For floating-point types, NaNs are considered greater than any
+    other non-null value, but smaller than null values.
+
+    The pivot index `N` must be given in PartitionNthOptions.
+    The handling of nulls and NaNs can also be changed in PartitionNthOptions.
+
+    Parameters
+    ----------
+    array : Array-like
+        Argument to compute function.
+    pivot : int
+        Index into the equivalent sorted array of the pivot element.
+    null_placement : str, default "at_end"
+        Where nulls in the input should be partitioned.
+        Accepted values are "at_start", "at_end".
+    options : pyarrow.compute.PartitionNthOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def rank(
+    input: lib.Array | lib.ChunkedArray,
+    /,
+    sort_keys: _Order = "ascending",
+    *,
+    null_placement: _Placement = "at_end",
+    tiebreaker: Literal["min", "max", "first", "dense"] = "first",
+    options: RankOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.UInt64Array:
+    """
+    Compute ordinal ranks of an array (1-based).
+
+    This function computes a rank of the input array.
+    By default, null values are considered greater than any other value and
+    are therefore sorted at the end of the input. For floating-point types,
+    NaNs are considered greater than any other non-null value, but smaller
+    than null values. The default tiebreaker is to assign ranks in order of
+    when ties appear in the input.
+
+    The handling of nulls, NaNs and tiebreakers can be changed in RankOptions.
+
+    Parameters
+    ----------
+    input : Array-like or scalar-like
+        Argument to compute function.
+    sort_keys : sequence of (name, order) tuples or str, default "ascending"
+        Names of field/column keys to sort the input on,
+        along with the order each field/column is sorted in.
+        Accepted values for `order` are "ascending", "descending".
+        The field name can be a string column name or expression.
+        Alternatively, one can simply pass "ascending" or "descending" as a string
+        if the input is array-like.
+    null_placement : str, default "at_end"
+        Where nulls in input should be sorted.
+        Accepted values are "at_start", "at_end".
+    tiebreaker : str, default "first"
+        Configure how ties between equal values are handled.
+        Accepted values are:
+
+        - "min": Ties get the smallest possible rank in sorted order.
+        - "max": Ties get the largest possible rank in sorted order.
+ - "first": Ranks are assigned in order of when ties appear in the + input. This ensures the ranks are a stable permutation + of the input. + - "dense": The ranks span a dense [1, M] interval where M is the + number of distinct values in the input. + options : pyarrow.compute.RankOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def select_k_unstable( + input: lib.Array | lib.ChunkedArray | Expression, + /, + k: int, + sort_keys: list[tuple[str, _Order]], + *, + options: SelectKOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: + """ + Select the indices of the first `k` ordered elements from the input. + + This function selects an array of indices of the first `k` ordered elements + from the `input` array, record batch or table specified in the column keys + (`options.sort_keys`). Output is not guaranteed to be stable. + Null values are considered greater than any other value and are + therefore ordered at the end. For floating-point types, NaNs are considered + greater than any other non-null value, but smaller than null values. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + options : pyarrow.compute.SelectKOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def sort_indices( + input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression, + /, + sort_keys: Sequence[tuple[str, _Order]] = (), + *, + null_placement: _Placement = "at_end", + options: SortOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.UInt64Array | Expression: + """ + Return the indices that would sort an array, record batch or table. + + This function computes an array of indices that define a stable sort + of the input array, record batch or table. By default, null values are + considered greater than any other value and are therefore sorted at the + end of the input. For floating-point types, NaNs are considered greater + than any other non-null value, but smaller than null values. + + The handling of nulls and NaNs can be changed in SortOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + The field name can be a string column name or expression. + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". + options : pyarrow.compute.SortOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
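+
+    Examples
+    --------
+    A small illustrative sketch (object-repr line omitted; the column name
+    "a" is just an example):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> table = pa.table({"a": [3, 1, 2]})
+    >>> pc.sort_indices(table, sort_keys=[("a", "ascending")])
+    [
+      1,
+      2,
+      0
+    ]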
+    """
+
+# ========================= 3.6 Structural transforms =========================
+def list_element(
+    lists: lib.Array[ListScalar[_DataTypeT]] | lib.ChunkedArray[ListScalar[_DataTypeT]] | ListScalar[_DataTypeT] | Expression,
+    index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] | _DataTypeT | Expression:
+    """
+    Compute elements of nested list values using an index.
+
+    `lists` must have a list-like type.
+    For each value in each list of `lists`, the element at `index`
+    is emitted. Null values emit a null in the output.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    index : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def list_flatten(
+    lists: ArrayOrChunkedArray[ListScalar[Any]] | Expression,
+    /,
+    recursive: bool = False,
+    *,
+    options: ListFlattenOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any] | Expression:
+    """
+    Flatten list values.
+
+    `lists` must have a list-like type (lists, list-views, and
+    fixed-size lists).
+    Return an array with the top list level flattened unless
+    `recursive` is set to true in ListFlattenOptions. When that
+    is the case, flattening happens recursively until a non-list
+    array is formed.
+
+    Null list values do not emit anything to the output.
+
+    Parameters
+    ----------
+    lists : Array-like
+        Argument to compute function.
+    recursive : bool, default False
+        When True, the list array is flattened recursively until an array
+        of non-list values is formed.
+    options : pyarrow.compute.ListFlattenOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def list_parent_indices(
+    lists: ArrayOrChunkedArray[Any] | Expression, /, *, memory_pool: lib.MemoryPool | None = None
+) -> lib.Int64Array | Expression:
+    """
+    Compute parent indices of nested list values.
+
+    `lists` must have a list-like or list-view type.
+    For each value in each list of `lists`, the top-level list index
+    is emitted.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+def list_slice(
+    lists: ArrayOrChunkedArray[Any] | Expression,
+    /,
+    start: int,
+    stop: int | None = None,
+    step: int = 1,
+    return_fixed_size_list: bool | None = None,
+    *,
+    options: ListSliceOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> lib.ListArray[Any] | Expression:
+    """
+    Compute slice of list-like array.
+
+    `lists` must have a list-like type.
+    For each list element, compute a slice, returning a new list array.
+    A variable or fixed size list array is returned, depending on options.
+
+    Parameters
+    ----------
+    lists : Array-like or scalar-like
+        Argument to compute function.
+    start : int
+        Index to start slicing inner list elements (inclusive).
+    stop : Optional[int], default None
+        If given, index to stop slicing at (exclusive).
+        If not given, slicing will stop at the end. (NotImplemented)
+    step : int, default 1
+        Slice step.
+    return_fixed_size_list : Optional[bool], default None
+        Whether to return a FixedSizeListArray.
If true _and_ stop is after + a list element's length, nulls will be appended to create the + requested slice size. The default of `None` will return the same + type which was passed in. + options : pyarrow.compute.ListSliceOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def map_lookup( + container, + /, + query_key, + occurrence: str, + *, + options: MapLookupOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Find the items corresponding to a given key in a Map. + + For a given query key (passed via MapLookupOptions), extract + either the FIRST, LAST or ALL items from a Map that have + matching keys. + + Parameters + ---------- + container : Array-like or scalar-like + Argument to compute function. + query_key : Scalar or Object can be converted to Scalar + The key to search for. + occurrence : str + The occurrence(s) to return from the Map + Accepted values are "first", "last", or "all". + options : pyarrow.compute.MapLookupOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def struct_field( + values, + /, + indices, + *, + options: StructFieldOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +): + """ + Extract children of a struct or union by index. + + Given a list of indices (passed via StructFieldOptions), extract + the child array or scalar with the given child index, recursively. + + For union inputs, nulls are emitted for union values that reference + a different child than specified. Also, the indices are always + in physical order, not logical type codes - for example, the first + child is always index 0. + + An empty list of indices returns the argument unchanged. + + Parameters + ---------- + values : Array-like or scalar-like + Argument to compute function. + indices : List[str], List[bytes], List[int], Expression, bytes, str, or int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. + options : pyarrow.compute.StructFieldOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values backward to fill null slots. + + Given an array, propagate next valid observation backward to previous valid + or nothing if all next values are null. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """ + +def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): + """ + Carry non-null values forward to fill null slots. + + Given an array, propagate last valid observation forward to next valid + or nothing if all previous values are null. + + Parameters + ---------- + values : Array-like + Argument to compute function. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. 
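+
+    Examples
+    --------
+    A small illustrative sketch (object-repr line omitted):
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> pc.fill_null_forward(pa.array([1, None, None, 4]))
+    [
+      1,
+      1,
+      1,
+      4
+    ]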
+    """
+
+def replace_with_mask(
+    values,
+    mask: list[bool] | list[bool | None] | BooleanArray,
+    replacements,
+    /,
+    *,
+    memory_pool: lib.MemoryPool | None = None,
+):
+    """
+    Replace items selected with a mask.
+
+    Given an array and a boolean mask (either scalar or of equal length),
+    along with replacement values (either scalar or array),
+    each element of the array for which the corresponding mask element is
+    true will be replaced by the next value from the replacements,
+    or with null if the mask is null.
+    Hence, for replacement arrays, len(replacements) == sum(mask == true).
+
+    Parameters
+    ----------
+    values : Array-like
+        Argument to compute function.
+    mask : Array-like
+        Argument to compute function.
+    replacements : Array-like
+        Argument to compute function.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+# ========================= 3.7 Pairwise functions =========================
+def pairwise_diff(
+    input: _NumericOrTemporalArrayT | Expression,
+    /,
+    period: int = 1,
+    *,
+    options: PairwiseOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> _NumericOrTemporalArrayT | Expression:
+    """
+    Compute first order difference of an array.
+
+    Computes the first order difference of an array. It internally calls
+    the scalar function "subtract" to compute differences, so its
+    behavior and supported types are the same as "subtract".
+    The period can be specified in :struct:`PairwiseOptions`.
+
+    Results will wrap around on integer overflow. Use function
+    "pairwise_diff_checked" if you want overflow to return an error.
+
+    Parameters
+    ----------
+    input : Array-like
+        Argument to compute function.
+    period : int, default 1
+        Period for applying the period function.
+    options : pyarrow.compute.PairwiseOptions, optional
+        Alternative way of passing options.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the default memory pool.
+    """
+
+pairwise_diff_checked = _clone_signature(pairwise_diff)
+"""
+Compute first order difference of an array.
+
+Computes the first order difference of an array. It internally calls
+the scalar function "subtract_checked" to compute differences, so its
+behavior and supported types are the same as "subtract_checked".
+The period can be specified in :struct:`PairwiseOptions`.
+
+This function returns an error on overflow. For a variant that doesn't
+fail on overflow, use function "pairwise_diff".
+
+Parameters
+----------
+input : Array-like
+    Argument to compute function.
+period : int, default 1
+    Period for applying the period function.
+options : pyarrow.compute.PairwiseOptions, optional
+    Alternative way of passing options.
+memory_pool : pyarrow.MemoryPool, optional
+    If not passed, will allocate memory from the default memory pool.
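+
+Examples
+--------
+A small illustrative sketch (object-repr line omitted; assumes a pyarrow
+version that provides pairwise_diff_checked):
+
+>>> import pyarrow as pa
+>>> import pyarrow.compute as pc
+>>> pc.pairwise_diff_checked(pa.array([2, 5, 9]), period=1)
+[
+  null,
+  3,
+  4
+]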
+""" diff --git a/python/pyarrow-stubs/config.pyi b/python/pyarrow-stubs/config.pyi new file mode 100644 index 00000000000..166e10c9734 --- /dev/null +++ b/python/pyarrow-stubs/config.pyi @@ -0,0 +1,41 @@ +from typing import NamedTuple + +class VersionInfo(NamedTuple): + major: int + minor: int + patch: int + +class BuildInfo(NamedTuple): + version: str + version_info: VersionInfo + so_version: str + full_so_version: str + compiler_id: str + compiler_version: str + compiler_flags: str + git_id: str + git_description: str + package_kind: str + build_type: str + +class RuntimeInfo(NamedTuple): + simd_level: str + detected_simd_level: str + +cpp_build_info: BuildInfo +cpp_version: str +cpp_version_info: VersionInfo + +def runtime_info() -> RuntimeInfo: ... +def set_timezone_db_path(path: str) -> None: ... + +__all__ = [ + "VersionInfo", + "BuildInfo", + "RuntimeInfo", + "cpp_build_info", + "cpp_version", + "cpp_version_info", + "runtime_info", + "set_timezone_db_path", +] diff --git a/python/pyarrow-stubs/dataset.pyi b/python/pyarrow-stubs/dataset.pyi new file mode 100644 index 00000000000..a145437bb52 --- /dev/null +++ b/python/pyarrow-stubs/dataset.pyi @@ -0,0 +1,229 @@ +# from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload +# +# from _typeshed import StrPath +# from pyarrow._dataset import ( +# CsvFileFormat, +# CsvFragmentScanOptions, +# Dataset, +# DatasetFactory, +# DirectoryPartitioning, +# FeatherFileFormat, +# FileFormat, +# FileFragment, +# FilenamePartitioning, +# FileSystemDataset, +# FileSystemDatasetFactory, +# FileSystemFactoryOptions, +# FileWriteOptions, +# Fragment, +# FragmentScanOptions, +# HivePartitioning, +# InMemoryDataset, +# IpcFileFormat, +# IpcFileWriteOptions, +# JsonFileFormat, +# JsonFragmentScanOptions, +# Partitioning, +# PartitioningFactory, +# Scanner, +# TaggedRecordBatch, +# UnionDataset, +# UnionDatasetFactory, +# WrittenFile, +# get_partition_keys, +# ) +# from pyarrow._dataset_orc import OrcFileFormat +# from pyarrow._dataset_parquet import ( +# ParquetDatasetFactory, +# ParquetFactoryOptions, +# ParquetFileFormat, +# ParquetFileFragment, +# ParquetFileWriteOptions, +# ParquetFragmentScanOptions, +# ParquetReadOptions, +# RowGroupInfo, +# ) +# from pyarrow._dataset_parquet_encryption import ( +# ParquetDecryptionConfig, +# ParquetEncryptionConfig, +# ) +# from pyarrow.compute import Expression, field, scalar +# from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table +# +# from ._fs import SupportedFileSystem +# +# _orc_available: bool +# _parquet_available: bool +# +# __all__ = [ +# "CsvFileFormat", +# "CsvFragmentScanOptions", +# "Dataset", +# "DatasetFactory", +# "DirectoryPartitioning", +# "FeatherFileFormat", +# "FileFormat", +# "FileFragment", +# "FilenamePartitioning", +# "FileSystemDataset", +# "FileSystemDatasetFactory", +# "FileSystemFactoryOptions", +# "FileWriteOptions", +# "Fragment", +# "FragmentScanOptions", +# "HivePartitioning", +# "InMemoryDataset", +# "IpcFileFormat", +# "IpcFileWriteOptions", +# "JsonFileFormat", +# "JsonFragmentScanOptions", +# "Partitioning", +# "PartitioningFactory", +# "Scanner", +# "TaggedRecordBatch", +# "UnionDataset", +# "UnionDatasetFactory", +# "WrittenFile", +# "get_partition_keys", +# # Orc +# "OrcFileFormat", +# # Parquet +# "ParquetDatasetFactory", +# "ParquetFactoryOptions", +# "ParquetFileFormat", +# "ParquetFileFragment", +# "ParquetFileWriteOptions", +# "ParquetFragmentScanOptions", +# "ParquetReadOptions", +# "RowGroupInfo", +# # Parquet Encryption +# 
"ParquetDecryptionConfig", +# "ParquetEncryptionConfig", +# # Compute +# "Expression", +# "field", +# "scalar", +# # Dataset +# "partitioning", +# "parquet_dataset", +# "write_dataset", +# ] +# +# _DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] +# +# @overload +# def partitioning( +# schema: Schema, +# ) -> Partitioning: ... +# @overload +# def partitioning( +# schema: Schema, +# *, +# flavor: Literal["filename"], +# dictionaries: dict[str, Array] | None = None, +# ) -> Partitioning: ... +# @overload +# def partitioning( +# schema: Schema, +# *, +# flavor: Literal["filename"], +# dictionaries: Literal["infer"], +# ) -> PartitioningFactory: ... +# @overload +# def partitioning( +# field_names: list[str], +# *, +# flavor: Literal["filename"], +# ) -> PartitioningFactory: ... +# @overload +# def partitioning( +# schema: Schema, +# *, +# flavor: Literal["hive"], +# dictionaries: Literal["infer"], +# ) -> PartitioningFactory: ... +# @overload +# def partitioning( +# *, +# flavor: Literal["hive"], +# ) -> PartitioningFactory: ... +# @overload +# def partitioning( +# schema: Schema, +# *, +# flavor: Literal["hive"], +# dictionaries: dict[str, Array] | None = None, +# ) -> Partitioning: ... +# def parquet_dataset( +# metadata_path: StrPath, +# schema: Schema | None = None, +# filesystem: SupportedFileSystem | None = None, +# format: ParquetFileFormat | None = None, +# partitioning: Partitioning | PartitioningFactory | None = None, +# partition_base_dir: str | None = None, +# ) -> FileSystemDataset: ... +# @overload +# def dataset( +# source: StrPath | Sequence[StrPath], +# schema: Schema | None = None, +# format: FileFormat | _DatasetFormat | None = None, +# filesystem: SupportedFileSystem | str | None = None, +# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, +# partition_base_dir: str | None = None, +# exclude_invalid_files: bool | None = None, +# ignore_prefixes: list[str] | None = None, +# ) -> FileSystemDataset: ... +# @overload +# def dataset( +# source: list[Dataset], +# schema: Schema | None = None, +# format: FileFormat | _DatasetFormat | None = None, +# filesystem: SupportedFileSystem | str | None = None, +# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, +# partition_base_dir: str | None = None, +# exclude_invalid_files: bool | None = None, +# ignore_prefixes: list[str] | None = None, +# ) -> UnionDataset: ... +# @overload +# def dataset( +# source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, +# schema: Schema | None = None, +# format: FileFormat | _DatasetFormat | None = None, +# filesystem: SupportedFileSystem | str | None = None, +# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, +# partition_base_dir: str | None = None, +# exclude_invalid_files: bool | None = None, +# ignore_prefixes: list[str] | None = None, +# ) -> InMemoryDataset: ... +# @overload +# def dataset( +# source: RecordBatch | Table, +# schema: Schema | None = None, +# format: FileFormat | _DatasetFormat | None = None, +# filesystem: SupportedFileSystem | str | None = None, +# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, +# partition_base_dir: str | None = None, +# exclude_invalid_files: bool | None = None, +# ignore_prefixes: list[str] | None = None, +# ) -> InMemoryDataset: ... 
+# def write_dataset( +# data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], +# base_dir: StrPath, +# *, +# basename_template: str | None = None, +# format: FileFormat | _DatasetFormat | None = None, +# partitioning: Partitioning | list[str] | None = None, +# partitioning_flavor: str | None = None, +# schema: Schema | None = None, +# filesystem: SupportedFileSystem | None = None, +# file_options: FileWriteOptions | None = None, +# use_threads: bool = True, +# max_partitions: int = 1024, +# max_open_files: int = 1024, +# max_rows_per_file: int = 0, +# min_rows_per_group: int = 0, +# max_rows_per_group: int = 1024 * 1024, +# file_visitor: Callable[[str], None] | None = None, +# existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", +# create_dir: bool = True, +# ): ... diff --git a/python/pyarrow-stubs/device.pyi b/python/pyarrow-stubs/device.pyi new file mode 100644 index 00000000000..d1b9f39eedd --- /dev/null +++ b/python/pyarrow-stubs/device.pyi @@ -0,0 +1,88 @@ +import enum + +from pyarrow.lib import _Weakrefable + +class DeviceAllocationType(enum.Flag): + CPU = enum.auto() + CUDA = enum.auto() + CUDA_HOST = enum.auto() + OPENCL = enum.auto() + VULKAN = enum.auto() + METAL = enum.auto() + VPI = enum.auto() + ROCM = enum.auto() + ROCM_HOST = enum.auto() + EXT_DEV = enum.auto() + CUDA_MANAGED = enum.auto() + ONEAPI = enum.auto() + WEBGPU = enum.auto() + HEXAGON = enum.auto() + +class Device(_Weakrefable): + """ + Abstract interface for hardware devices + + This object represents a device with access to some memory spaces. + When handling a Buffer or raw memory address, it allows deciding in which + context the raw memory address should be interpreted + (e.g. CPU-accessible memory, or embedded memory on some particular GPU). + """ + + @property + def type_name(self) -> str: + """ + A shorthand for this device's type. + """ + @property + def device_id(self) -> int: + """ + A device ID to identify this device if there are multiple of this type. + + If there is no "device_id" equivalent (such as for the main CPU device on + non-numa systems) returns -1. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this device is the main CPU device. + + This shorthand method is very useful when deciding whether a memory address + is CPU-accessible. + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + Return the DeviceAllocationType of this device. + """ + +class MemoryManager(_Weakrefable): + """ + An object that provides memory management primitives. + + A MemoryManager is always tied to a particular Device instance. + It can also have additional parameters (such as a MemoryPool to + allocate CPU memory). + + """ + @property + def device(self) -> Device: + """ + The device this MemoryManager is tied to. + """ + @property + def is_cpu(self) -> bool: + """ + Whether this MemoryManager is tied to the main CPU device. + + This shorthand method is very useful when deciding whether a memory + address is CPU-accessible. + """ + +def default_cpu_memory_manager() -> MemoryManager: + """ + Return the default CPU MemoryManager instance. + + The returned singleton instance uses the default MemoryPool. 
+ """ + +__all__ = ["DeviceAllocationType", "Device", "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow-stubs/error.pyi b/python/pyarrow-stubs/error.pyi new file mode 100644 index 00000000000..981ed51e680 --- /dev/null +++ b/python/pyarrow-stubs/error.pyi @@ -0,0 +1,53 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + +ArrowIOError = IOError + +class StopToken: ... + +def enable_signal_handlers(enable: bool) -> None: ... + +have_signal_refcycle: bool + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... + +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow-stubs/interchange/__init__.pyi b/python/pyarrow-stubs/interchange/__init__.pyi new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pyarrow-stubs/interchange/buffer.pyi b/python/pyarrow-stubs/interchange/buffer.pyi new file mode 100644 index 00000000000..46673961a75 --- /dev/null +++ b/python/pyarrow-stubs/interchange/buffer.pyi @@ -0,0 +1,58 @@ +import enum + +from pyarrow.lib import Buffer + +class DlpackDeviceType(enum.IntEnum): + """Integer enum for device type codes matching DLPack.""" + + CPU = 1 + CUDA = 2 + CPU_PINNED = 3 + OPENCL = 4 + VULKAN = 7 + METAL = 8 + VPI = 9 + ROCM = 10 + +class _PyArrowBuffer: + """ + Data in the buffer is guaranteed to be contiguous in memory. + + Note that there is no dtype attribute present, a buffer can be thought of + as simply a block of memory. However, if the column that the buffer is + attached to has a dtype that's supported by DLPack and ``__dlpack__`` is + implemented, then that dtype information will be contained in the return + value from ``__dlpack__``. + + This distinction is useful to support both data exchange via DLPack on a + buffer and (b) dtypes like variable-length strings which do not have a + fixed number of bytes per element. + """ + def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... + @property + def bufsize(self) -> int: + """ + Buffer size in bytes. + """ + @property + def ptr(self) -> int: + """ + Pointer to start of the buffer as an integer. + """ + def __dlpack__(self): + """ + Produce DLPack capsule (see array API standard). + + Raises: + - TypeError : if the buffer contains unsupported dtypes. + - NotImplementedError : if DLPack support is not implemented + + Useful to have to connect to array libraries. 
Support optional because + it's not completely trivial to implement for a Python-only library. + """ + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: + """ + Device type and device ID for where the data in the buffer resides. + Uses device type codes matching DLPack. + Note: must be implemented even if ``__dlpack__`` is not. + """ diff --git a/python/pyarrow-stubs/interchange/column.pyi b/python/pyarrow-stubs/interchange/column.pyi new file mode 100644 index 00000000000..e6662867b6b --- /dev/null +++ b/python/pyarrow-stubs/interchange/column.pyi @@ -0,0 +1,252 @@ +import enum + +from typing import Any, Iterable, TypeAlias, TypedDict + +from pyarrow.lib import Array, ChunkedArray + +from .buffer import _PyArrowBuffer + +class DtypeKind(enum.IntEnum): + """ + Integer enum for data types. + + Attributes + ---------- + INT : int + Matches to signed integer data type. + UINT : int + Matches to unsigned integer data type. + FLOAT : int + Matches to floating point data type. + BOOL : int + Matches to boolean data type. + STRING : int + Matches to string data type (UTF-8 encoded). + DATETIME : int + Matches to datetime data type. + CATEGORICAL : int + Matches to categorical data type. + """ + + INT = 0 + UINT = 1 + FLOAT = 2 + BOOL = 20 + STRING = 21 # UTF-8 + DATETIME = 22 + CATEGORICAL = 23 + +Dtype: TypeAlias = tuple[DtypeKind, int, str, str] + +class ColumnNullType(enum.IntEnum): + """ + Integer enum for null type representation. + + Attributes + ---------- + NON_NULLABLE : int + Non-nullable column. + USE_NAN : int + Use explicit float NaN value. + USE_SENTINEL : int + Sentinel value besides NaN. + USE_BITMASK : int + The bit is set/unset representing a null on a certain position. + USE_BYTEMASK : int + The byte is set/unset representing a null on a certain position. + """ + + NON_NULLABLE = 0 + USE_NAN = 1 + USE_SENTINEL = 2 + USE_BITMASK = 3 + USE_BYTEMASK = 4 + +class ColumnBuffers(TypedDict): + data: tuple[_PyArrowBuffer, Dtype] + validity: tuple[_PyArrowBuffer, Dtype] | None + offsets: tuple[_PyArrowBuffer, Dtype] | None + +class CategoricalDescription(TypedDict): + is_ordered: bool + is_dictionary: bool + categories: _PyArrowColumn | None + +class Endianness(enum.Enum): + LITTLE = "<" + BIG = ">" + NATIVE = "=" + NA = "|" + +class NoBufferPresent(Exception): + """Exception to signal that there is no requested buffer.""" + +class _PyArrowColumn: + """ + A column object, with only the methods and properties required by the + interchange protocol defined. + + A column can contain one or more chunks. Each chunk can contain up to three + buffers - a data buffer, a mask buffer (depending on null representation), + and an offsets buffer (if variable-size binary; e.g., variable-length + strings). + + TBD: Arrow has a separate "null" dtype, and has no separate mask concept. + Instead, it seems to use "children" for both columns with a bit mask, + and for nested dtypes. Unclear whether this is elegant or confusing. + This design requires checking the null representation explicitly. + + The Arrow design requires checking: + 1. the ARROW_FLAG_NULLABLE (for sentinel values) + 2. if a column has two children, combined with one of those children + having a null dtype. + + Making the mask concept explicit seems useful. One null dtype would + not be enough to cover both bit and byte masks, so that would mean + even more checking if we did it the Arrow way. + + TBD: there's also the "chunk" concept here, which is implicit in Arrow as + multiple buffers per array (= column here). 
Semantically it may make + sense to have both: chunks were meant for example for lazy evaluation + of data which doesn't fit in memory, while multiple buffers per column + could also come from doing a selection operation on a single + contiguous buffer. + + Given these concepts, one would expect chunks to be all of the same + size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), + while multiple buffers could have data-dependent lengths. Not an issue + in pandas if one column is backed by a single NumPy array, but in + Arrow it seems possible. + Are multiple chunks *and* multiple buffers per column necessary for + the purposes of this interchange protocol, or must producers either + reuse the chunk concept for this or copy the data? + + Note: this Column object can only be produced by ``__dataframe__``, so + doesn't need its own version or ``__column__`` protocol. + """ + def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... + def size(self) -> int: + """ + Size of the column, in elements. + + Corresponds to DataFrame.num_rows() if column is a single chunk; + equal to size of this current chunk otherwise. + + Is a method rather than a property because it may cause a (potentially + expensive) computation for some dataframe implementations. + """ + @property + def offset(self) -> int: + """ + Offset of first element. + + May be > 0 if using chunks; for example for a column with N chunks of + equal size M (only the last chunk may be shorter), + ``offset = n * M``, ``n = 0 .. N-1``. + """ + @property + def dtype(self) -> tuple[DtypeKind, int, str, str]: + """ + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + + Bit-width : the number of bits as an integer + Format string : data type description format string in Apache Arrow C + Data Interface format. + Endianness : current only native endianness (``=``) is supported + + Notes: + - Kind specifiers are aligned with DLPack where possible (hence the + jump to 20, leave enough room for future extension) + - Masks must be specified as boolean with either bit width 1 (for + bit masks) or 8 (for byte masks). + - Dtype width in bits was preferred over bytes + - Endianness isn't too useful, but included now in case in the + future we need to support non-native endianness + - Went with Apache Arrow format strings over NumPy format strings + because they're more complete from a dataframe perspective + - Format strings are mostly useful for datetime specification, and + for categoricals. + - For categoricals, the format string describes the type of the + categorical in the data buffer. In case of a separate encoding of + the categorical (e.g. an integer to string mapping), this can + be derived from ``self.describe_categorical``. + - Data types not included: complex, Arrow-style null, binary, + decimal, and nested (list, struct, map, union) dtypes. + """ + @property + def describe_categorical(self) -> CategoricalDescription: + """ + If the dtype is categorical, there are two options: + - There are only values in the data buffer. + - There is a separate non-categorical Column encoding categorical + values. + + Raises TypeError if the dtype is not categorical + + Returns the dictionary with description on how to interpret the + data buffer: + - "is_ordered" : bool, whether the ordering of dictionary indices + is semantically meaningful. 
+ - "is_dictionary" : bool, whether a mapping of + categorical values to other objects exists + - "categories" : Column representing the (implicit) mapping of + indices to category values (e.g. an array of + cat1, cat2, ...). None if not a dictionary-style + categorical. + + TBD: are there any other in-memory representations that are needed? + """ + @property + def describe_null(self) -> tuple[ColumnNullType, Any]: + """ + Return the missing value (or "null") representation the column dtype + uses, as a tuple ``(kind, value)``. + + Value : if kind is "sentinel value", the actual value. If kind is a bit + mask or a byte mask, the value (0 or 1) indicating a missing value. + None otherwise. + """ + @property + def null_count(self) -> int: + """ + Number of null elements, if known. + + Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the column consists of. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the chunks. + + See `DataFrame.get_chunks` for details on ``n_chunks``. + """ + def get_buffers(self) -> ColumnBuffers: + """ + Return a dictionary containing the underlying buffers. + + The returned dictionary has the following contents: + + - "data": a two-element tuple whose first element is a buffer + containing the data and whose second element is the data + buffer's associated dtype. + - "validity": a two-element tuple whose first element is a buffer + containing mask values indicating missing data and + whose second element is the mask value buffer's + associated dtype. None if the null representation is + not a bit or byte mask. + - "offsets": a two-element tuple whose first element is a buffer + containing the offset values for variable-size binary + data (e.g., variable-length strings) and whose second + element is the offsets buffer's associated dtype. None + if the data buffer does not have an associated offsets + buffer. + """ diff --git a/python/pyarrow-stubs/interchange/dataframe.pyi b/python/pyarrow-stubs/interchange/dataframe.pyi new file mode 100644 index 00000000000..526a58926a9 --- /dev/null +++ b/python/pyarrow-stubs/interchange/dataframe.pyi @@ -0,0 +1,102 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Any, Iterable, Sequence + +from pyarrow.interchange.column import _PyArrowColumn +from pyarrow.lib import RecordBatch, Table + +class _PyArrowDataFrame: + """ + A data frame class, with only the methods required by the interchange + protocol defined. + + A "data frame" represents an ordered collection of named columns. + A column's "name" must be a unique string. + Columns may be accessed by name or by position. + + This could be a public data frame class, or an object with the methods and + attributes defined on this DataFrame class could be returned from the + ``__dataframe__`` method of a public data frame class in a library adhering + to the dataframe interchange protocol specification. + """ + + def __init__( + self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True + ) -> None: ... 
+ def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Construct a new exchange object, potentially changing the parameters. + ``nan_as_null`` is a keyword intended for the consumer to tell the + producer to overwrite null values in the data with ``NaN``. + It is intended for cases where the consumer does not support the bit + mask or byte mask that is the producer's native representation. + ``allow_copy`` is a keyword that defines whether or not the library is + allowed to make a copy of the data. For example, copying data would be + necessary if a library supports strided buffers, given that this + protocol specifies contiguous buffers. + """ + @property + def metadata(self) -> dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything, they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. For avoiding collisions with other + entries, please add name the keys with the name of the library + followed by a period and the desired name, e.g, ``pandas.indexcol``. + """ + def num_columns(self) -> int: + """ + Return the number of columns in the DataFrame. + """ + def num_rows(self) -> int: + """ + Return the number of rows in the DataFrame, if available. + """ + def num_chunks(self) -> int: + """ + Return the number of chunks the DataFrame consists of. + """ + def column_names(self) -> Iterable[str]: + """ + Return an iterator yielding the column names. + """ + def get_column(self, i: int) -> _PyArrowColumn: + """ + Return the column at the indicated position. + """ + def get_column_by_name(self, name: str) -> _PyArrowColumn: + """ + Return the column whose name is the indicated name. + """ + def get_columns(self) -> Iterable[_PyArrowColumn]: + """ + Return an iterator yielding the columns. + """ + def select_columns(self, indices: Sequence[int]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by index. + """ + def select_columns_by_name(self, names: Sequence[str]) -> Self: + """ + Create a new DataFrame by selecting a subset of columns by name. + """ + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: + """ + Return an iterator yielding the chunks. + + By default (None), yields the chunks that the data is stored as by the + producer. If given, ``n_chunks`` must be a multiple of + ``self.num_chunks()``, meaning the producer must subdivide each chunk + before yielding it. + + Note that the producer must ensure that all columns are chunked the + same way. + """ diff --git a/python/pyarrow-stubs/interchange/from_dataframe.pyi b/python/pyarrow-stubs/interchange/from_dataframe.pyi new file mode 100644 index 00000000000..b04b6268975 --- /dev/null +++ b/python/pyarrow-stubs/interchange/from_dataframe.pyi @@ -0,0 +1,244 @@ +from typing import Any, Protocol, TypeAlias + +from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table + +from .column import ( + ColumnBuffers, + ColumnNullType, + Dtype, + DtypeKind, +) + +class DataFrameObject(Protocol): + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> Any: ... + +ColumnObject: TypeAlias = Any + +def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: + """ + Build a ``pa.Table`` from any DataFrame supporting the interchange protocol. 
+ + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Table + + Examples + -------- + >>> import pyarrow + >>> from pyarrow.interchange import from_dataframe + + Convert a pandas dataframe to a pyarrow table: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_attendees": [100, 10, 1], + ... "country": ["Italy", "Spain", "Slovenia"], + ... } + ... ) + >>> df + n_attendees country + 0 100 Italy + 1 10 Spain + 2 1 Slovenia + >>> from_dataframe(df) + pyarrow.Table + n_attendees: int64 + country: large_string + ---- + n_attendees: [[100,10,1]] + country: [["Italy","Spain","Slovenia"]] + """ + +def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: + """ + Convert interchange protocol chunk to ``pa.RecordBatch``. + + Parameters + ---------- + df : DataFrameObject + Object supporting the interchange protocol, i.e. `__dataframe__` + method. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.RecordBatch + """ + +def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding one of the primitive dtypes to a PyArrow array. + A primitive type is one of: int, uint, float, bool (1 bit). + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + """ + +def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: + """ + Convert a column holding boolean dtype to a PyArrow array. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + """ + +def categorical_column_to_dictionary( + col: ColumnObject, allow_copy: bool = True +) -> DictionaryArray: + """ + Convert a column holding categorical data to a pa.DictionaryArray. + + Parameters + ---------- + col : ColumnObject + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.DictionaryArray + """ + +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: + """Parse datetime `format_str` to interpret the `data`.""" + +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: + """Map column date type to pyarrow date type.""" + +def buffers_to_array( + buffers: ColumnBuffers, + data_type: tuple[DtypeKind, int, str, str], + length: int, + describe_null: ColumnNullType, + offset: int = 0, + allow_copy: bool = True, +) -> Array: + """ + Build a PyArrow array from the passed buffer. + + Parameters + ---------- + buffer : ColumnBuffers + Dictionary containing tuples of underlying buffers and + their associated dtype. + data_type : Tuple[DtypeKind, int, str, str], + Dtype description of the column as a tuple ``(kind, bit-width, format string, + endianness)``. + length : int + The number of values in the array. 
+ describe_null: ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Array + + Notes + ----- + The returned array doesn't own the memory. The caller of this function + is responsible for keeping the memory owner object alive as long as + the returned PyArrow array is being used. + """ + +def validity_buffer_from_mask( + validity_buff: Buffer, + validity_dtype: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from the passed mask buffer. + + Parameters + ---------- + validity_buff : BufferObject + Tuple of underlying validity buffer and associated dtype. + validity_dtype : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ + +def validity_buffer_nan_sentinel( + data_pa_buffer: Buffer, + data_type: Dtype, + describe_null: ColumnNullType, + length: int, + offset: int = 0, + allow_copy: bool = True, +) -> Buffer: + """ + Build a PyArrow buffer from NaN or sentinel values. + + Parameters + ---------- + data_pa_buffer : pa.Buffer + PyArrow buffer for the column data. + data_type : Dtype + Dtype description as a tuple ``(kind, bit-width, format string, + endianness)``. + describe_null : ColumnNullType + Null representation the column dtype uses, + as a tuple ``(kind, value)`` + length : int + The number of values in the array. + offset : int, default: 0 + Number of elements to offset from the start of the buffer. + allow_copy : bool, default: True + Whether to allow copying the memory to perform the conversion + (if false then zero-copy approach is requested). + + Returns + ------- + pa.Buffer + """ diff --git a/python/pyarrow-stubs/io.pyi b/python/pyarrow-stubs/io.pyi index b8404225e18..3d630498a1d 100644 --- a/python/pyarrow-stubs/io.pyi +++ b/python/pyarrow-stubs/io.pyi @@ -37,7 +37,7 @@ import builtins from pyarrow._stubs_typing import Compression, SupportPyBuffer from pyarrow.lib import MemoryPool, _Weakrefable -# from .device import Device, DeviceAllocationType, MemoryManager +from .device import Device, DeviceAllocationType, MemoryManager from ._types import KeyValueMetadata def have_libhdfs() -> bool: @@ -605,34 +605,33 @@ class Buffer(_Weakrefable): """ Whether the buffer is CPU-accessible. """ - # TODO - # @property - # def device(self) -> Device: - # """ - # The device where the buffer resides. - # - # Returns - # ------- - # Device - # """ - # @property - # def memory_manager(self) -> MemoryManager: - # """ - # The memory manager associated with the buffer. - # - # Returns - # ------- - # MemoryManager - # """ - # @property - # def device_type(self) -> DeviceAllocationType: - # """ - # The device type where the buffer resides. 
- # - # Returns - # ------- - # DeviceAllocationType - # """ + @property + def device(self) -> Device: + """ + The device where the buffer resides. + + Returns + ------- + Device + """ + @property + def memory_manager(self) -> MemoryManager: + """ + The memory manager associated with the buffer. + + Returns + ------- + MemoryManager + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the buffer resides. + + Returns + ------- + DeviceAllocationType + """ @property def parent(self) -> Buffer | None: ... def __getitem__(self, key: builtins.slice | int) -> Self | int: diff --git a/python/pyarrow-stubs/ipc.pyi b/python/pyarrow-stubs/ipc.pyi new file mode 100644 index 00000000000..c7f2af004d4 --- /dev/null +++ b/python/pyarrow-stubs/ipc.pyi @@ -0,0 +1,123 @@ +from io import IOBase + +import pandas as pd +import pyarrow.lib as lib + +from pyarrow.lib import ( + IpcReadOptions, + IpcWriteOptions, + Message, + MessageReader, + MetadataVersion, + ReadStats, + RecordBatchReader, + WriteStats, + _ReadPandasMixin, + get_record_batch_size, + get_tensor_size, + read_message, + read_record_batch, + read_schema, + read_tensor, + write_tensor, +) + +class RecordBatchStreamReader(lib._RecordBatchStreamReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +class RecordBatchFileReader(lib._RecordBatchFileReader): + def __init__( + self, + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class RecordBatchFileWriter(lib._RecordBatchFileWriter): + def __init__( + self, + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, + ) -> None: ... + +def new_stream( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchStreamWriter: ... +def open_stream( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchStreamReader: ... +def new_file( + sink: str | lib.NativeFile | IOBase, + schema: lib.Schema, + *, + use_legacy_format: bool | None = None, + options: IpcWriteOptions | None = None, +) -> RecordBatchFileWriter: ... +def open_file( + source: bytes | lib.Buffer | lib.NativeFile | IOBase, + footer_offset: int | None = None, + *, + options: IpcReadOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> RecordBatchFileReader: ... +def serialize_pandas( + df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None +) -> lib.Buffer: ... +def deserialize_pandas(buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... 
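The reader/writer factories stubbed above mirror `pyarrow.ipc`; a small round-trip sketch through an in-memory sink (the record batch data is invented for illustration):

```python
import pyarrow as pa
import pyarrow.ipc as ipc

batch = pa.record_batch({"n_legs": [2, 4, 100]})

# Write an IPC stream to an in-memory sink.
sink = pa.BufferOutputStream()
with ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)
buf = sink.getvalue()  # pyarrow.Buffer

# Read it back; open_stream accepts bytes, Buffer, NativeFile or file-like objects.
with ipc.open_stream(buf) as reader:
    table = reader.read_all()

assert table.num_rows == 3
```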
+ +__all__ = [ + "IpcReadOptions", + "IpcWriteOptions", + "Message", + "MessageReader", + "MetadataVersion", + "ReadStats", + "RecordBatchReader", + "WriteStats", + "_ReadPandasMixin", + "get_record_batch_size", + "get_tensor_size", + "read_message", + "read_record_batch", + "read_schema", + "read_tensor", + "write_tensor", + "RecordBatchStreamReader", + "RecordBatchStreamWriter", + "RecordBatchFileReader", + "RecordBatchFileWriter", + "new_stream", + "open_stream", + "new_file", + "open_file", + "serialize_pandas", + "deserialize_pandas", +] diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index 527f946b53a..c0a3cd08386 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -26,15 +26,15 @@ from .array import * # from .benchmark import * # from .builder import * # from .compat import * -# from .config import * -# from .device import * -# from .error import * +from .config import * +from .device import * +from .error import * from .io import * -# from .__lib_pxi.ipc import * +from ._ipc import * from .memory import * # from .pandas_shim import * from .scalar import * -# from .table import * +from .table import * from .tensor import * from ._types import * diff --git a/python/pyarrow-stubs/table.pyi b/python/pyarrow-stubs/table.pyi new file mode 100644 index 00000000000..685ae725d4b --- /dev/null +++ b/python/pyarrow-stubs/table.pyi @@ -0,0 +1,5154 @@ +import datetime as dt +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import ( + Any, + Collection, + Generator, + Generic, + Iterable, + Iterator, + Literal, + Mapping, + Sequence, + TypeVar, +) +import builtins + +import numpy as np +import pandas as pd + +from numpy.typing import NDArray +from pyarrow._compute import ( + CastOptions, + CountOptions, + FunctionOptions, + ScalarAggregateOptions, + TDigestOptions, + VarianceOptions, +) +from pyarrow._stubs_typing import ( + Indices, + Mask, + NullEncoding, + NullSelectionBehavior, + Order, + SupportArrowArray, + SupportArrowDeviceArray, + SupportArrowStream, +) +from pyarrow.compute import ArrayOrChunkedArray, Expression +from pyarrow.interchange.dataframe import _PyArrowDataFrame +from pyarrow.lib import Device, MemoryManager, MemoryPool, Schema +from pyarrow.lib import Field as _Field + +from .array import Array, StructArray, _CastAs, _PandasConvertible +from .device import DeviceAllocationType +from .io import Buffer +from ._ipc import RecordBatchReader +from .scalar import BooleanScalar, Int64Scalar, Scalar, StructScalar +from .tensor import Tensor +from ._stubs_typing import NullableCollection +from ._types import DataType, _AsPyType, _BasicDataType, _DataTypeT + +Field: TypeAlias = _Field[DataType] +_ScalarT = TypeVar("_ScalarT", bound=Scalar) +_Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) + +_Aggregation: TypeAlias = Literal[ + "all", + "any", + "approximate_median", + "count", + "count_all", + "count_distinct", + "distinct", + "first", + "first_last", + "last", + "list", + "max", + "mean", + "min", + "min_max", + "one", + "product", + "stddev", + "sum", + "tdigest", + "variance", +] +_AggregationPrefixed: TypeAlias = Literal[ + "hash_all", + "hash_any", + "hash_approximate_median", + "hash_count", + "hash_count_all", + "hash_count_distinct", + "hash_distinct", + "hash_first", + "hash_first_last", + "hash_last", + "hash_list", 
+ "hash_max", + "hash_mean", + "hash_min", + "hash_min_max", + "hash_one", + "hash_product", + "hash_stddev", + "hash_sum", + "hash_tdigest", + "hash_variance", +] +Aggregation: TypeAlias = _Aggregation | _AggregationPrefixed +AggregateOptions: TypeAlias = ( + ScalarAggregateOptions | CountOptions | TDigestOptions | VarianceOptions | FunctionOptions +) + +UnarySelector: TypeAlias = str +NullarySelector: TypeAlias = tuple[()] +NarySelector: TypeAlias = list[str] | tuple[str, ...] +ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector + +class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): + """ + An array-like composed from a (possibly empty) collection of pyarrow.Arrays + + Warnings + -------- + Do not call this class's constructor directly. + + Examples + -------- + To construct a ChunkedArray object use :func:`pyarrow.chunked_array`: + + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> isinstance(pa.chunked_array([[2, 2, 4], [4, 5, 100]]), pa.ChunkedArray) + True + """ + + @property + def data(self) -> Self: ... + @property + def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: + """ + Return data type of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + """ + def length(self) -> int: + """ + Return length of a ChunkedArray. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.length() + 6 + """ + __len__ = length + def to_string( + self, + *, + indent: int = 0, + window: int = 5, + container_window: int = 2, + skip_new_lines: bool = False, + ) -> str: + """ + Render a "pretty-printed" string representation of the ChunkedArray + + Parameters + ---------- + indent : int + How much to indent right the content of the array, + by default ``0``. + window : int + How many items to preview within each chunk at the begin and end + of the chunk when the chunk is bigger than the window. + The other elements will be ellipsed. + container_window : int + How many chunks to preview at the begin and end + of the array when the array is bigger than the window. + The other elements will be ellipsed. + This setting also applies to list columns. + skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_string(skip_new_lines=True) + '[[2,2,4],[4,5,100]]' + """ + format = to_string + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. 
+ + Raises + ------ + ArrowInvalid + """ + @property + def null_count(self) -> int: + """ + Number of null entries + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.null_count + 1 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the chunked array. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.nbytes + 49 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the chunked array. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.get_total_buffer_size() + 49 + """ + def __sizeof__(self) -> int: ... + def __getitem__(self, key: int | builtins.slice) -> Self | _Scalar_co: + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or ChunkedArray (slice) + """ + def getitem(self, i: int) -> Scalar: ... + def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: + """ + Return boolean array indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_null() + + [ + [ + false, + false, + false, + false, + true, + false + ] + ] + """ + def is_nan(self) -> ChunkedArray[BooleanScalar]: + """ + Return boolean array indicating the NaN values. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) + >>> arr.is_nan() + + [ + [ + false, + true, + false, + false, + null, + false + ] + ] + """ + def is_valid(self) -> ChunkedArray[BooleanScalar]: + """ + Return boolean array indicating the non-null values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.is_valid() + + [ + [ + true, + true, + true + ], + [ + true, + false, + true + ] + ] + """ + def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: + """ + Replace each null element in values with fill_value. + + See :func:`pyarrow.compute.fill_null` for full usage. + + Parameters + ---------- + fill_value : any + The replacement value for null entries. + + Returns + ------- + result : Array or ChunkedArray + A new array with nulls replaced by the given value. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> fill_value = pa.scalar(5, type=pa.int8()) + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.fill_null(fill_value) + + [ + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + ] + """ + def equals(self, other: Self) -> bool: + """ + Return whether the contents of two chunked arrays are equal. + + Parameters + ---------- + other : pyarrow.ChunkedArray + Chunked array to compare against. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> n_legs.equals(n_legs) + True + >>> n_legs.equals(animals) + False + """ + def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: + """ + Return a NumPy copy of this array (experimental). + + Parameters + ---------- + zero_copy_only : bool, default False + Introduced for signature consistence with pyarrow.Array.to_numpy. + This must be False here since NumPy arrays' buffer must be contiguous. + + Returns + ------- + array : numpy.ndarray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.to_numpy() + array([ 2, 2, 4, 4, 5, 100]) + """ + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def cast( + self, + target_type: None | _CastAs = None, + safe: bool | None = None, + options: CastOptions | None = None, + ) -> Self | ChunkedArray[Scalar[_CastAs]]: + """ + Cast array values to another data type + + See :func:`pyarrow.compute.cast` for usage. + + Parameters + ---------- + target_type : DataType, None + Type to cast array to. + safe : boolean, default True + Whether to check for conversion errors such as overflow. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + cast : Array or ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs.type + DataType(int64) + + Change the data type of an array: + + >>> n_legs_seconds = n_legs.cast(pa.duration("s")) + >>> n_legs_seconds.type + DurationType(duration[s]) + """ + def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: + """ + Compute dictionary-encoded representation of array. + + See :func:`pyarrow.compute.dictionary_encode` for full usage. + + Parameters + ---------- + null_encoding : str, default "mask" + How to handle null entries. + + Returns + ------- + encoded : ChunkedArray + A dictionary-encoded version of this array. + + Examples + -------- + >>> import pyarrow as pa + >>> animals = pa.chunked_array( + ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) + ... ) + >>> animals.dictionary_encode() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: + """ + Flatten this ChunkedArray. If it has a struct type, the column is + flattened into one array per struct field. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : list of ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> c_arr = pa.chunked_array(n_legs.value_counts()) + >>> c_arr + + [ + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + ] + >>> c_arr.flatten() + [ + [ + [ + 2, + 4, + 5, + 100 + ] + ], + [ + [ + 2, + 2, + 1, + 1 + ] + ]] + >>> c_arr.type + StructType(struct) + >>> n_legs.type + DataType(int64) + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: + """ + Flatten this ChunkedArray into a single non-chunked array. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.combine_chunks() + + [ + 2, + 2, + 4, + 4, + 5, + 100 + ] + """ + def unique(self) -> ChunkedArray[_Scalar_co]: + """ + Compute distinct elements in array + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.unique() + + [ + 2, + 4, + 5, + 100 + ] + """ + def value_counts(self) -> StructArray: + """ + Compute counts of unique elements in array. + + Returns + ------- + An array of structs + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.value_counts() + + -- is_valid: all not null + -- child 0 type: int64 + [ + 2, + 4, + 5, + 100 + ] + -- child 1 type: int64 + [ + 2, + 2, + 1, + 1 + ] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this ChunkedArray + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.slice(2, 2) + + [ + [ + 4 + ], + [ + 4 + ] + ] + """ + def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.filter` for full usage. + + Parameters + ---------- + mask : Array or array-like + The boolean mask to filter the chunked array with. + null_selection_behavior : str, default "drop" + How nulls in the mask should be handled. + + Returns + ------- + filtered : Array or ChunkedArray + An array of the same type, with only the elements selected by + the boolean mask. 
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> mask = pa.array([True, False, None, True, False, True]) + >>> n_legs.filter(mask) + + [ + [ + 2 + ], + [ + 4, + 100 + ] + ] + >>> n_legs.filter(mask, null_selection_behavior="emit_null") + + [ + [ + 2, + null + ], + [ + 4, + 100 + ] + ] + """ + def index( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + value: Scalar[_DataTypeT] | _AsPyType, + start: int | None = None, + end: int | None = None, + *, + memory_pool: MemoryPool | None = None, + ) -> Int64Scalar: + """ + Find the first index of a value. + + See :func:`pyarrow.compute.index` for full usage. + + Parameters + ---------- + value : Scalar or object + The value to look for in the array. + start : int, optional + The start index where to look for `value`. + end : int, optional + The end index where to look for `value`. + memory_pool : MemoryPool, optional + A memory pool for potential memory allocations. + + Returns + ------- + index : Int64Scalar + The index of the value in the array (-1 if not found). + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.index(4) + + >>> n_legs.index(4, start=3) + + """ + def take(self, indices: Indices) -> Self: + """ + Select values from the chunked array. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the array whose values will be returned. + + Returns + ------- + taken : Array or ChunkedArray + An array with the same datatype, containing the taken values. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.take([1, 4, 5]) + + [ + [ + 2, + 5, + 100 + ] + ] + """ + def drop_null(self) -> Self: + """ + Remove missing values from a chunked array. + See :func:`pyarrow.compute.drop_null` for full description. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.drop_null() + + [ + [ + 2, + 2 + ], + [ + 4, + 5, + 100 + ] + ] + """ + def sort(self, order: Order = "ascending", **kwargs) -> Self: + """ + Sort the ChunkedArray + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + result : ChunkedArray + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent chunked array, but where all + chunks share the same dictionary values. Dictionary indices are + transposed accordingly. + + If there are no dictionaries in the chunked array, it is returned + unchanged. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> c_arr + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ] + ] + >>> c_arr.unify_dictionaries() + + [ + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 0, + 1, + 2 + ], + ... + -- dictionary: + [ + "Flamingo", + "Parrot", + "Dog", + "Horse", + "Brittle stars", + "Centipede" + ] + -- indices: + [ + 3, + 4, + 5 + ] + ] + """ + @property + def num_chunks(self) -> int: + """ + Number of underlying chunks. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.num_chunks + 2 + """ + def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: + """ + Select a chunk by its index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Array + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs.chunk(1) + + [ + 4, + 5, + 100 + ] + """ + @property + def chunks(self) -> list[Array[_Scalar_co]]: + """ + Convert to a list of single-chunked arrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) + >>> n_legs + + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ] + ] + >>> n_legs.chunks + [ + [ + 2, + 2, + null + ], + [ + 4, + 5, + 100 + ]] + """ + def iterchunks( + self: ArrayOrChunkedArray[_ScalarT], + ) -> Generator[Array, None, None]: + """ + Convert to an iterator of ChunkArrays. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> for i in n_legs.iterchunks(): + ... print(i.null_count) + 0 + 1 + + """ + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_pylist( + self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], + *, + maps_as_pydicts: Literal["lossy", "strict"] | None = None, + ) -> list[_AsPyType | None]: + """ + Convert to a list of native Python objects. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) + >>> n_legs.to_pylist() + [2, 2, 4, 4, None, 100] + """ + def __arrow_c_stream__(self, requested_schema=None) -> Any: + """ + Export to a C ArrowArrayStream PyCapsule. 
+ + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + + Returns + ------- + PyCapsule + A capsule containing a C ArrowArrayStream struct. + """ + @classmethod + def _import_from_c_capsule(cls, stream) -> Self: + """ + Import ChunkedArray from a C ArrowArrayStream PyCapsule. + + Parameters + ---------- + stream: PyCapsule + A capsule containing a C ArrowArrayStream PyCapsule. + + Returns + ------- + ChunkedArray + """ + @property + def is_cpu(self) -> bool: + """ + Whether all chunks in the ChunkedArray are CPU-accessible. + """ + +def chunked_array( + arrays: Iterable[NullableCollection[Any]] | Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray] | Iterable[Array[_ScalarT]], + type: DataType | str | None = None, +) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: + """ + Construct chunked array from list of array-like objects + + Parameters + ---------- + arrays : Array, list of Array, or array-like + Must all be the same data type. Can be empty only if type also passed. + Any Arrow-compatible array that implements the Arrow PyCapsule Protocol + (has an ``__arrow_c_array__`` or ``__arrow_c_stream__`` method) can be + passed as well. + type : DataType or string coercible to DataType + + Returns + ------- + ChunkedArray + + Examples + -------- + >>> import pyarrow as pa + >>> pa.chunked_array([], type=pa.int8()) + + [ + ... + ] + + >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + + [ + [ + 2, + 2, + 4 + ], + [ + 4, + 5, + 100 + ] + ] + """ + +_ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) + +class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> _PyArrowDataFrame: + """ + Return the dataframe interchange object implementing the interchange protocol. + + Parameters + ---------- + nan_as_null : bool, default False + Whether to tell the DataFrame to overwrite null values in the data + with ``NaN`` (or ``NaT``). + allow_copy : bool, default True + Whether to allow memory copying when exporting. If set to False + it would cause non-zero-copy exports to fail. + + Returns + ------- + DataFrame interchange object + The object which consuming library can use to ingress the dataframe. + + Notes + ----- + Details on the interchange protocol: + https://data-apis.org/dataframe-protocol/latest/index.html + `nan_as_null` currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. + """ + def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: + """ + Slice or return column at given index or column name + + Parameters + ---------- + key : integer, str, or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + Array (from RecordBatch) or ChunkedArray (from Table) for column input. + RecordBatch or Table for slice input. + """ + def __len__(self) -> int: ... + def column(self, i: int | str) -> _ColumnT: + """ + Select single column from Table or RecordBatch. + + Parameters + ---------- + i : int or string + The index or name of the column to retrieve. 
+ + Returns + ------- + column : Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Select a column by numeric index: + + >>> table.column(0) + + [ + [ + 2, + 4, + 5, + 100 + ] + ] + + Select a column by its name: + + >>> table.column("animals") + + [ + [ + "Flamingo", + "Horse", + "Brittle stars", + "Centipede" + ] + ] + """ + @property + def column_names(self) -> list[str]: + """ + Names of the Table or RecordBatch columns. + + Returns + ------- + list of str + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> table = pa.Table.from_arrays( + ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]], + ... names=["n_legs", "animals"], + ... ) + >>> table.column_names + ['n_legs', 'animals'] + """ + @property + def columns(self) -> list[_ColumnT]: + """ + List of all columns in numerical order. + + Returns + ------- + columns : list of Array (for RecordBatch) or list of ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.columns + [ + [ + [ + null, + 4, + 5, + null + ] + ], + [ + [ + "Flamingo", + "Horse", + null, + "Centipede" + ] + ]] + """ + def drop_null(self) -> Self: + """ + Remove rows that contain missing values from a Table or RecordBatch. + + See :func:`pyarrow.compute.drop_null` for full usage. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, with rows containing + no missing values. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [None, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", None, "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.drop_null() + pyarrow.Table + year: double + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def field(self, i: int | str) -> Field: + """ + Select a schema field by its column name or numeric index. + + Parameters + ---------- + i : int or string + The index or name of the field to retrieve. + + Returns + ------- + Field + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.field(0) + pyarrow.Field + >>> table.field(1) + pyarrow.Field + """ + @classmethod + def from_pydict( + cls, + mapping: Mapping[str, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from Arrow arrays or columns. + + Parameters + ---------- + mapping : dict or Mapping + A mapping of strings to Arrays or Python lists. 
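# Editor's note: illustrative sketch of the column-access helpers documented
# above (column, column_names, columns, field, drop_null); not part of the stubs.
import pyarrow as pa

table = pa.table({
    "n_legs": [2, 4, None, 100],
    "animals": ["Flamingo", "Horse", None, "Centipede"],
})
table.column_names               # ['n_legs', 'animals']
table.column(0)                  # ChunkedArray for "n_legs"
table.column("animals")          # lookup by name works as well
table.field("n_legs").type       # schema-level Field, here int64
clean = table.drop_null()        # keeps only rows with no missing values
assert clean.num_rows == 3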
+ schema : Schema, default None + If not passed, will be inferred from the Mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> pydict = {"n_legs": n_legs, "animals": animals} + + Construct a Table from a dictionary of arrays: + + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a dictionary of arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pydict(pydict, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pylist( + cls, + mapping: Sequence[Mapping[str, Any]], + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table or RecordBatch from list of rows / dictionaries. + + Parameters + ---------- + mapping : list of dicts of rows + A mapping of strings to row values. + schema : Schema, default None + If not passed, will be inferred from the first row of the + mapping values. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + Table or RecordBatch + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + + Construct a Table from a list of rows: + + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4]] + animals: [["Flamingo","Dog"]] + + Construct a Table from a list of rows with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pylist(pylist, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + def itercolumns(self) -> Generator[_ColumnT, None, None]: + """ + Iterator over all columns in their numerical order. + + Yields + ------ + Array (for RecordBatch) or ChunkedArray (for Table) + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... 
{"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> for i in table.itercolumns(): + ... print(i.null_count) + 2 + 1 + """ + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def shape(self) -> tuple[int, int]: + """ + Dimensions of the table or record batch: (#rows, #columns). + + Returns + ------- + (int, int) + Number of rows and number of columns. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table.shape + (4, 2) + """ + @property + def schema(self) -> Schema: ... + @property + def nbytes(self) -> int: ... + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: + """ + Sort the Table or RecordBatch by one or multiple columns. + + Parameters + ---------- + sorting : str or list[tuple(name, order)] + Name of the column to use to sort (ascending), or + a list of multiple sorting conditions where + each entry is a tuple with column name + and sorting order ("ascending" or "descending") + **kwargs : dict, optional + Additional sorting options. + As allowed by :class:`SortOptions` + + Returns + ------- + Table or RecordBatch + A new tabular object sorted according to the sort keys. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.sort_by("animal") + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,2021,2021,2020,2022,2022]] + n_legs: [[5,100,4,2,4,2]] + animal: [["Brittle stars","Centipede","Dog","Flamingo","Horse","Parrot"]] + """ + def take(self, indices: Indices) -> Self: + """ + Select rows from a Table or RecordBatch. + + See :func:`pyarrow.compute.take` for full usage. + + Parameters + ---------- + indices : Array or array-like + The indices in the tabular object whose rows will be returned. + + Returns + ------- + Table or RecordBatch + A tabular object with the same schema, containing the taken rows. + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.take([1, 3]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2022,2021]] + n_legs: [[4,100]] + animals: [["Horse","Centipede"]] + """ + def filter( + self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" + ) -> Self: + """ + Select rows from the table or record batch based on a boolean mask. + + The Table can be filtered based on a mask, which will be passed to + :func:`pyarrow.compute.filter` to perform the filtering, or it can + be filtered through a boolean :class:`.Expression` + + Parameters + ---------- + mask : Array or array-like or .Expression + The boolean mask or the :class:`.Expression` to filter the table with. 
+ null_selection_behavior : str, default "drop" + How nulls in the mask should be handled, does nothing if + an :class:`.Expression` is used. + + Returns + ------- + filtered : Table or RecordBatch + A tabular object of the same schema, with only the rows selected + by applied filtering + + Examples + -------- + Using a Table (works similarly for RecordBatch): + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Define an expression and select rows: + + >>> import pyarrow.compute as pc + >>> expr = pc.field("year") <= 2020 + >>> table.filter(expr) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2019]] + n_legs: [[2,5]] + animals: [["Flamingo","Brittle stars"]] + + Define a mask and select rows: + + >>> mask = [True, True, False, None] + >>> table.filter(mask) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022]] + n_legs: [[2,4]] + animals: [["Flamingo","Horse"]] + >>> table.filter(mask, null_selection_behavior="emit_null") + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,null]] + n_legs: [[2,4,null]] + animals: [["Flamingo","Horse",null]] + """ + def to_pydict( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> dict[str, list[Any]]: + """ + Convert the Table or RecordBatch to a dict or OrderedDict. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. + + Returns + ------- + dict + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> table.to_pydict() + {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']} + """ + def to_pylist( + self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None + ) -> list[dict[str, Any]]: + """ + Convert the Table or RecordBatch to a list of rows / dictionaries. + + Parameters + ---------- + maps_as_pydicts : str, optional, default `None` + Valid values are `None`, 'lossy', or 'strict'. + The default behavior (`None`), is to convert Arrow Map arrays to + Python association lists (list-of-tuples) in the same order as the + Arrow Map, as in [(key1, value1), (key2, value2), ...]. + + If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. + + If 'lossy', whenever duplicate keys are detected, a warning will be printed. + The last seen value of a duplicate key will be in the Python dictionary. + If 'strict', this instead results in an exception being raised when detected. 
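# Editor's note: sketch of Table.filter with an Expression and with a boolean
# mask (including null_selection_behavior), plus to_pydict; illustrative only.
import pyarrow as pa
import pyarrow.compute as pc

table = pa.table({
    "year": [2020, 2022, 2019, 2021],
    "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"],
})
table.filter(pc.field("year") >= 2021)                    # expression-based filter
mask = [True, False, None, True]
table.filter(mask)                                        # null mask entries dropped
table.filter(mask, null_selection_behavior="emit_null")   # null entries kept as null rows
table.to_pydict()                                         # {'year': [...], 'animals': [...]}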
+ + Returns + ------- + list + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> data = [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]] + >>> table = pa.table(data, names=["n_legs", "animals"]) + >>> table.to_pylist() + [{'n_legs': 2, 'animals': 'Flamingo'}, {'n_legs': 4, 'animals': 'Horse'}, ... + """ + def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: + """ + Return human-readable string representation of Table or RecordBatch. + + Parameters + ---------- + show_metadata : bool, default False + Display Field-level and Schema-level KeyValueMetadata. + preview_cols : int, default 0 + Display values of the columns for the first N columns. + + Returns + ------- + str + """ + def remove_column(self, i: int) -> Self: ... + def drop_columns(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new Table or RecordBatch. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Raises + ------ + KeyError + If any of the passed column names do not exist. + + Returns + ------- + Table or RecordBatch + A tabular object without the column(s). + + Examples + -------- + Table (works similarly for RecordBatch) + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Drop one column: + + >>> table.drop_columns("animals") + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + + Drop one or more columns: + + >>> table.drop_columns(["n_legs", "animals"]) + pyarrow.Table + ... + ---- + """ + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: ... + def append_column( + self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Append column at end of columns. + + Parameters + ---------- + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + Table or RecordBatch + New table or record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Append column at the end: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.append_column("year", [year]) + pyarrow.Table + n_legs: int64 + animals: string + year: int64 + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + year: [[2021,2022,2019,2021]] + """ + +class RecordBatch(_Tabular[Array]): + """ + Batch of rows of columns of equal length + + Warnings + -------- + Do not call this class's constructor directly, use one of the + ``RecordBatch.from_*`` functions instead. 
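# Editor's note: illustrative sketch of the column-mutation helpers described
# above (add_column, append_column, drop_columns); each call returns a new
# object and leaves the original unchanged.
import pyarrow as pa

table = pa.table({"n_legs": [2, 4, 5, 100],
                  "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
year = [2021, 2022, 2019, 2021]
with_year = table.append_column("year", [year])   # list-of-chunks for a Table column
reordered = table.add_column(0, "year", [year])   # insert at position 0
trimmed = table.drop_columns(["animals"])         # drop one or more columns by name
assert table.column_names == ["n_legs", "animals"]  # original table untouched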
+ + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Constructing a RecordBatch from arrays: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Constructing a RecordBatch from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_pandas(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Constructing a RecordBatch from pylist: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] + >>> pa.RecordBatch.from_pylist(pylist).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Dog + + You can also construct a RecordBatch using :func:`pyarrow.record_batch`: + + >>> pa.record_batch([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def replace_schema_metadata( + self, metadata: dict[str | bytes, str | bytes] | None = None + ) -> Self: + """ + Create shallow copy of record batch by replacing schema + key-value metadata with the indicated new metadata (which may be None, + which deletes any existing metadata + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + shallow_copy : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + + Constructing a RecordBatch with schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64())], metadata={"n_legs": "Number of legs per animal"} + ... 
) + >>> batch = pa.RecordBatch.from_arrays([n_legs], schema=my_schema) + >>> batch.schema + n_legs: int64 + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Shallow copy of a RecordBatch with deleted schema metadata: + + >>> batch.replace_schema_metadata().schema + n_legs: int64 + """ + @property + def num_columns(self) -> int: + """ + Number of columns + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_columns + 2 + """ + + @property + def num_rows(self) -> int: + """ + Number of rows + + Due to the definition of a RecordBatch, all columns have the same + number of rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.num_rows + 6 + """ + @property + def schema(self) -> Schema: + """ + Schema of the RecordBatch and its columns + + Returns + ------- + pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.schema + n_legs: int64 + animals: string + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the record batch. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.nbytes + 116 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the record batch + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.get_total_buffer_size() + 120 + """ + + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list + ) -> Self: + """ + Add column to RecordBatch at position i. 
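# Editor's note: a sketch contrasting RecordBatch.nbytes (accounts for array
# offsets) with get_total_buffer_size (counts whole referenced buffers);
# illustrative only, exact byte counts depend on layout and pyarrow version.
import pyarrow as pa

n_legs = pa.array([2, 2, 4, 4, 5, 100])
animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"])
batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"])

batch.nbytes                     # bytes actually referenced by the batch
batch.get_total_buffer_size()    # may be larger: full size of every buffer touched

tail = batch.slice(offset=3)     # zero-copy view of the last 3 rows
tail.nbytes                      # shrinks with the slice
tail.get_total_buffer_size()     # still counts the shared buffers in full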
+ + A new record batch is returned with the column added, the original record batch + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.add_column(0, "year", year) + pyarrow.RecordBatch + year: int64 + n_legs: int64 + animals: string + ---- + year: [2021,2022,2019,2021] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Original record batch is left unchanged: + + >>> batch + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def remove_column(self, i: int) -> Self: + """ + Create new RecordBatch with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New record batch without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.remove_column(1) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: + """ + Replace column in RecordBatch at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array or value coercible to array + Column data. + + Returns + ------- + RecordBatch + New record batch with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> batch.set_column(1, "year", year) + pyarrow.RecordBatch + n_legs: int64 + year: int64 + ---- + n_legs: [2,4,5,100] + year: [2021,2022,2019,2021] + """ + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: + """ + Create new record batch with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... 
"n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> new_names = ["n", "name"] + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> batch.rename_columns(new_names) + pyarrow.RecordBatch + n: int64 + name: string + ---- + n: [2,4,5,100] + name: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: + """ + Write RecordBatch to Buffer as encapsulated IPC message, which does not + include a Schema. + + To reconstruct a RecordBatch from the encapsulated IPC message Buffer + returned by this function, a Schema must be passed separately. See + Examples. + + Parameters + ---------- + memory_pool : MemoryPool, default None + Uses default memory pool if not specified + + Returns + ------- + serialized : Buffer + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> buf = batch.serialize() + >>> buf + + + Reconstruct RecordBatch from IPC message Buffer and original Schema + + >>> pa.ipc.read_record_batch(buf, batch.schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this RecordBatch + + Parameters + ---------- + offset : int, default 0 + Offset from start of record batch to slice + length : int, default None + Length of slice (default is until end of batch starting from + offset) + + Returns + ------- + sliced : RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> batch.slice(offset=3).to_pandas() + n_legs animals + 0 4 Horse + 1 5 Brittle stars + 2 100 Centipede + >>> batch.slice(length=2).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + >>> batch.slice(offset=3, length=1).to_pandas() + n_legs animals + 0 4 Horse + """ + def equals(self, other: Self, check_metadata: bool = False) -> bool: + """ + Check if contents of two record batches are equal. + + Parameters + ---------- + other : pyarrow.RecordBatch + RecordBatch to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + are_equal : bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) + >>> batch_0 = pa.record_batch([]) + >>> batch_1 = pa.RecordBatch.from_arrays( + ... [n_legs, animals], + ... 
names=["n_legs", "animals"], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> batch.equals(batch) + True + >>> batch.equals(batch_0) + False + >>> batch.equals(batch_1) + True + >>> batch.equals(batch_1, check_metadata=True) + False + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the RecordBatch. + + Returns a new RecordBatch with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) + + Select columns my indices: + + >>> batch.select([1]) + pyarrow.RecordBatch + animals: string + ---- + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Select columns by names: + + >>> batch.select(["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,2,4,4,5,100] + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast record batch values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.RecordBatch.from_pandas(df) + >>> batch.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast batch values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> batch.cast(target_schema=my_schema) + pyarrow.RecordBatch + n_legs: duration[s] + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[Array], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a RecordBatch from multiple pyarrow.Arrays + + Parameters + ---------- + arrays : list of pyarrow.Array + One for each field in RecordBatch + names : list of str, optional + Names for the batch fields. If not passed, schema must be passed + schema : Schema, default None + Schema for the created batch. If not passed, names must be passed + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... 
) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from pyarrow Arrays using names: + + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Construct a RecordBatch from pyarrow Arrays using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow RecordBatch + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the RecordBatch. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``RecordBatch``. The default of None will store the index as a + column, except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + + Returns + ------- + pyarrow.RecordBatch + + + Examples + -------- + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Convert pandas DataFrame to RecordBatch: + + >>> import pyarrow as pa + >>> pa.RecordBatch.from_pandas(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch using schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... 
) + >>> pa.RecordBatch.from_pandas(df, schema=my_schema) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + Convert pandas DataFrame to RecordBatch specifying columns: + + >>> pa.RecordBatch.from_pandas(df, columns=["n_legs"]) + pyarrow.RecordBatch + n_legs: int64 + ---- + n_legs: [2,4,5,100] + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[StructScalar] + ) -> Self: + """ + Construct a RecordBatch from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``RecordBatch``. + + Parameters + ---------- + struct_array : StructArray + Array to construct the record batch from. + + Returns + ------- + pyarrow.RecordBatch + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.RecordBatch.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array(self) -> StructArray: + """ + Convert to a struct array. + """ + def to_tensor( + self, + null_to_nan: bool = False, + row_major: bool = True, + memory_pool: MemoryPool | None = None, + ) -> Tensor: + """ + Convert to a :class:`~pyarrow.Tensor`. + + RecordBatches that can be converted have fields of type signed or unsigned + integer or float, including all bit-widths. + + ``null_to_nan`` is ``False`` by default and this method will raise an error in case + any nulls are present. RecordBatches with nulls can be converted with ``null_to_nan`` + set to ``True``. In this case null values are converted to ``NaN`` and integer type + arrays are promoted to the appropriate float type. + + Parameters + ---------- + null_to_nan : bool, default False + Whether to write null values in the result as ``NaN``. + row_major : bool, default True + Whether resulting Tensor is row-major or column-major + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Examples + -------- + >>> import pyarrow as pa + >>> batch = pa.record_batch( + ... [ + ... pa.array([1, 2, 3, 4, None], type=pa.int32()), + ... pa.array([10, 20, 30, 40, None], type=pa.float32()), + ... ], + ... names=["a", "b"], + ... ) + + >>> batch + pyarrow.RecordBatch + a: int32 + b: float + ---- + a: [1,2,3,4,null] + b: [10,20,30,40,null] + + Convert a RecordBatch to row-major Tensor with null values + written as ``NaN``s + + >>> batch.to_tensor(null_to_nan=True) + + type: double + shape: (5, 2) + strides: (16, 8) + >>> batch.to_tensor(null_to_nan=True).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + + Convert a RecordBatch to column-major Tensor + + >>> batch.to_tensor(null_to_nan=True, row_major=False) + + type: double + shape: (5, 2) + strides: (8, 40) + >>> batch.to_tensor(null_to_nan=True, row_major=False).to_numpy() + array([[ 1., 10.], + [ 2., 20.], + [ 3., 30.], + [ 4., 40.], + [nan, nan]]) + """ + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): + """ + Export to a C ArrowArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. 
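# Editor's note: illustrative sketch of RecordBatch.from_struct_array and
# to_tensor(null_to_nan=...) as documented above; to_tensor requires numeric
# columns only, and nulls are promoted to NaN in a floating-point tensor.
import pyarrow as pa

struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}])
from_struct = pa.RecordBatch.from_struct_array(struct)   # one column per struct field

numeric = pa.record_batch(
    [pa.array([1, 2, None], type=pa.int32()),
     pa.array([10.0, 20.0, None], type=pa.float32())],
    names=["a", "b"],
)
tensor = numeric.to_tensor(null_to_nan=True)   # nulls become NaN, ints promoted to float
matrix = tensor.to_numpy()                     # (3, 2) NumPy array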
+ + Be careful: if you don't pass the ArrowArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_array__(self, requested_schema=None): + """ + Get a pair of PyCapsules containing a C ArrowArray representation of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. PyArrow will attempt to cast the batch to this schema. + If None, the batch will be returned as-is, with a schema matching the + one returned by :meth:`__arrow_c_schema__()`. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowArray, + respectively. + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the batch as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @classmethod + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a C ArrowSchema + and ArrowArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: + """ + Export to a C ArrowDeviceArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the record batch + schema is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + @classmethod + def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: + """ + Import RecordBatch from a C ArrowDeviceArray struct, given its pointer + and the imported schema. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowDeviceArray struct. + type: Schema or int + Either a Schema object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + def __arrow_c_device_array__(self, requested_schema=None, **kwargs): + """ + Get a pair of PyCapsules containing a C ArrowDeviceArray representation + of the object. + + Parameters + ---------- + requested_schema : PyCapsule | None + A PyCapsule containing a C ArrowSchema representation of a requested + schema. 
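# Editor's note: sketch of the Arrow PyCapsule interface described above.
# __arrow_c_array__ is the public protocol method; _import_from_c_capsule is an
# internal helper declared in these stubs, used here only to illustrate the
# round trip. Not part of the generated stubs.
import pyarrow as pa

batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
schema_capsule, array_capsule = batch.__arrow_c_array__()   # pair of PyCapsules
roundtripped = pa.RecordBatch._import_from_c_capsule(schema_capsule, array_capsule)
assert roundtripped.equals(batch)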
PyArrow will attempt to cast the batch to this data type. + If None, the batch will be returned as-is, with a type matching the + one returned by :meth:`__arrow_c_schema__()`. + kwargs + Currently no additional keyword arguments are supported, but + this method will accept any keyword with a value of ``None`` + for compatibility with future keywords. + + Returns + ------- + Tuple[PyCapsule, PyCapsule] + A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, + respectively. + """ + @classmethod + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: + """ + Import RecordBatch from a pair of PyCapsules containing a + C ArrowSchema and ArrowDeviceArray, respectively. + + Parameters + ---------- + schema_capsule : PyCapsule + A PyCapsule containing a C ArrowSchema representation of the schema. + array_capsule : PyCapsule + A PyCapsule containing a C ArrowDeviceArray representation of the array. + + Returns + ------- + pyarrow.RecordBatch + """ + @property + def device_type(self) -> DeviceAllocationType: + """ + The device type where the arrays in the RecordBatch reside. + + Returns + ------- + DeviceAllocationType + """ + @property + def is_cpu(self) -> bool: + """ + Whether the RecordBatch's arrays are CPU-accessible. + """ + def copy_to(self, destination: MemoryManager | Device) -> Self: + """ + Copy the entire RecordBatch to destination device. + + This copies each column of the record batch to create + a new record batch where all underlying buffers for the columns have + been copied to the destination MemoryManager. + + Parameters + ---------- + destination : pyarrow.MemoryManager or pyarrow.Device + The destination device to copy the array to. + + Returns + ------- + RecordBatch + """ + +def table_to_blocks(options, table: Table, categories, extension_columns): ... + +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] + +class Table(_Tabular[ChunkedArray[Any]]): + """ + A collection of top-level named, equal length Arrow arrays. + + Warnings + -------- + Do not call this class's constructor directly, use one of the ``from_*`` + methods instead. + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a RecordBatch: + + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> pa.Table.from_pandas(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a dictionary of arrays: + + >>> pydict = {"n_legs": n_legs, "animals": animals} + >>> pa.Table.from_pydict(pydict) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_pydict(pydict).schema + n_legs: int64 + animals: string + + Construct a Table from a dictionary of arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from a list of rows: + + >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"year": 2021, "animals": "Centipede"}] + >>> pa.Table.from_pylist(pylist) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,null]] + animals: [["Flamingo","Centipede"]] + + Construct a Table from a list of rows with pyarrow schema: + + >>> my_schema = pa.schema( + ... [ + ... pa.field("year", pa.int64()), + ... pa.field("n_legs", pa.int64()), + ... pa.field("animals", pa.string()), + ... ], + ... metadata={"year": "Year of entry"}, + ... ) + >>> pa.Table.from_pylist(pylist, schema=my_schema).schema + year: int64 + n_legs: int64 + animals: string + -- schema metadata -- + year: 'Year of entry' + + Construct a Table with :func:`pyarrow.table`: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + + def validate(self, *, full: bool = False) -> None: + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full : bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + def slice(self, offset: int = 0, length: int | None = None) -> Self: + """ + Compute zero-copy slice of this Table. + + Parameters + ---------- + offset : int, default 0 + Offset from start of table to slice. + length : int, default None + Length of slice (default is until end of table starting from + offset). + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + >>> table.slice(length=3) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019]] + n_legs: [[2,4,5]] + animals: [["Flamingo","Horse","Brittle stars"]] + >>> table.slice(offset=2) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019,2021]] + n_legs: [[5,100]] + animals: [["Brittle stars","Centipede"]] + >>> table.slice(offset=2, length=1) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2019]] + n_legs: [[5]] + animals: [["Brittle stars"]] + """ + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: + """ + Select columns of the Table. + + Returns a new Table with the specified columns, and metadata + preserved. + + Parameters + ---------- + columns : list-like + The column names or integer indices to select. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.select([0, 1]) + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + >>> table.select(["year"]) + pyarrow.Table + year: int64 + ---- + year: [[2020,2022,2019,2021]] + """ + def replace_schema_metadata( + self, metadata: dict[str | bytes, str | bytes] | None = None + ) -> Self: + """ + Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be None), + which deletes any existing metadata. + + Parameters + ---------- + metadata : dict, default None + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Constructing a Table with pyarrow schema and metadata: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> table = pa.table(df, my_schema) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + + Create a shallow copy of a Table with deleted schema metadata: + + >>> table.replace_schema_metadata().schema + n_legs: int64 + animals: string + + Create a shallow copy of a Table with new schema metadata: + + >>> metadata = {"animals": "Which animal"} + >>> table.replace_schema_metadata(metadata=metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Which animal' + """ + def flatten(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Flatten this Table. + + Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. 
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> month = pa.array([4, 6]) + >>> table = pa.Table.from_arrays([struct, month], names=["a", "month"]) + >>> table + pyarrow.Table + a: struct + child 0, animals: string + child 1, n_legs: int64 + child 2, year: int64 + month: int64 + ---- + a: [ + -- is_valid: all not null + -- child 0 type: string + ["Parrot",null] + -- child 1 type: int64 + [2,4] + -- child 2 type: int64 + [null,2022]] + month: [[4,6]] + + Flatten the columns with struct field: + + >>> table.flatten() + pyarrow.Table + a.animals: string + a.n_legs: int64 + a.year: int64 + month: int64 + ---- + a.animals: [["Parrot",null]] + a.n_legs: [[2,4]] + a.year: [[null,2022]] + month: [[4,6]] + """ + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the ChunkedArray of each column are + concatenated into zero or one chunk. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + >>> table.combine_chunks() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4,4,5,100]] + animals: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + """ + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: + """ + Unify dictionaries across all chunks. + + This method returns an equivalent table, but where all chunks of + each column share the same dictionary values. Dictionary indices + are transposed accordingly. + + Columns without dictionaries are returned unchanged. 
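# Editor's note: sketch of Table.combine_chunks and Table.unify_dictionaries as
# documented above; illustrative only.
import pyarrow as pa

animals = pa.chunked_array([
    pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode(),
    pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode(),
])
n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
table = pa.table({"animals": animals, "n_legs": n_legs})

unified = table.unify_dictionaries()     # chunks of "animals" now share one dictionary
compact = table.combine_chunks()         # at most one chunk per column
assert compact.column("n_legs").num_chunks == 1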
+ + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() + >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() + >>> c_arr = pa.chunked_array([arr_1, arr_2]) + >>> table = pa.table([c_arr], names=["animals"]) + >>> table + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog"] -- indices: + [0,1,2], -- dictionary: + ["Horse","Brittle stars","Centipede"] -- indices: + [0,1,2]] + + Unify dictionaries across both chunks: + + >>> table.unify_dictionaries() + pyarrow.Table + animals: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [0,1,2], -- dictionary: + ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: + [3,4,5]] + """ + def equals(self, other: Self, check_metadata: bool = False) -> Self: + """ + Check if contents of two tables are equal. + + Parameters + ---------- + other : pyarrow.Table + Table to compare against. + check_metadata : bool, default False + Whether schema metadata equality should be checked as well. + + Returns + ------- + bool + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array( + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] + ... ) + >>> names = ["n_legs", "animals"] + >>> table = pa.Table.from_arrays([n_legs, animals], names=names) + >>> table_0 = pa.Table.from_arrays([]) + >>> table_1 = pa.Table.from_arrays( + ... [n_legs, animals], names=names, metadata={"n_legs": "Number of legs per animal"} + ... ) + >>> table.equals(table) + True + >>> table.equals(table_0) + False + >>> table.equals(table_1) + True + >>> table.equals(table_1, check_metadata=True) + False + """ + def cast( + self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None + ) -> Self: + """ + Cast table values to another schema. + + Parameters + ---------- + target_schema : Schema + Schema to cast to, the names and order of fields must match. + safe : bool, default True + Check for overflows or other unsafe conversions. + options : CastOptions, default None + Additional checks pass by CastOptions + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + + Define new schema and cast table values: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] + ... ) + >>> table.cast(target_schema=my_schema) + pyarrow.Table + n_legs: duration[s] + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_pandas( + cls, + df: pd.DataFrame, + schema: Schema | None = None, + preserve_index: bool | None = None, + nthreads: int | None = None, + columns: list[str] | None = None, + safe: bool = True, + ) -> Self: + """ + Convert pandas.DataFrame to an Arrow Table. 
+ + The column types in the resulting Arrow Table are inferred from the + dtypes of the pandas.Series in the DataFrame. In the case of non-object + Series, the NumPy dtype is translated to its Arrow equivalent. In the + case of `object`, we need to guess the datatype by looking at the + Python objects in this Series. + + Be aware that Series of the `object` dtype don't carry enough + information to always lead to a meaningful Arrow type. In the case that + we cannot infer a type, e.g. because the DataFrame is of length 0 or + the Series only contains None/nan objects, the type is set to + null. This behavior can be avoided by constructing an explicit schema + and passing it to this function. + + Parameters + ---------- + df : pandas.DataFrame + schema : pyarrow.Schema, optional + The expected schema of the Arrow Table. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index : bool, optional + Whether to store the index as an additional column in the resulting + ``Table``. The default of None will store the index as a column, + except for RangeIndex which is stored as metadata only. Use + ``preserve_index=True`` to force it to be stored as a column. + nthreads : int, default None + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. By default, this follows + :func:`pyarrow.cpu_count` (may use up to system CPU count threads). + columns : list, optional + List of column to be converted. If None, use all columns. + safe : bool, default True + Check for overflows or other unsafe conversions. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.Table.from_pandas(df) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @classmethod + def from_arrays( + cls, + arrays: Collection[ArrayOrChunkedArray[Any]], + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + ) -> Self: + """ + Construct a Table from Arrow arrays. + + Parameters + ---------- + arrays : list of pyarrow.Array or pyarrow.ChunkedArray + Equal-length arrays that should form the table. + names : list of str, optional + Names for the table columns. If not passed, schema must be passed. + schema : Schema, default None + Schema for the created table. If not passed, names must be passed. + metadata : dict or Mapping, default None + Optional metadata for the schema (if inferred). 
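+
+        As a compact sketch (illustrative only), passing ``schema`` instead of
+        ``names`` fixes both the column names and the types::
+
+            import pyarrow as pa
+
+            schema = pa.schema([("n_legs", pa.int64()), ("animals", pa.string())])
+            t = pa.Table.from_arrays(
+                [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], schema=schema
+            )
+            assert t.column_names == ["n_legs", "animals"]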
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from arrays: + + >>> pa.Table.from_arrays([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from arrays with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"animals": "Name of the animal species"}, + ... ) + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + animals: 'Name of the animal species' + """ + @classmethod + def from_struct_array( + cls, struct_array: StructArray | ChunkedArray[StructScalar] + ) -> Self: + """ + Construct a Table from a StructArray. + + Each field in the StructArray will become a column in the resulting + ``Table``. + + Parameters + ---------- + struct_array : StructArray or ChunkedArray + Array to construct the table from. + + Returns + ------- + pyarrow.Table + + Examples + -------- + >>> import pyarrow as pa + >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) + >>> pa.Table.from_struct_array(struct).to_pandas() + animals n_legs year + 0 Parrot 2 NaN + 1 None 4 2022.0 + """ + def to_struct_array( + self, max_chunksize: int | None = None + ) -> ChunkedArray[StructScalar]: + """ + Convert to a chunked array of struct type. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for ChunkedArray chunks. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + ChunkedArray + """ + @classmethod + def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: + """ + Construct a Table from a sequence or iterator of Arrow RecordBatches. + + Parameters + ---------- + batches : sequence or iterator of RecordBatch + Sequence of RecordBatch to be converted, all schemas must be equal. + schema : Schema, default None + If not passed, will be inferred from the first RecordBatch. 
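+
+        A small sketch (illustrative only) showing that a generator of batches
+        is accepted and that an explicit ``schema`` avoids relying on the first
+        batch for inference::
+
+            import pyarrow as pa
+
+            schema = pa.schema([("x", pa.int64())])
+
+            def gen():
+                yield pa.record_batch([pa.array([1, 2])], schema=schema)
+                yield pa.record_batch([pa.array([3])], schema=schema)
+
+            t = pa.Table.from_batches(gen(), schema=schema)
+            assert t.num_rows == 3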
+ + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + >>> batch = pa.record_batch([n_legs, animals], names=names) + >>> batch.to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Construct a Table from a RecordBatch: + + >>> pa.Table.from_batches([batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from a sequence of RecordBatches: + + >>> pa.Table.from_batches([batch, batch]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: + """ + Convert Table to a list of RecordBatch objects. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + list[RecordBatch] + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatch: + + >>> table.to_batches()[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + + Convert a Table to a list of RecordBatches: + + >>> table.to_batches(max_chunksize=2)[0].to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + >>> table.to_batches(max_chunksize=2)[1].to_pandas() + n_legs animals + 0 5 Brittle stars + 1 100 Centipede + """ + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: + """ + Convert the Table to a RecordBatchReader. + + Note that this method is zero-copy, it merely exposes the same data + under a different API. + + Parameters + ---------- + max_chunksize : int, default None + Maximum number of rows for each RecordBatch chunk. Individual chunks + may be smaller depending on the chunk layout of individual columns. + + Returns + ------- + RecordBatchReader + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Convert a Table to a RecordBatchReader: + + >>> table.to_reader() + + + >>> reader = table.to_reader() + >>> reader.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... + >>> reader.read_all() + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + @property + def schema(self) -> Schema: + """ + Schema of the table and its columns. + + Returns + ------- + Schema + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... 
"animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.schema + n_legs: int64 + animals: string + -- schema metadata -- + pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' ... + """ + @property + def num_columns(self) -> int: + """ + Number of columns in this table. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_columns + 2 + """ + @property + def num_rows(self) -> int: + """ + Number of rows in this table. + + Due to the definition of a table, all columns have the same number of + rows. + + Returns + ------- + int + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.num_rows + 4 + """ + @property + def nbytes(self) -> int: + """ + Total number of bytes consumed by the elements of the table. + + In other words, the sum of bytes from all buffer ranges referenced. + + Unlike `get_total_buffer_size` this method will account for array + offsets. + + If buffers are shared between arrays then the shared + portion will only be counted multiple times. + + The dictionary of dictionary arrays will always be counted in their + entirety even if the array only references a portion of the dictionary. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.nbytes + 72 + """ + def get_total_buffer_size(self) -> int: + """ + The sum of bytes in each buffer referenced by the table. + + An array may only reference a portion of a buffer. + This method will overestimate in this case and return the + byte size of the entire buffer. + + If a buffer is referenced multiple times then it will + only be counted once. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.get_total_buffer_size() + 76 + """ + def __sizeof__(self) -> int: ... + def add_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column added. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> table = pa.Table.from_pandas(df) + + Add column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.add_column(0, "year", [year]) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2021,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Original table is left unchanged: + + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def remove_column(self, i: int) -> Self: + """ + Create new Table with the indicated column removed. + + Parameters + ---------- + i : int + Index of column to remove. + + Returns + ------- + Table + New table without the column. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.remove_column(1) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,4,5,100]] + """ + def set_column( + self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] + ) -> Self: + """ + Replace column in Table at position. + + Parameters + ---------- + i : int + Index to place the column at. + field_ : str or Field + If a string is passed then the type is deduced from the column + data. + column : Array, list of Array, or values coercible to arrays + Column data. + + Returns + ------- + Table + New table with the passed column set. + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + + Replace a column: + + >>> year = [2021, 2022, 2019, 2021] + >>> table.set_column(1, "year", [year]) + pyarrow.Table + n_legs: int64 + year: int64 + ---- + n_legs: [[2,4,5,100]] + year: [[2021,2022,2019,2021]] + """ + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: + """ + Create new table with columns renamed to provided names. + + Parameters + ---------- + names : list[str] or dict[str, str] + List of new column names or mapping of old column names to new column names. + + If a mapping of old to new column names is passed, then all columns which are + found to match a provided old column name will be renamed to the new column name. + If any column names are not found in the mapping, a KeyError will be raised. + + Raises + ------ + KeyError + If any of the column names passed in the names mapping do not exist. + + Returns + ------- + Table + + Examples + -------- + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> new_names = ["n", "name"] + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + >>> new_names = {"n_legs": "n", "animals": "name"} + >>> table.rename_columns(new_names) + pyarrow.Table + n: int64 + name: string + ---- + n: [[2,4,5,100]] + name: [["Flamingo","Horse","Brittle stars","Centipede"]] + """ + def drop(self, columns: str | list[str]) -> Self: + """ + Drop one or more columns and return a new table. 
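+
+        A one-line sketch (illustrative only)::
+
+            import pyarrow as pa
+
+            t = pa.table({"n_legs": [2, 4], "animals": ["Parrot", "Dog"]})
+            assert t.drop(["animals"]).column_names == ["n_legs"]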
+ + Alias of Table.drop_columns, but kept for backwards compatibility. + + Parameters + ---------- + columns : str or list[str] + Field name(s) referencing existing column(s). + + Returns + ------- + Table + New table without the column(s). + """ + def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: + """ + Declare a grouping over the columns of the table. + + Resulting grouping can then be used to perform aggregations + with a subsequent ``aggregate()`` method. + + Parameters + ---------- + keys : str or list[str] + Name of the columns that should be used as the grouping key. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the + default), no stable ordering of the output is guaranteed. + + Returns + ------- + TableGroupBy + + See Also + -------- + TableGroupBy.aggregate + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> table.group_by("year").aggregate([("n_legs", "sum")]) + pyarrow.Table + year: int64 + n_legs_sum: int64 + ---- + year: [[2020,2022,2021,2019]] + n_legs_sum: [[2,6,104,5]] + """ + def join( + self, + right_table: Self, + keys: str | list[str], + right_keys: str | list[str] | None = None, + join_type: JoinType = "left outer", + left_suffix: str | None = None, + right_suffix: str | None = None, + coalesce_keys: bool = True, + use_threads: bool = True, + ) -> Self: + """ + Perform a join between this table and another one. + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + keys : str or list[str] + The columns from current table that should be used as keys + of the join operation left side. + right_keys : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. + join_type : str, default "left outer" + The kind of join that should be performed, one of + ("left semi", "right semi", "left anti", "right anti", + "inner", "left outer", "right outer", "full outer") + left_suffix : str, default None + Which suffix to add to left column names. This prevents confusion + when the columns in left and right tables have colliding names. + right_suffix : str, default None + Which suffix to add to the right column names. This prevents confusion + when the columns in left and right tables have colliding names. + coalesce_keys : bool, default True + If the duplicated keys should be omitted from one of the sides + in the join result. + use_threads : bool, default True + Whether to use multithreading or not. + + Returns + ------- + Table + + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> df1 = pd.DataFrame({"id": [1, 2, 3], "year": [2020, 2022, 2019]}) + >>> df2 = pd.DataFrame( + ... {"id": [3, 4], "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]} + ... 
) + >>> t1 = pa.Table.from_pandas(df1) + >>> t2 = pa.Table.from_pandas(df2) + + Left outer join: + + >>> t1.join(t2, "id").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2]] + year: [[2019,2020,2022]] + n_legs: [[5,null,null]] + animal: [["Brittle stars",null,null]] + + Full outer join: + + >>> t1.join(t2, "id", join_type="full outer").combine_chunks().sort_by("year") + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[3,1,2,4]] + year: [[2019,2020,2022,null]] + n_legs: [[5,null,null,100]] + animal: [["Brittle stars",null,null,"Centipede"]] + + Right outer join: + + >>> t1.join(t2, "id", join_type="right outer").combine_chunks().sort_by("year") + pyarrow.Table + year: int64 + id: int64 + n_legs: int64 + animal: string + ---- + year: [[2019,null]] + id: [[3,4]] + n_legs: [[5,100]] + animal: [["Brittle stars","Centipede"]] + + Right anti join + + >>> t1.join(t2, "id", join_type="right anti") + pyarrow.Table + id: int64 + n_legs: int64 + animal: string + ---- + id: [[4]] + n_legs: [[100]] + animal: [["Centipede"]] + """ + def join_asof( + self, + right_table: Self, + on: str, + by: str | list[str], + tolerance: int, + right_on: str | list[str] | None = None, + right_by: str | list[str] | None = None, + ) -> Self: + """ + Perform an asof join between this table and another one. + + This is similar to a left-join except that we match on nearest key rather + than equal keys. Both tables must be sorted by the key. This type of join + is most useful for time series data that are not perfectly aligned. + + Optionally match on equivalent keys with "by" before searching with "on". + + Result of the join will be a new Table, where further + operations can be applied. + + Parameters + ---------- + right_table : Table + The table to join to the current one, acting as the right table + in the join operation. + on : str + The column from current table that should be used as the "on" key + of the join operation left side. + + An inexact match is used on the "on" key, i.e. a row is considered a + match if and only if left_on - tolerance <= right_on <= left_on. + + The input dataset must be sorted by the "on" key. Must be a single + field of a common type. + + Currently, the "on" key must be an integer, date, or timestamp type. + by : str or list[str] + The columns from current table that should be used as the keys + of the join operation left side. The join operation is then done + only for the matches in these columns. + tolerance : int + The tolerance for inexact "on" key matching. A right row is considered + a match with the left row ``right.on - left.on <= tolerance``. The + ``tolerance`` may be: + + - negative, in which case a past-as-of-join occurs; + - or positive, in which case a future-as-of-join occurs; + - or zero, in which case an exact-as-of-join occurs. + + The tolerance is interpreted in the same units as the "on" key. + right_on : str or list[str], default None + The columns from the right_table that should be used as the on key + on the join operation right side. + When ``None`` use the same key name as the left table. + right_by : str or list[str], default None + The columns from the right_table that should be used as keys + on the join operation right side. + When ``None`` use the same key names as the left table. 
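+
+        A minimal sketch (illustrative only; it only asserts the left-join-like
+        shape, i.e. one output row per left row)::
+
+            import pyarrow as pa
+
+            left = pa.table({"id": [1, 1], "ts": [1, 5]})
+            right = pa.table({"id": [1], "ts": [4], "v": [10]})
+            joined = left.join_asof(right, on="ts", by="id", tolerance=-2)
+            assert joined.num_rows == left.num_rows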
+ + Returns + ------- + Table + + Example + -------- + >>> import pyarrow as pa + >>> t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]}) + >>> t2 = pa.table( + ... { + ... "id": [3, 4], + ... "year": [2020, 2021], + ... "n_legs": [5, 100], + ... "animal": ["Brittle stars", "Centipede"], + ... } + ... ) + + >>> t1.join_asof(t2, on="year", by="id", tolerance=-2) + pyarrow.Table + id: int64 + year: int64 + n_legs: int64 + animal: string + ---- + id: [[1,3,2,3,3]] + year: [[2020,2021,2022,2022,2023]] + n_legs: [[null,5,null,5,null]] + animal: [[null,"Brittle stars",null,"Brittle stars",null]] + """ + def __arrow_c_stream__(self, requested_schema=None): + """ + Export the table as an Arrow C stream PyCapsule. + + Parameters + ---------- + requested_schema : PyCapsule, default None + The schema to which the stream should be casted, passed as a + PyCapsule containing a C ArrowSchema representation of the + requested schema. + Currently, this is not supported and will raise a + NotImplementedError if the schema doesn't match the current schema. + + Returns + ------- + PyCapsule + """ + @property + def is_cpu(self) -> bool: + """ + Whether all ChunkedArrays are CPU-accessible. + """ + +def record_batch( + data: dict[str, list[Any] | Array[Any]] + | Collection[Array[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, +) -> RecordBatch: + """ + Create a pyarrow.RecordBatch from another Python data structure or sequence + of arrays. + + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of Arrays, + a pandas DataFame, or any tabular object implementing the + Arrow PyCapsule Protocol (has an ``__arrow_c_array__`` or + ``__arrow_c_device_array__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the RecordBatch. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). 
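+
+    A small sketch (illustrative only) of the dict form with an inferred schema
+    plus user metadata::
+
+        import pyarrow as pa
+
+        batch = pa.record_batch({"x": [1, 2, 3]}, metadata={"source": "sketch"})
+        assert batch.num_rows == 3
+        assert batch.schema.metadata == {b"source": b"sketch"}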
+ + Returns + ------- + RecordBatch + + See Also + -------- + RecordBatch.from_arrays, RecordBatch.from_pandas, table + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a RecordBatch from a python dictionary: + + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch({"n_legs": n_legs, "animals": animals}).to_pandas() + n_legs animals + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + Creating a RecordBatch from a list of arrays with names: + + >>> pa.record_batch([n_legs, animals], names=names) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + + Creating a RecordBatch from a list of arrays with names and metadata: + + >>> my_metadata = {"n_legs": "How many legs does an animal have?"} + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,2,4,4,5,100] + animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] + >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'How many legs does an animal have?' + + Creating a RecordBatch from a pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022], + ... "month": [3, 5, 7, 9], + ... "day": [1, 5, 9, 13], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.record_batch(df) + pyarrow.RecordBatch + year: int64 + month: int64 + day: int64 + n_legs: int64 + animals: string + ---- + year: [2020,2022,2021,2022] + month: [3,5,7,9] + day: [1,5,9,13] + n_legs: [2,4,5,100] + animals: ["Flamingo","Horse","Brittle stars","Centipede"] + + >>> pa.record_batch(df).to_pandas() + year month day n_legs animals + 0 2020 3 1 2 Flamingo + 1 2022 5 5 4 Horse + 2 2021 7 9 5 Brittle stars + 3 2022 9 13 100 Centipede + + Creating a RecordBatch from a pandas DataFrame with schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.record_batch(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: ... + >>> pa.record_batch(df, my_schema).to_pandas() + n_legs animals + 0 2 Flamingo + 1 4 Horse + 2 5 Brittle stars + 3 100 Centipede + """ + +def table( + data: dict[str, list[Any] | Array[Any]] + | Collection[ArrayOrChunkedArray[Any]] + | pd.DataFrame + | SupportArrowArray + | SupportArrowStream + | SupportArrowDeviceArray, + names: list[str] | None = None, + schema: Schema | None = None, + metadata: Mapping[str | bytes, str | bytes] | None = None, + nthreads: int | None = None, +) -> Table: + """ + Create a pyarrow.Table from a Python data structure or sequence of arrays. 
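+
+    A short sketch (illustrative only) of the behaviour described under
+    ``schema`` below: for dict input, keys not named in the schema are ignored::
+
+        import pyarrow as pa
+
+        schema = pa.schema([("n_legs", pa.int64())])
+        t = pa.table({"n_legs": [2, 4], "extra": [0, 1]}, schema=schema)
+        assert t.column_names == ["n_legs"]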
+ + Parameters + ---------- + data : dict, list, pandas.DataFrame, Arrow-compatible table + A mapping of strings to Arrays or Python lists, a list of arrays or + chunked arrays, a pandas DataFame, or any tabular object implementing + the Arrow PyCapsule Protocol (has an ``__arrow_c_array__``, + ``__arrow_c_device_array__`` or ``__arrow_c_stream__`` method). + names : list, default None + Column names if list of arrays passed as data. Mutually exclusive with + 'schema' argument. + schema : Schema, default None + The expected schema of the Arrow Table. If not passed, will be inferred + from the data. Mutually exclusive with 'names' argument. + If passed, the output will have exactly this schema (raising an error + when columns are not found in the data and ignoring additional data not + specified in the schema, when data is a dict or DataFrame). + metadata : dict or Mapping, default None + Optional metadata for the schema (if schema not passed). + nthreads : int, default None + For pandas.DataFrame inputs: if greater than 1, convert columns to + Arrow in parallel using indicated number of threads. By default, + this follows :func:`pyarrow.cpu_count` (may use up to system CPU count + threads). + + Returns + ------- + Table + + See Also + -------- + Table.from_arrays, Table.from_pandas, Table.from_pydict + + Examples + -------- + >>> import pyarrow as pa + >>> n_legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> names = ["n_legs", "animals"] + + Construct a Table from a python dictionary: + + >>> pa.table({"n_legs": n_legs, "animals": animals}) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays: + + >>> pa.table([n_legs, animals], names=names) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from arrays with metadata: + + >>> my_metadata = {"n_legs": "Number of legs per animal"} + >>> pa.table([n_legs, animals], names=names, metadata=my_metadata).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + + Construct a Table from pandas DataFrame: + + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2019, 2021], + ... "n_legs": [2, 4, 5, 100], + ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> pa.table(df) + pyarrow.Table + year: int64 + n_legs: int64 + animals: string + ---- + year: [[2020,2022,2019,2021]] + n_legs: [[2,4,5,100]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + + Construct a Table from pandas DataFrame with pyarrow schema: + + >>> my_schema = pa.schema( + ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], + ... metadata={"n_legs": "Number of legs per animal"}, + ... ) + >>> pa.table(df, my_schema).schema + n_legs: int64 + animals: string + -- schema metadata -- + n_legs: 'Number of legs per animal' + pandas: '{"index_columns": [], "column_indexes": [{"name": null, ... + + Construct a Table from chunked arrays: + + >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) + >>> animals = pa.chunked_array( + ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] + ... 
) + >>> table = pa.table([n_legs, animals], names=names) + >>> table + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,2,4],[4,5,100]] + animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] + """ + +def concat_tables( + tables: Iterable[Table], + memory_pool: MemoryPool | None = None, + promote_options: Literal["none", "default", "permissive"] = "none", + **kwargs: Any, +) -> Table: + """ + Concatenate pyarrow.Table objects. + + If promote_options="none", a zero-copy concatenation will be performed. The schemas + of all the Tables must be the same (except the metadata), otherwise an + exception will be raised. The result Table will share the metadata with the + first table. + + If promote_options="default", any null type arrays will be casted to the type of other + arrays in the column of the same name. If a table is missing a particular + field, null values of the appropriate type will be generated to take the + place of the missing field. The new schema will share the metadata with the + first table. Each field in the new schema will share the metadata with the + first table which has the field defined. Note that type promotions may + involve additional allocations on the given ``memory_pool``. + + If promote_options="permissive", the behavior of default plus types will be promoted + to the common denominator that fits all the fields. + + Parameters + ---------- + tables : iterable of pyarrow.Table objects + Pyarrow tables to concatenate into a single Table. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + promote_options : str, default none + Accepts strings "none", "default" and "permissive". + **kwargs : dict, optional + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.table( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.table([pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"]) + >>> pa.concat_tables([t1, t2]) + pyarrow.Table + n_legs: int64 + animals: string + ---- + n_legs: [[2,4,5,100],[2,4]] + animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Parrot","Dog"]] + + """ + +class TableGroupBy: + """ + A grouping of columns in a table on which to perform aggregations. + + Parameters + ---------- + table : pyarrow.Table + Input table to execute the aggregation on. + keys : str or list[str] + Name of the grouped columns. + use_threads : bool, default True + Whether to use multithreading or not. When set to True (the default), + no stable ordering of the output is guaranteed. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table( + ... [ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], + ... names=["keys", "values"], + ... ) + + Grouping of columns: + + >>> pa.TableGroupBy(t, "keys") + + + Perform aggregations: + + >>> pa.TableGroupBy(t, "keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + """ + + keys: str | list[str] + def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... + def aggregate( + self, + aggregations: Iterable[ + tuple[ColumnSelector, Aggregation] + | tuple[ColumnSelector, Aggregation, AggregateOptions | None] + ], + ) -> Table: + """ + Perform an aggregation over the grouped columns of the table. 
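+
+        A minimal sketch (illustrative only; ``use_threads=False`` is passed at
+        ``group_by`` time when a stable output order matters)::
+
+            import pyarrow as pa
+
+            t = pa.table({"keys": ["a", "b", "a"], "values": [1, 2, 3]})
+            agg = t.group_by("keys", use_threads=False).aggregate([("values", "sum")])
+            assert agg.num_rows == 2
+            assert "values_sum" in agg.column_names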
+ + Parameters + ---------- + aggregations : list[tuple(str, str)] or \ +list[tuple(str, str, FunctionOptions)] + List of tuples, where each tuple is one aggregation specification + and consists of: aggregation column name followed + by function name and optionally aggregation function option. + Pass empty list to get a single row for each group. + The column name can be a string, an empty list or a list of + column names, for unary, nullary and n-ary aggregation functions + respectively. + + For the list of function names and respective aggregation + function options see :ref:`py-grouped-aggrs`. + + Returns + ------- + Table + Results of the aggregation functions. + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.table([ + ... pa.array(["a", "a", "b", "b", "c"]), + ... pa.array([1, 2, 3, 4, 5]), + ... ], names=["keys", "values"]) + + Sum the column "values" over the grouped column "keys": + + >>> t.group_by("keys").aggregate([("values", "sum")]) + pyarrow.Table + keys: string + values_sum: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + + Count the rows over the grouped column "keys": + + >>> t.group_by("keys").aggregate([([], "count_all")]) + pyarrow.Table + keys: string + count_all: int64 + ---- + keys: [["a","b","c"]] + count_all: [[2,2,1]] + + Do multiple aggregations: + + >>> t.group_by("keys").aggregate([ + ... ("values", "sum"), + ... ("keys", "count") + ... ]) + pyarrow.Table + keys: string + values_sum: int64 + keys_count: int64 + ---- + keys: [["a","b","c"]] + values_sum: [[3,7,5]] + keys_count: [[2,2,1]] + + Count the number of non-null values for column "values" + over the grouped column "keys": + + >>> import pyarrow.compute as pc + >>> t.group_by(["keys"]).aggregate([ + ... ("values", "count", pc.CountOptions(mode="only_valid")) + ... ]) + pyarrow.Table + keys: string + values_count: int64 + ---- + keys: [["a","b","c"]] + values_count: [[2,2,1]] + + Get a single row for each group in column "keys": + + >>> t.group_by("keys").aggregate([]) + pyarrow.Table + keys: string + ---- + keys: [["a","b","c"]] + """ + def _table(self) -> Table: ... + @property + def _use_threads(self) -> bool: ... + +def concat_batches( + recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None +) -> RecordBatch: + """ + Concatenate pyarrow.RecordBatch objects. + + All recordbatches must share the same Schema, + the operation implies a copy of the data to merge + the arrays of the different RecordBatches. + + Parameters + ---------- + recordbatches : iterable of pyarrow.RecordBatch objects + Pyarrow record batches to concatenate into a single RecordBatch. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Examples + -------- + >>> import pyarrow as pa + >>> t1 = pa.record_batch( + ... [ + ... pa.array([2, 4, 5, 100]), + ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), + ... ], + ... names=["n_legs", "animals"], + ... ) + >>> t2 = pa.record_batch( + ... [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"] + ... 
) + >>> pa.concat_batches([t1, t2]) + pyarrow.RecordBatch + n_legs: int64 + animals: string + ---- + n_legs: [2,4,5,100,2,4] + animals: ["Flamingo","Horse","Brittle stars","Centipede","Parrot","Dog"] + + """ + +__all__ = [ + "ChunkedArray", + "chunked_array", + "_Tabular", + "RecordBatch", + "table_to_blocks", + "Table", + "record_batch", + "table", + "concat_tables", + "TableGroupBy", + "concat_batches", +] diff --git a/python/pyarrow-stubs/util.pyi b/python/pyarrow-stubs/util.pyi new file mode 100644 index 00000000000..c2ecf7d6b61 --- /dev/null +++ b/python/pyarrow-stubs/util.pyi @@ -0,0 +1,27 @@ +from collections.abc import Callable +from os import PathLike +from typing import Any, Protocol, Sequence, TypeVar + +_F = TypeVar("_F", bound=Callable) +_N = TypeVar("_N") + +class _DocStringComponents(Protocol): + _docstring_components: list[str] + +def doc( + *docstrings: str | _DocStringComponents | Callable | None, **params: Any +) -> Callable[[_F], _F]: ... +def _is_iterable(obj) -> bool: ... +def _is_path_like(path) -> bool: ... +def _stringify_path(path: str | PathLike) -> str: ... +def product(seq: Sequence[_N]) -> _N: ... +def get_contiguous_span( + shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int +) -> tuple[int, int]: ... +def find_free_port() -> int: ... +def guid() -> str: ... +def _download_urllib(url, out_path) -> None: ... +def _download_requests(url, out_path) -> None: ... +def download_tzdata_on_windows() -> None: ... +def _deprecate_api(old_name, new_name, api, next_version, type=...): ... +def _deprecate_class(old_name, new_class, next_version, instancecheck=True): ... diff --git a/python/pyarrow/ipc.py b/python/pyarrow/_ipc.py similarity index 100% rename from python/pyarrow/ipc.py rename to python/pyarrow/_ipc.py From a50d6f8bde3b3c783c349ff5ad74d890d82d883e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 02:57:46 +0200 Subject: [PATCH 14/26] Remaining stubs --- python/pyarrow-stubs/__init__.pyi | 2 - python/pyarrow-stubs/_azurefs.pyi | 91 + python/pyarrow-stubs/_benchmark.pyi | 3 + python/pyarrow-stubs/_csv.pyi | 556 ++ python/pyarrow-stubs/_cuda.pyi | 557 ++ python/pyarrow-stubs/_dataset.pyi | 4582 ++++++++--------- python/pyarrow-stubs/_dataset_orc.pyi | 6 + python/pyarrow-stubs/_dataset_parquet.pyi | 314 ++ .../_dataset_parquet_encryption.pyi | 85 + python/pyarrow-stubs/_feather.pyi | 29 + python/pyarrow-stubs/_flight.pyi | 1380 +++++ python/pyarrow-stubs/_fs.pyi | 1001 ++++ python/pyarrow-stubs/_gcsfs.pyi | 83 + python/pyarrow-stubs/_hdfs.pyi | 75 + python/pyarrow-stubs/_json.pyi | 169 + python/pyarrow-stubs/_orc.pyi | 56 + python/pyarrow-stubs/_parquet.pyi | 445 ++ python/pyarrow-stubs/_parquet_encryption.pyi | 67 + python/pyarrow-stubs/_s3fs.pyi | 75 + python/pyarrow-stubs/_substrait.pyi | 39 + python/pyarrow-stubs/acero.pyi | 85 + python/pyarrow-stubs/builder.pyi | 89 + python/pyarrow-stubs/cffi.pyi | 4 + python/pyarrow-stubs/compat.pyi | 22 + python/pyarrow-stubs/csv.pyi | 27 + python/pyarrow-stubs/cuda.pyi | 25 + python/pyarrow-stubs/dataset.pyi | 458 +- python/pyarrow-stubs/feather.pyi | 50 + python/pyarrow-stubs/flight.pyi | 95 + python/pyarrow-stubs/fs.pyi | 77 + python/pyarrow-stubs/gandiva.pyi | 65 + python/pyarrow-stubs/json.pyi | 3 + python/pyarrow-stubs/lib.pyi | 9 +- python/pyarrow-stubs/orc.pyi | 279 + python/pyarrow-stubs/pandas_compat.pyi | 54 + python/pyarrow-stubs/pandas_shim.pyi | 51 + python/pyarrow-stubs/parquet/__init__.pyi | 1 + python/pyarrow-stubs/parquet/core.pyi | 2061 ++++++++ 
python/pyarrow-stubs/parquet/encryption.pyi | 15 + python/pyarrow-stubs/substrait.pyi | 21 + 40 files changed, 10570 insertions(+), 2536 deletions(-) create mode 100644 python/pyarrow-stubs/_azurefs.pyi create mode 100644 python/pyarrow-stubs/_benchmark.pyi create mode 100644 python/pyarrow-stubs/_csv.pyi create mode 100644 python/pyarrow-stubs/_cuda.pyi create mode 100644 python/pyarrow-stubs/_dataset_orc.pyi create mode 100644 python/pyarrow-stubs/_dataset_parquet.pyi create mode 100644 python/pyarrow-stubs/_dataset_parquet_encryption.pyi create mode 100644 python/pyarrow-stubs/_feather.pyi create mode 100644 python/pyarrow-stubs/_flight.pyi create mode 100644 python/pyarrow-stubs/_fs.pyi create mode 100644 python/pyarrow-stubs/_gcsfs.pyi create mode 100644 python/pyarrow-stubs/_hdfs.pyi create mode 100644 python/pyarrow-stubs/_json.pyi create mode 100644 python/pyarrow-stubs/_orc.pyi create mode 100644 python/pyarrow-stubs/_parquet.pyi create mode 100644 python/pyarrow-stubs/_parquet_encryption.pyi create mode 100644 python/pyarrow-stubs/_s3fs.pyi create mode 100644 python/pyarrow-stubs/_substrait.pyi create mode 100644 python/pyarrow-stubs/acero.pyi create mode 100644 python/pyarrow-stubs/builder.pyi create mode 100644 python/pyarrow-stubs/cffi.pyi create mode 100644 python/pyarrow-stubs/compat.pyi create mode 100644 python/pyarrow-stubs/csv.pyi create mode 100644 python/pyarrow-stubs/cuda.pyi create mode 100644 python/pyarrow-stubs/feather.pyi create mode 100644 python/pyarrow-stubs/flight.pyi create mode 100644 python/pyarrow-stubs/fs.pyi create mode 100644 python/pyarrow-stubs/gandiva.pyi create mode 100644 python/pyarrow-stubs/json.pyi create mode 100644 python/pyarrow-stubs/orc.pyi create mode 100644 python/pyarrow-stubs/pandas_compat.pyi create mode 100644 python/pyarrow-stubs/pandas_shim.pyi create mode 100644 python/pyarrow-stubs/parquet/__init__.pyi create mode 100644 python/pyarrow-stubs/parquet/core.pyi create mode 100644 python/pyarrow-stubs/parquet/encryption.pyi create mode 100644 python/pyarrow-stubs/substrait.pyi diff --git a/python/pyarrow-stubs/__init__.pyi b/python/pyarrow-stubs/__init__.pyi index 3f5e3073fd8..d74b486fd55 100644 --- a/python/pyarrow-stubs/__init__.pyi +++ b/python/pyarrow-stubs/__init__.pyi @@ -336,8 +336,6 @@ from pyarrow.lib import ( ) from .ipc import serialize_pandas, deserialize_pandas -# TODO? -# import _ipc as ipc import types as types diff --git a/python/pyarrow-stubs/_azurefs.pyi b/python/pyarrow-stubs/_azurefs.pyi new file mode 100644 index 00000000000..b9a83f01c56 --- /dev/null +++ b/python/pyarrow-stubs/_azurefs.pyi @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+
+from typing import Literal
+
+from ._fs import FileSystem
+
+class AzureFileSystem(FileSystem):
+    """
+    Azure Blob Storage backed FileSystem implementation
+
+    This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a.
+    Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific
+    features will be used when they provide a performance advantage. Azurite emulator is
+    also supported. Note: `/` is the only supported delimiter.
+
+    The storage account is considered the root of the filesystem. When enabled, containers
+    will be created or deleted during relevant directory operations. Obviously, this also
+    requires authentication with the additional permissions.
+
+    By default `DefaultAzureCredential `__
+    is used for authentication. This means it will try several types of authentication
+    and go with the first one that works. If any authentication parameters are provided when
+    initialising the FileSystem, they will be used instead of the default credential.
+
+    Parameters
+    ----------
+    account_name : str
+        Azure Blob Storage account name. This is the globally unique identifier for the
+        storage account.
+    account_key : str, default None
+        Account key of the storage account. If sas_token and account_key are None the
+        default credential will be used. The parameters account_key and sas_token are
+        mutually exclusive.
+    blob_storage_authority : str, default None
+        hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful
+        for connecting to a local emulator, like Azurite.
+    dfs_storage_authority : str, default None
+        hostname[:port] of the Data Lake Gen 2 Service. Defaults to
+        `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite.
+    blob_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    dfs_storage_scheme : str, default None
+        Either `http` or `https`. Defaults to `https`. Useful for connecting to a local
+        emulator, like Azurite.
+    sas_token : str, default None
+        SAS token for the storage account, used as an alternative to account_key. If sas_token
+        and account_key are None the default credential will be used. The parameters
+        account_key and sas_token are mutually exclusive.
+
+    Examples
+    --------
+    >>> from pyarrow import fs
+    >>> azure_fs = fs.AzureFileSystem(account_name="myaccount")
+    >>> azurite_fs = fs.AzureFileSystem(
+    ...     account_name="devstoreaccount1",
+    ...     account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
+    ...     blob_storage_authority="127.0.0.1:10000",
+    ...     dfs_storage_authority="127.0.0.1:10000",
+    ...     blob_storage_scheme="http",
+    ...     dfs_storage_scheme="http",
+    ... )
+
+    For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`.
+    """
+
+    def __init__(
+        self,
+        account_name: str,
+        account_key: str | None = None,
+        blob_storage_authority: str | None = None,
+        dfs_storage_authority: str | None = None,
+        blob_storage_scheme: Literal["http", "https"] = "https",
+        dfs_storage_scheme: Literal["http", "https"] = "https",
+        sas_token: str | None = None,
+    ) -> None: ...
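+
+# A hypothetical usage sketch (requires an Arrow build with Azure support and real
+# credentials; "myaccount" and "my-container" are placeholders):
+#
+#     from pyarrow import fs
+#     import pyarrow.dataset as ds
+#
+#     azure = fs.AzureFileSystem(account_name="myaccount")
+#     dataset = ds.dataset("my-container/data", format="parquet", filesystem=azure)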
diff --git a/python/pyarrow-stubs/_benchmark.pyi b/python/pyarrow-stubs/_benchmark.pyi new file mode 100644 index 00000000000..048973301dc --- /dev/null +++ b/python/pyarrow-stubs/_benchmark.pyi @@ -0,0 +1,3 @@ +from pyarrow.lib import benchmark_PandasObjectIsNull + +__all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/pyarrow-stubs/_csv.pyi b/python/pyarrow-stubs/_csv.pyi new file mode 100644 index 00000000000..ad52b2f380f --- /dev/null +++ b/python/pyarrow-stubs/_csv.pyi @@ -0,0 +1,556 @@ +from typing import Any + +import cuda # type: ignore[import-not-found] + +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] + +from . import lib +from ._stubs_typing import ArrayLike + +class Context(lib._Weakrefable): + """ + CUDA driver context. + """ + + def __init__(self, device_number: int = 0, handle: int | None = None) -> None: + """ + Create a CUDA driver context for a particular device. + + If a CUDA context handle is passed, it is wrapped, otherwise + a default CUDA context for the given device is requested. + + Parameters + ---------- + device_number : int (default 0) + Specify the GPU device for which the CUDA driver context is + requested. + handle : int, optional + Specify CUDA handle for a shared context that has been created + by another library. + """ + @staticmethod + def from_numba(context: _numba_driver.Context | None = None) -> Context: + """ + Create a Context instance from a Numba CUDA context. + + Parameters + ---------- + context : {numba.cuda.cudadrv.driver.Context, None} + A Numba CUDA context instance. + If None, the current Numba context is used. + + Returns + ------- + shared_context : pyarrow.cuda.Context + Context instance. + """ + def to_numba(self) -> _numba_driver.Context: + """ + Convert Context to a Numba CUDA context. + + Returns + ------- + context : numba.cuda.cudadrv.driver.Context + Numba CUDA context instance. + """ + @staticmethod + def get_num_devices() -> int: + """Return the number of GPU devices.""" + @property + def device_number(self) -> int: + """Return context device number.""" + @property + def handle(self) -> int: + """Return pointer to context handle.""" + def synchronize(self) -> None: + """Blocks until the device has completed all preceding requested + tasks. + """ + @property + def bytes_allocated(self) -> int: + """Return the number of allocated bytes.""" + def get_device_address(self, address: int) -> int: + """Return the device address that is reachable from kernels running in + the context + + Parameters + ---------- + address : int + Specify memory address value + + Returns + ------- + device_address : int + Device address accessible from device context + + Notes + ----- + The device address is defined as a memory address accessible + by device. While it is often a device memory address but it + can be also a host memory address, for instance, when the + memory is allocated as host memory (using cudaMallocHost or + cudaHostAlloc) or as managed memory (using cudaMallocManaged) + or the host memory is page-locked (using cudaHostRegister). + """ + def new_buffer(self, nbytes: int) -> CudaBuffer: + """Return new device buffer. + + Parameters + ---------- + nbytes : int + Specify the number of bytes to be allocated. + + Returns + ------- + buf : CudaBuffer + Allocated buffer. + """ + @property + def memory_manager(self) -> lib.MemoryManager: + """ + The default memory manager tied to this context's device. 
+ + Returns + ------- + MemoryManager + """ + @property + def device(self) -> lib.Device: + """ + The device instance associated with this context. + + Returns + ------- + Device + """ + def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: + """ + Create device buffer from address and size as a view. + + The caller is responsible for allocating and freeing the + memory. When `address==size==0` then a new zero-sized buffer + is returned. + + Parameters + ---------- + address : int + Specify the starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + Specify the size of device buffer in bytes. + base : {None, object} + Specify object that owns the referenced memory. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device reachable memory. + + """ + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: + """Open existing CUDA IPC memory handle + + Parameters + ---------- + ipc_handle : IpcMemHandle + Specify opaque pointer to CUipcMemHandle (driver API). + + Returns + ------- + buf : CudaBuffer + referencing device buffer + """ + def buffer_from_data( + self, + data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, + offset: int = 0, + size: int = -1, + ) -> CudaBuffer: + """Create device buffer and initialize with data. + + Parameters + ---------- + data : {CudaBuffer, HostBuffer, Buffer, array-like} + Specify data to be copied to device buffer. + offset : int + Specify the offset of input buffer for device data + buffering. Default: 0. + size : int + Specify the size of device buffer in bytes. Default: all + (starting from input offset) + + Returns + ------- + cbuf : CudaBuffer + Device buffer with copied data. + """ + def buffer_from_object(self, obj: Any) -> CudaBuffer: + """Create device buffer view of arbitrary object that references + device accessible memory. + + When the object contains a non-contiguous view of device + accessible memory then the returned device buffer will contain + contiguous view of the memory, that is, including the + intermediate data that is otherwise invisible to the input + object. + + Parameters + ---------- + obj : {object, Buffer, HostBuffer, CudaBuffer, ...} + Specify an object that holds (device or host) address that + can be accessed from device. This includes objects with + types defined in pyarrow.cuda as well as arbitrary objects + that implement the CUDA array interface as defined by numba. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device accessible memory. + + """ + +class IpcMemHandle(lib._Weakrefable): + """A serializable container for a CUDA IPC handle.""" + @staticmethod + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: + """Create IpcMemHandle from opaque buffer (e.g. from another + process) + + Parameters + ---------- + opaque_handle : + a CUipcMemHandle as a const void* + + Returns + ------- + ipc_handle : IpcMemHandle + """ + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: + """Write IpcMemHandle to a Buffer + + Parameters + ---------- + pool : {MemoryPool, None} + Specify a pool to allocate memory from + + Returns + ------- + buf : Buffer + The serialized buffer. + """ + +class CudaBuffer(lib.Buffer): + """An Arrow buffer with data located in a GPU device. + + To create a CudaBuffer instance, use Context.device_buffer(). 
+ + The memory allocated in a CudaBuffer is freed when the buffer object + is deleted. + """ + + @staticmethod + def from_buffer(buf: lib.Buffer) -> CudaBuffer: + """Convert back generic buffer into CudaBuffer + + Parameters + ---------- + buf : Buffer + Specify buffer containing CudaBuffer + + Returns + ------- + dbuf : CudaBuffer + Resulting device buffer. + """ + @staticmethod + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: + """Create a CudaBuffer view from numba MemoryPointer instance. + + Parameters + ---------- + mem : numba.cuda.cudadrv.driver.MemoryPointer + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of numba MemoryPointer. + """ + def to_numba(self) -> _numba_driver.MemoryPointer: + """Return numba memory pointer of CudaBuffer instance.""" + def copy_to_host( + self, + position: int = 0, + nbytes: int = -1, + buf: lib.Buffer | None = None, + memory_pool: lib.MemoryPool | None = None, + resizable: bool = False, + ) -> lib.Buffer: + """Copy memory from GPU device to CPU host + + Caller is responsible for ensuring that all tasks affecting + the memory are finished. Use + + `.context.synchronize()` + + when needed. + + Parameters + ---------- + position : int + Specify the starting position of the source data in GPU + device buffer. Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + the position until host buffer is full). + buf : Buffer + Specify a pre-allocated output buffer in host. Default: None + (allocate new output buffer). + memory_pool : MemoryPool + resizable : bool + Specify extra arguments to allocate_buffer. Used only when + buf is None. + + Returns + ------- + buf : Buffer + Output buffer in host. + + """ + def copy_from_host( + self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + ) -> int: + """Copy data from host to device. + + The device buffer must be pre-allocated. + + Parameters + ---------- + data : {Buffer, array-like} + Specify data in host. It can be array-like that is valid + argument to py_buffer + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + """ + def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: + """Copy data from device to device. + + Parameters + ---------- + buf : CudaBuffer + Specify source device buffer. + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + + """ + def export_for_ipc(self) -> IpcMemHandle: + """ + Expose this device buffer as IPC memory which can be used in other + processes. + + After calling this function, this device memory will not be + freed when the CudaBuffer is destructed. 
+ + Returns + ------- + ipc_handle : IpcMemHandle + The exported IPC handle + + """ + @property + def context(self) -> Context: + """Returns the CUDA driver context of this buffer.""" + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: + """Return slice of device buffer + + Parameters + ---------- + offset : int, default 0 + Specify offset from the start of device buffer to slice + length : int, default None + Specify the length of slice (default is until end of device + buffer starting from offset). If the length is larger than + the data available, the returned slice will have a size of + the available data starting from the offset. + + Returns + ------- + sliced : CudaBuffer + Zero-copy slice of device buffer. + + """ + def to_pybytes(self) -> bytes: + """Return device buffer content as Python bytes.""" + +class HostBuffer(lib.Buffer): + """Device-accessible CPU memory created using cudaHostAlloc. + + To create a HostBuffer instance, use + + cuda.new_host_buffer() + """ + @property + def size(self) -> int: ... + +class BufferReader(lib.NativeFile): + """File interface for zero-copy read from CUDA buffers. + + Note: Read methods return pointers to device memory. This means + you must be careful using this interface with any Arrow code which + may expect to be able to do anything other than pointer arithmetic + on the returned buffers. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: + """Return a slice view of the underlying device buffer. + + The slice will start at the current reader position and will + have specified size in bytes. + + Parameters + ---------- + nbytes : int, default None + Specify the number of bytes to read. Default: None (read all + remaining bytes). + + Returns + ------- + cbuf : CudaBuffer + New device buffer. + + """ + +class BufferWriter(lib.NativeFile): + """File interface for writing to CUDA buffers. + + By default writes are unbuffered. Use set_buffer_size to enable + buffering. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def writeat(self, position: int, data: ArrayLike) -> None: + """Write data to buffer starting from position. + + Parameters + ---------- + position : int + Specify device buffer position where the data will be + written. + data : array-like + Specify data, the data instance must implement buffer + protocol. + """ + @property + def buffer_size(self) -> int: + """Returns size of host (CPU) buffer, 0 for unbuffered""" + @buffer_size.setter + def buffer_size(self, buffer_size: int): + """Set CPU buffer size to limit calls to cudaMemcpy + + Parameters + ---------- + buffer_size : int + Specify the size of CPU buffer to allocate in bytes. + """ + @property + def num_bytes_buffered(self) -> int: + """Returns number of bytes buffered on host""" + +def new_host_buffer(size: int, device: int = 0) -> HostBuffer: + """Return buffer with CUDA-accessible memory on CPU host + + Parameters + ---------- + size : int + Specify the number of bytes to be allocated. + device : int + Specify GPU device number. 
+
+    Returns
+    -------
+    dbuf : HostBuffer
+        Allocated host buffer
+    """
+
+def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer:
+    """Write record batch message to GPU device memory
+
+    Parameters
+    ----------
+    batch : RecordBatch
+        Record batch to write
+    ctx : Context
+        CUDA Context to allocate device memory from
+
+    Returns
+    -------
+    dbuf : CudaBuffer
+        device buffer which contains the record batch message
+    """
+
+def read_message(
+    source: CudaBuffer | BufferReader, pool: lib.MemoryPool | None = None
+) -> lib.Message:
+    """Read Arrow IPC message located on GPU device
+
+    Parameters
+    ----------
+    source : {CudaBuffer, cuda.BufferReader}
+        Device buffer or reader of device buffer.
+    pool : MemoryPool (optional)
+        Pool to allocate CPU memory for the metadata
+
+    Returns
+    -------
+    message : Message
+        The deserialized message, body still on device
+    """
+
+def read_record_batch(
+    buffer: lib.Buffer,
+    schema: lib.Schema,
+    *,
+    dictionary_memo: lib.DictionaryMemo | None = None,
+    pool: lib.MemoryPool | None = None,
+) -> lib.RecordBatch:
+    """Construct RecordBatch referencing IPC message located on CUDA device.
+
+    While the metadata is copied to host memory for deserialization,
+    the record batch data remains on the device.
+
+    Parameters
+    ----------
+    buffer : Buffer
+        Device buffer containing the complete IPC message
+    schema : Schema
+        The schema for the record batch
+    dictionary_memo : DictionaryMemo, optional
+        If message contains dictionaries, must pass a populated
+        DictionaryMemo
+    pool : MemoryPool (optional)
+        Pool to allocate metadata from
+
+    Returns
+    -------
+    batch : RecordBatch
+        Reconstructed record batch, with device pointers
+
+    """
diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi
new file mode 100644
index 00000000000..94f1b33e2e0
--- /dev/null
+++ b/python/pyarrow-stubs/_cuda.pyi
@@ -0,0 +1,557 @@
+from typing import Any
+
+import cuda  # type: ignore[import-not-found]
+
+from numba.cuda.cudadrv import driver as _numba_driver  # type: ignore[import-not-found]
+
+# from . import lib
+from .lib import _Weakrefable, Buffer, MemoryPool, NativeFile, RecordBatch, Schema, DictionaryMemo, Message, MemoryManager, Device
+from ._stubs_typing import ArrayLike
+
+class Context(_Weakrefable):
+    """
+    CUDA driver context.
+    """
+
+    def __init__(self, device_number: int = 0, handle: int | None = None) -> None:
+        """
+        Create a CUDA driver context for a particular device.
+
+        If a CUDA context handle is passed, it is wrapped; otherwise
+        a default CUDA context for the given device is requested.
+
+        Parameters
+        ----------
+        device_number : int (default 0)
+            Specify the GPU device for which the CUDA driver context is
+            requested.
+        handle : int, optional
+            Specify CUDA handle for a shared context that has been created
+            by another library.
+        """
+    @staticmethod
+    def from_numba(context: _numba_driver.Context | None = None) -> Context:
+        """
+        Create a Context instance from a Numba CUDA context.
+
+        Parameters
+        ----------
+        context : {numba.cuda.cudadrv.driver.Context, None}
+            A Numba CUDA context instance.
+            If None, the current Numba context is used.
+
+        Returns
+        -------
+        shared_context : pyarrow.cuda.Context
+            Context instance.
+        """
+    def to_numba(self) -> _numba_driver.Context:
+        """
+        Convert Context to a Numba CUDA context.
+
+        Returns
+        -------
+        context : numba.cuda.cudadrv.driver.Context
+            Numba CUDA context instance.
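+
+        A hedged round-trip sketch (requires numba built with CUDA support;
+        an active numba context is assumed):
+
+        >>> from pyarrow import cuda              # doctest: +SKIP
+        >>> ctx = cuda.Context.from_numba()       # doctest: +SKIP
+        >>> nb_ctx = ctx.to_numba()               # doctest: +SKIP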
+ """ + @staticmethod + def get_num_devices() -> int: + """Return the number of GPU devices.""" + @property + def device_number(self) -> int: + """Return context device number.""" + @property + def handle(self) -> int: + """Return pointer to context handle.""" + def synchronize(self) -> None: + """Blocks until the device has completed all preceding requested + tasks. + """ + @property + def bytes_allocated(self) -> int: + """Return the number of allocated bytes.""" + def get_device_address(self, address: int) -> int: + """Return the device address that is reachable from kernels running in + the context + + Parameters + ---------- + address : int + Specify memory address value + + Returns + ------- + device_address : int + Device address accessible from device context + + Notes + ----- + The device address is defined as a memory address accessible + by device. While it is often a device memory address but it + can be also a host memory address, for instance, when the + memory is allocated as host memory (using cudaMallocHost or + cudaHostAlloc) or as managed memory (using cudaMallocManaged) + or the host memory is page-locked (using cudaHostRegister). + """ + def new_buffer(self, nbytes: int) -> CudaBuffer: + """Return new device buffer. + + Parameters + ---------- + nbytes : int + Specify the number of bytes to be allocated. + + Returns + ------- + buf : CudaBuffer + Allocated buffer. + """ + @property + def memory_manager(self) -> MemoryManager: + """ + The default memory manager tied to this context's device. + + Returns + ------- + MemoryManager + """ + @property + def device(self) -> Device: + """ + The device instance associated with this context. + + Returns + ------- + Device + """ + def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: + """ + Create device buffer from address and size as a view. + + The caller is responsible for allocating and freeing the + memory. When `address==size==0` then a new zero-sized buffer + is returned. + + Parameters + ---------- + address : int + Specify the starting address of the buffer. The address can + refer to both device or host memory but it must be + accessible from device after mapping it with + `get_device_address` method. + size : int + Specify the size of device buffer in bytes. + base : {None, object} + Specify object that owns the referenced memory. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device reachable memory. + + """ + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: + """Open existing CUDA IPC memory handle + + Parameters + ---------- + ipc_handle : IpcMemHandle + Specify opaque pointer to CUipcMemHandle (driver API). + + Returns + ------- + buf : CudaBuffer + referencing device buffer + """ + def buffer_from_data( + self, + data: CudaBuffer | HostBuffer | Buffer | ArrayLike, + offset: int = 0, + size: int = -1, + ) -> CudaBuffer: + """Create device buffer and initialize with data. + + Parameters + ---------- + data : {CudaBuffer, HostBuffer, Buffer, array-like} + Specify data to be copied to device buffer. + offset : int + Specify the offset of input buffer for device data + buffering. Default: 0. + size : int + Specify the size of device buffer in bytes. Default: all + (starting from input offset) + + Returns + ------- + cbuf : CudaBuffer + Device buffer with copied data. + """ + def buffer_from_object(self, obj: Any) -> CudaBuffer: + """Create device buffer view of arbitrary object that references + device accessible memory. 
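+
+        For instance, a numba device array (an object exposing the CUDA
+        array interface) could be wrapped without copying (a sketch only;
+        assumes numba is installed and ``ctx`` is an existing Context):
+
+        >>> import numpy as np                                        # doctest: +SKIP
+        >>> from numba import cuda as nb_cuda                         # doctest: +SKIP
+        >>> darr = nb_cuda.to_device(np.arange(4, dtype=np.uint8))    # doctest: +SKIP
+        >>> cbuf = ctx.buffer_from_object(darr)                       # doctest: +SKIP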
+ + When the object contains a non-contiguous view of device + accessible memory then the returned device buffer will contain + contiguous view of the memory, that is, including the + intermediate data that is otherwise invisible to the input + object. + + Parameters + ---------- + obj : {object, Buffer, HostBuffer, CudaBuffer, ...} + Specify an object that holds (device or host) address that + can be accessed from device. This includes objects with + types defined in pyarrow.cuda as well as arbitrary objects + that implement the CUDA array interface as defined by numba. + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of device accessible memory. + + """ + +class IpcMemHandle(_Weakrefable): + """A serializable container for a CUDA IPC handle.""" + @staticmethod + def from_buffer(opaque_handle: Buffer) -> IpcMemHandle: + """Create IpcMemHandle from opaque buffer (e.g. from another + process) + + Parameters + ---------- + opaque_handle : + a CUipcMemHandle as a const void* + + Returns + ------- + ipc_handle : IpcMemHandle + """ + def serialize(self, pool: MemoryPool | None = None) -> Buffer: + """Write IpcMemHandle to a Buffer + + Parameters + ---------- + pool : {MemoryPool, None} + Specify a pool to allocate memory from + + Returns + ------- + buf : Buffer + The serialized buffer. + """ + +class CudaBuffer(Buffer): + """An Arrow buffer with data located in a GPU device. + + To create a CudaBuffer instance, use Context.device_buffer(). + + The memory allocated in a CudaBuffer is freed when the buffer object + is deleted. + """ + + @staticmethod + def from_buffer(buf: Buffer) -> CudaBuffer: + """Convert back generic buffer into CudaBuffer + + Parameters + ---------- + buf : Buffer + Specify buffer containing CudaBuffer + + Returns + ------- + dbuf : CudaBuffer + Resulting device buffer. + """ + @staticmethod + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: + """Create a CudaBuffer view from numba MemoryPointer instance. + + Parameters + ---------- + mem : numba.cuda.cudadrv.driver.MemoryPointer + + Returns + ------- + cbuf : CudaBuffer + Device buffer as a view of numba MemoryPointer. + """ + def to_numba(self) -> _numba_driver.MemoryPointer: + """Return numba memory pointer of CudaBuffer instance.""" + def copy_to_host( + self, + position: int = 0, + nbytes: int = -1, + buf: Buffer | None = None, + memory_pool: MemoryPool | None = None, + resizable: bool = False, + ) -> Buffer: + """Copy memory from GPU device to CPU host + + Caller is responsible for ensuring that all tasks affecting + the memory are finished. Use + + `.context.synchronize()` + + when needed. + + Parameters + ---------- + position : int + Specify the starting position of the source data in GPU + device buffer. Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + the position until host buffer is full). + buf : Buffer + Specify a pre-allocated output buffer in host. Default: None + (allocate new output buffer). + memory_pool : MemoryPool + resizable : bool + Specify extra arguments to allocate_buffer. Used only when + buf is None. + + Returns + ------- + buf : Buffer + Output buffer in host. + + """ + def copy_from_host( + self, data: Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + ) -> int: + """Copy data from host to device. + + The device buffer must be pre-allocated. + + Parameters + ---------- + data : {Buffer, array-like} + Specify data in host. 
It can be array-like that is valid + argument to py_buffer + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + """ + def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: + """Copy data from device to device. + + Parameters + ---------- + buf : CudaBuffer + Specify source device buffer. + position : int + Specify the starting position of the copy in device buffer. + Default: 0. + nbytes : int + Specify the number of bytes to copy. Default: -1 (all from + source until device buffer, starting from position, is full) + + Returns + ------- + nbytes : int + Number of bytes copied. + + """ + def export_for_ipc(self) -> IpcMemHandle: + """ + Expose this device buffer as IPC memory which can be used in other + processes. + + After calling this function, this device memory will not be + freed when the CudaBuffer is destructed. + + Returns + ------- + ipc_handle : IpcMemHandle + The exported IPC handle + + """ + @property + def context(self) -> Context: + """Returns the CUDA driver context of this buffer.""" + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: + """Return slice of device buffer + + Parameters + ---------- + offset : int, default 0 + Specify offset from the start of device buffer to slice + length : int, default None + Specify the length of slice (default is until end of device + buffer starting from offset). If the length is larger than + the data available, the returned slice will have a size of + the available data starting from the offset. + + Returns + ------- + sliced : CudaBuffer + Zero-copy slice of device buffer. + + """ + def to_pybytes(self) -> bytes: + """Return device buffer content as Python bytes.""" + +class HostBuffer(Buffer): + """Device-accessible CPU memory created using cudaHostAlloc. + + To create a HostBuffer instance, use + + cuda.new_host_buffer() + """ + @property + def size(self) -> int: ... + +class BufferReader(NativeFile): + """File interface for zero-copy read from CUDA buffers. + + Note: Read methods return pointers to device memory. This means + you must be careful using this interface with any Arrow code which + may expect to be able to do anything other than pointer arithmetic + on the returned buffers. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: + """Return a slice view of the underlying device buffer. + + The slice will start at the current reader position and will + have specified size in bytes. + + Parameters + ---------- + nbytes : int, default None + Specify the number of bytes to read. Default: None (read all + remaining bytes). + + Returns + ------- + cbuf : CudaBuffer + New device buffer. + + """ + +class BufferWriter(NativeFile): + """File interface for writing to CUDA buffers. + + By default writes are unbuffered. Use set_buffer_size to enable + buffering. + """ + def __init__(self, obj: CudaBuffer) -> None: ... + def writeat(self, position: int, data: ArrayLike) -> None: + """Write data to buffer starting from position. + + Parameters + ---------- + position : int + Specify device buffer position where the data will be + written. + data : array-like + Specify data, the data instance must implement buffer + protocol. 
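+
+        A minimal sketch, assuming ``cbuf`` is a pre-allocated CudaBuffer
+        large enough for the written bytes:
+
+        >>> writer = cuda.BufferWriter(cbuf)    # doctest: +SKIP
+        >>> writer.writeat(0, b"abcd")          # doctest: +SKIP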
+        """
+    @property
+    def buffer_size(self) -> int:
+        """Returns size of host (CPU) buffer, 0 for unbuffered"""
+    @buffer_size.setter
+    def buffer_size(self, buffer_size: int):
+        """Set CPU buffer size to limit calls to cudaMemcpy
+
+        Parameters
+        ----------
+        buffer_size : int
+            Specify the size of CPU buffer to allocate in bytes.
+        """
+    @property
+    def num_bytes_buffered(self) -> int:
+        """Returns number of bytes buffered on host"""
+
+def new_host_buffer(size: int, device: int = 0) -> HostBuffer:
+    """Return buffer with CUDA-accessible memory on CPU host
+
+    Parameters
+    ----------
+    size : int
+        Specify the number of bytes to be allocated.
+    device : int
+        Specify GPU device number.
+
+    Returns
+    -------
+    dbuf : HostBuffer
+        Allocated host buffer
+    """
+
+def serialize_record_batch(batch: RecordBatch, ctx: Context) -> CudaBuffer:
+    """Write record batch message to GPU device memory
+
+    Parameters
+    ----------
+    batch : RecordBatch
+        Record batch to write
+    ctx : Context
+        CUDA Context to allocate device memory from
+
+    Returns
+    -------
+    dbuf : CudaBuffer
+        device buffer which contains the record batch message
+    """
+
+def read_message(
+    source: CudaBuffer | BufferReader, pool: MemoryPool | None = None
+) -> Message:
+    """Read Arrow IPC message located on GPU device
+
+    Parameters
+    ----------
+    source : {CudaBuffer, cuda.BufferReader}
+        Device buffer or reader of device buffer.
+    pool : MemoryPool (optional)
+        Pool to allocate CPU memory for the metadata
+
+    Returns
+    -------
+    message : Message
+        The deserialized message, body still on device
+    """
+
+def read_record_batch(
+    buffer: Buffer,
+    schema: Schema,
+    *,
+    dictionary_memo: DictionaryMemo | None = None,
+    pool: MemoryPool | None = None,
+) -> RecordBatch:
+    """Construct RecordBatch referencing IPC message located on CUDA device.
+
+    While the metadata is copied to host memory for deserialization,
+    the record batch data remains on the device.
+
+    Parameters
+    ----------
+    buffer : Buffer
+        Device buffer containing the complete IPC message
+    schema : Schema
+        The schema for the record batch
+    dictionary_memo : DictionaryMemo, optional
+        If message contains dictionaries, must pass a populated
+        DictionaryMemo
+    pool : MemoryPool (optional)
+        Pool to allocate metadata from
+
+    Returns
+    -------
+    batch : RecordBatch
+        Reconstructed record batch, with device pointers
+
+    """
diff --git a/python/pyarrow-stubs/_dataset.pyi b/python/pyarrow-stubs/_dataset.pyi
index 03e7762b6df..e0f38d54eff 100644
--- a/python/pyarrow-stubs/_dataset.pyi
+++ b/python/pyarrow-stubs/_dataset.pyi
@@ -1,2300 +1,2282 @@
-# import sys
-#
-# if sys.version_info >= (3, 11):
-#     from typing import Self
-# else:
-#     from typing_extensions import Self
-# from typing import (
-#     IO,
-#     Any,
-#     Callable,
-#     Generic,
-#     Iterator,
-#     Literal,
-#     NamedTuple,
-#     TypeVar,
-#     overload,
-# )
-#
-# from _typeshed import StrPath
-#
-# from . import _csv, _json, _parquet, lib
-# from ._fs import FileSelector, FileSystem, SupportedFileSystem
-# from ._stubs_typing import Indices, JoinType, Order
-# from .acero import ExecNodeOptions
-# from .compute import Expression
-# from .ipc import IpcWriteOptions, RecordBatchReader
-#
-# class Dataset(lib._Weakrefable):
-#     """
-#     Collection of data fragments and potentially child datasets.
-#
-#     Arrow Datasets allow you to query against data that has been split across
-#     multiple files. This sharding of data may indicate partitioning, which
-#     can accelerate queries that only touch some partitions (files).
-# """ -# -# @property -# def partition_expression(self) -> Expression: -# """ -# An Expression which evaluates to true for all data viewed by this -# Dataset. -# """ -# def replace_schema(self, schema: lib.Schema) -> None: -# """ -# Return a copy of this Dataset with a different schema. -# -# The copy will view the same Fragments. If the new schema is not -# compatible with the original dataset's schema then an error will -# be raised. -# -# Parameters -# ---------- -# schema : Schema -# The new dataset schema. -# """ -# def get_fragments(self, filter: Expression | None = None): -# """Returns an iterator over the fragments in this dataset. -# -# Parameters -# ---------- -# filter : Expression, default None -# Return fragments matching the optional filter, either using the -# partition_expression or internal information like Parquet's -# statistics. -# -# Returns -# ------- -# fragments : iterator of Fragment -# """ -# def scanner( -# self, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: -# """ -# Build a scan operation against the dataset. -# -# Data is not loaded immediately. Instead, this produces a Scanner, -# which exposes further operations (e.g. loading all data as a -# table, counting rows). -# -# See the :meth:`Scanner.from_dataset` method for further information. -# -# Parameters -# ---------- -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. 
-# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# scanner : Scanner -# -# Examples -# -------- -# >>> import pyarrow as pa -# >>> table = pa.table( -# ... { -# ... "year": [2020, 2022, 2021, 2022, 2019, 2021], -# ... "n_legs": [2, 2, 4, 4, 5, 100], -# ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], -# ... } -# ... ) -# >>> -# >>> import pyarrow.parquet as pq -# >>> pq.write_table(table, "dataset_scanner.parquet") -# -# >>> import pyarrow.dataset as ds -# >>> dataset = ds.dataset("dataset_scanner.parquet") -# -# Selecting a subset of the columns: -# -# >>> dataset.scanner(columns=["year", "n_legs"]).to_table() -# pyarrow.Table -# year: int64 -# n_legs: int64 -# ---- -# year: [[2020,2022,2021,2022,2019,2021]] -# n_legs: [[2,2,4,4,5,100]] -# -# Projecting selected columns using an expression: -# -# >>> dataset.scanner( -# ... columns={ -# ... "n_legs_uint": ds.field("n_legs").cast("uint8"), -# ... } -# ... ).to_table() -# pyarrow.Table -# n_legs_uint: uint8 -# ---- -# n_legs_uint: [[2,2,4,4,5,100]] -# -# Filtering rows while scanning: -# -# >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() -# pyarrow.Table -# year: int64 -# n_legs: int64 -# animal: string -# ---- -# year: [[2022,2021,2022,2021]] -# n_legs: [[2,4,4,100]] -# animal: [["Parrot","Dog","Horse","Centipede"]] -# """ -# def to_batches( -# self, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Iterator[lib.RecordBatch]: -# """ -# Read the dataset as materialized record batches. -# -# Parameters -# ---------- -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. 
-# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# record_batches : iterator of RecordBatch -# """ -# def to_table( -# self, -# columns: list[str] | dict[str, Expression] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Read the dataset to an Arrow table. -# -# Note that this method reads all the selected data from the dataset -# into memory. -# -# Parameters -# ---------- -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. 
-# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# table : Table -# """ -# def take( -# self, -# indices: Indices, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Select rows of data by index. -# -# Parameters -# ---------- -# indices : Array or array-like -# indices of rows to select in the dataset. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. 
-# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# table : Table -# """ -# def head( -# self, -# num_rows: int, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Load the first N rows of the dataset. -# -# Parameters -# ---------- -# num_rows : int -# The number of rows to load. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. 
-# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# table : Table -# """ -# def count_rows( -# self, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> int: -# """ -# Count rows matching the scanner filter. -# -# Parameters -# ---------- -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# count : int -# """ -# @property -# def schema(self) -> lib.Schema: -# """The common schema of the full Dataset""" -# def filter(self, expression: Expression) -> Self: -# """ -# Apply a row filter to the dataset. -# -# Parameters -# ---------- -# expression : Expression -# The filter that should be applied to the dataset. -# -# Returns -# ------- -# Dataset -# """ -# def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: -# """ -# Sort the Dataset by one or multiple columns. -# -# Parameters -# ---------- -# sorting : str or list[tuple(name, order)] -# Name of the column to use to sort (ascending), or -# a list of multiple sorting conditions where -# each entry is a tuple with column name -# and sorting order ("ascending" or "descending") -# **kwargs : dict, optional -# Additional sorting options. -# As allowed by :class:`SortOptions` -# -# Returns -# ------- -# InMemoryDataset -# A new dataset sorted according to the sort keys. -# """ -# def join( -# self, -# right_dataset: Dataset, -# keys: str | list[str], -# right_keys: str | list[str] | None = None, -# join_type: JoinType = "left outer", -# left_suffix: str | None = None, -# right_suffix: str | None = None, -# coalesce_keys: bool = True, -# use_threads: bool = True, -# ) -> InMemoryDataset: -# """ -# Perform a join between this dataset and another one. -# -# Result of the join will be a new dataset, where further -# operations can be applied. 
-# -# Parameters -# ---------- -# right_dataset : dataset -# The dataset to join to the current one, acting as the right dataset -# in the join operation. -# keys : str or list[str] -# The columns from current dataset that should be used as keys -# of the join operation left side. -# right_keys : str or list[str], default None -# The columns from the right_dataset that should be used as keys -# on the join operation right side. -# When ``None`` use the same key names as the left dataset. -# join_type : str, default "left outer" -# The kind of join that should be performed, one of -# ("left semi", "right semi", "left anti", "right anti", -# "inner", "left outer", "right outer", "full outer") -# left_suffix : str, default None -# Which suffix to add to right column names. This prevents confusion -# when the columns in left and right datasets have colliding names. -# right_suffix : str, default None -# Which suffix to add to the left column names. This prevents confusion -# when the columns in left and right datasets have colliding names. -# coalesce_keys : bool, default True -# If the duplicated keys should be omitted from one of the sides -# in the join result. -# use_threads : bool, default True -# Whenever to use multithreading or not. -# -# Returns -# ------- -# InMemoryDataset -# """ -# def join_asof( -# self, -# right_dataset: Dataset, -# on: str, -# by: str | list[str], -# tolerance: int, -# right_on: str | list[str] | None = None, -# right_by: str | list[str] | None = None, -# ) -> InMemoryDataset: -# """ -# Perform an asof join between this dataset and another one. -# -# This is similar to a left-join except that we match on nearest key rather -# than equal keys. Both datasets must be sorted by the key. This type of join -# is most useful for time series data that are not perfectly aligned. -# -# Optionally match on equivalent keys with "by" before searching with "on". -# -# Result of the join will be a new Dataset, where further -# operations can be applied. -# -# Parameters -# ---------- -# right_dataset : dataset -# The dataset to join to the current one, acting as the right dataset -# in the join operation. -# on : str -# The column from current dataset that should be used as the "on" key -# of the join operation left side. -# -# An inexact match is used on the "on" key, i.e. a row is considered a -# match if and only if left_on - tolerance <= right_on <= left_on. -# -# The input table must be sorted by the "on" key. Must be a single -# field of a common type. -# -# Currently, the "on" key must be an integer, date, or timestamp type. -# by : str or list[str] -# The columns from current dataset that should be used as the keys -# of the join operation left side. The join operation is then done -# only for the matches in these columns. -# tolerance : int -# The tolerance for inexact "on" key matching. A right row is considered -# a match with the left row `right.on - left.on <= tolerance`. The -# `tolerance` may be: -# -# - negative, in which case a past-as-of-join occurs; -# - or positive, in which case a future-as-of-join occurs; -# - or zero, in which case an exact-as-of-join occurs. -# -# The tolerance is interpreted in the same units as the "on" key. -# right_on : str or list[str], default None -# The columns from the right_dataset that should be used as the on key -# on the join operation right side. -# When ``None`` use the same key name as the left dataset. 
-# right_by : str or list[str], default None -# The columns from the right_dataset that should be used as by keys -# on the join operation right side. -# When ``None`` use the same key names as the left dataset. -# -# Returns -# ------- -# InMemoryDataset -# """ -# -# class InMemoryDataset(Dataset): -# """ -# A Dataset wrapping in-memory data. -# -# Parameters -# ---------- -# source : RecordBatch, Table, list, tuple -# The data for this dataset. Can be a RecordBatch, Table, list of -# RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader -# If an iterable is provided, the schema must also be provided. -# schema : Schema, optional -# Only required if passing an iterable as the source -# """ -# -# class UnionDataset(Dataset): -# """ -# A Dataset wrapping child datasets. -# -# Children's schemas must agree with the provided schema. -# -# Parameters -# ---------- -# schema : Schema -# A known schema to conform to. -# children : list of Dataset -# One or more input children -# """ -# -# @property -# def children(self) -> list[Dataset]: ... -# -# class FileSystemDataset(Dataset): -# """ -# A Dataset of file fragments. -# -# A FileSystemDataset is composed of one or more FileFragment. -# -# Parameters -# ---------- -# fragments : list[Fragments] -# List of fragments to consume. -# schema : Schema -# The top-level schema of the Dataset. -# format : FileFormat -# File format of the fragments, currently only ParquetFileFormat, -# IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. -# filesystem : FileSystem -# FileSystem of the fragments. -# root_partition : Expression, optional -# The top-level partition of the DataDataset. -# """ -# -# def __init__( -# self, -# fragments: list[Fragment], -# schema: lib.Schema, -# format: FileFormat, -# filesystem: SupportedFileSystem | None = None, -# root_partition: Expression | None = None, -# ) -> None: ... -# @classmethod -# def from_paths( -# cls, -# paths: list[str], -# schema: lib.Schema | None = None, -# format: FileFormat | None = None, -# filesystem: SupportedFileSystem | None = None, -# partitions: list[Expression] | None = None, -# root_partition: Expression | None = None, -# ) -> FileSystemDataset: -# """ -# A Dataset created from a list of paths on a particular filesystem. -# -# Parameters -# ---------- -# paths : list of str -# List of file paths to create the fragments from. -# schema : Schema -# The top-level schema of the DataDataset. -# format : FileFormat -# File format to create fragments from, currently only -# ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. -# filesystem : FileSystem -# The filesystem which files are from. -# partitions : list[Expression], optional -# Attach additional partition information for the file paths. -# root_partition : Expression, optional -# The top-level partition of the DataDataset. -# """ -# @property -# def filesystem(self) -> FileSystem: ... -# @property -# def partitioning(self) -> Partitioning | None: -# """ -# The partitioning of the Dataset source, if discovered. -# -# If the FileSystemDataset is created using the ``dataset()`` factory -# function with a partitioning specified, this will return the -# finalized Partitioning object from the dataset discovery. In all -# other cases, this returns None. 
-# """ -# @property -# def files(self) -> list[str]: -# """List of the files""" -# @property -# def format(self) -> FileFormat: -# """The FileFormat of this source.""" -# -# class FileWriteOptions(lib._Weakrefable): -# @property -# def format(self) -> FileFormat: ... -# -# class FileFormat(lib._Weakrefable): -# def inspect( -# self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None -# ) -> lib.Schema: -# """ -# Infer the schema of a file. -# -# Parameters -# ---------- -# file : file-like object, path-like or str -# The file or file path to infer a schema from. -# filesystem : Filesystem, optional -# If `filesystem` is given, `file` must be a string and specifies -# the path of the file to read from the filesystem. -# -# Returns -# ------- -# schema : Schema -# The schema inferred from the file -# """ -# def make_fragment( -# self, -# file: StrPath | IO, -# filesystem: SupportedFileSystem | None = None, -# partition_expression: Expression | None = None, -# *, -# file_size: int | None = None, -# ) -> Fragment: -# """ -# Make a FileFragment from a given file. -# -# Parameters -# ---------- -# file : file-like object, path-like or str -# The file or file path to make a fragment from. -# filesystem : Filesystem, optional -# If `filesystem` is given, `file` must be a string and specifies -# the path of the file to read from the filesystem. -# partition_expression : Expression, optional -# An expression that is guaranteed true for all rows in the fragment. Allows -# fragment to be potentially skipped while scanning with a filter. -# file_size : int, optional -# The size of the file in bytes. Can improve performance with high-latency filesystems -# when file size needs to be known before reading. -# -# Returns -# ------- -# fragment : Fragment -# The file fragment -# """ -# def make_write_options(self) -> FileWriteOptions: ... -# @property -# def default_extname(self) -> str: ... -# @property -# def default_fragment_scan_options(self) -> FragmentScanOptions: ... -# @default_fragment_scan_options.setter -# def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... -# -# class Fragment(lib._Weakrefable): -# """Fragment of data from a Dataset.""" -# @property -# def physical_schema(self) -> lib.Schema: -# """Return the physical schema of this Fragment. This schema can be -# different from the dataset read schema.""" -# @property -# def partition_expression(self) -> Expression: -# """An Expression which evaluates to true for all data viewed by this -# Fragment. -# """ -# def scanner( -# self, -# schema: lib.Schema | None = None, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: -# """ -# Build a scan operation against the fragment. -# -# Data is not loaded immediately. Instead, this produces a Scanner, -# which exposes further operations (e.g. loading all data as a -# table, counting rows). -# -# Parameters -# ---------- -# schema : Schema -# Schema to use for scanning. This is used to unify a Fragment to -# its Dataset's schema. If not specified this will use the -# Fragment's physical schema which might differ for each Fragment. -# columns : list of str, default None -# The columns to project. 
This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# scanner : Scanner -# """ -# def to_batches( -# self, -# schema: lib.Schema | None = None, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Iterator[lib.RecordBatch]: -# """ -# Read the fragment as materialized record batches. -# -# Parameters -# ---------- -# schema : Schema, optional -# Concrete schema to use for scanning. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. 
-# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# record_batches : iterator of RecordBatch -# """ -# def to_table( -# self, -# schema: lib.Schema | None = None, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Convert this Fragment into a Table. -# -# Use this convenience utility with care. This will serially materialize -# the Scan result in memory before creating the Table. -# -# Parameters -# ---------- -# schema : Schema, optional -# Concrete schema to use for scanning. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. 
-# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# table : Table -# """ -# def take( -# self, -# indices: Indices, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Select rows of data by index. -# -# Parameters -# ---------- -# indices : Array or array-like -# The indices of row to select in the dataset. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). 
-# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# Table -# """ -# def head( -# self, -# num_rows: int, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> lib.Table: -# """ -# Load the first N rows of the fragment. -# -# Parameters -# ---------- -# num_rows : int -# The number of rows to load. -# columns : list of str, default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. 
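A hedged sketch of the Fragment.take()/head() methods stubbed above (illustrative only; the same assumed Parquet file is reused).

import pyarrow.dataset as ds

dataset = ds.dataset("dataset_scanner.parquet", format="parquet")
fragment = next(dataset.get_fragments())
first_rows = fragment.head(2)        # first N rows of this fragment as a Table
selected = fragment.take([0, 3, 5])  # rows selected by index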
-# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# -# Returns -# ------- -# Table -# """ -# def count_rows( -# self, -# columns: list[str] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> int: -# """ -# Count rows matching the scanner filter. -# -# Parameters -# ---------- -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. 
-# -# Returns -# ------- -# count : int -# """ -# -# class FileFragment(Fragment): -# """A Fragment representing a data file.""" -# -# def open(self) -> lib.NativeFile: -# """ -# Open a NativeFile of the buffer or file viewed by this fragment. -# """ -# @property -# def path(self) -> str: -# """ -# The path of the data file viewed by this fragment, if it views a -# file. If instead it views a buffer, this will be "". -# """ -# @property -# def filesystem(self) -> FileSystem: -# """ -# The FileSystem containing the data file viewed by this fragment, if -# it views a file. If instead it views a buffer, this will be None. -# """ -# @property -# def buffer(self) -> lib.Buffer: -# """ -# The buffer viewed by this fragment, if it views a buffer. If -# instead it views a file, this will be None. -# """ -# @property -# def format(self) -> FileFormat: -# """ -# The format of the data file viewed by this fragment. -# """ -# -# class FragmentScanOptions(lib._Weakrefable): -# """Scan options specific to a particular fragment and scan operation.""" -# -# @property -# def type_name(self) -> str: ... -# -# class IpcFileWriteOptions(FileWriteOptions): -# @property -# def write_options(self) -> IpcWriteOptions: ... -# @write_options.setter -# def write_options(self, write_options: IpcWriteOptions) -> None: ... -# -# class IpcFileFormat(FileFormat): -# def equals(self, other: IpcFileFormat) -> bool: ... -# def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... -# @property -# def default_extname(self) -> str: ... -# -# class FeatherFileFormat(IpcFileFormat): ... -# -# class CsvFileFormat(FileFormat): -# """ -# FileFormat for CSV files. -# -# Parameters -# ---------- -# parse_options : pyarrow.csv.ParseOptions -# Options regarding CSV parsing. -# default_fragment_scan_options : CsvFragmentScanOptions -# Default options for fragments scan. -# convert_options : pyarrow.csv.ConvertOptions -# Options regarding value conversion. -# read_options : pyarrow.csv.ReadOptions -# General read options. -# """ -# def __init__( -# self, -# parse_options: _csv.ParseOptions | None = None, -# default_fragment_scan_options: CsvFragmentScanOptions | None = None, -# convert_options: _csv.ConvertOptions | None = None, -# read_options: _csv.ReadOptions | None = None, -# ) -> None: ... -# def make_write_options(self) -> _csv.WriteOptions: ... # type: ignore[override] -# @property -# def parse_options(self) -> _csv.ParseOptions: ... -# @parse_options.setter -# def parse_options(self, parse_options: _csv.ParseOptions) -> None: ... -# def equals(self, other: CsvFileFormat) -> bool: ... -# -# class CsvFragmentScanOptions(FragmentScanOptions): -# """ -# Scan-specific options for CSV fragments. -# -# Parameters -# ---------- -# convert_options : pyarrow.csv.ConvertOptions -# Options regarding value conversion. -# read_options : pyarrow.csv.ReadOptions -# General read options. -# """ -# -# convert_options: _csv.ConvertOptions -# read_options: _csv.ReadOptions -# -# def __init__( -# self, convert_options: _csv.ConvertOptions, read_options: _csv.ReadOptions -# ) -> None: ... -# def equals(self, other: CsvFragmentScanOptions) -> bool: ... -# -# class CsvFileWriteOptions(FileWriteOptions): -# write_options: _csv.WriteOptions -# -# class JsonFileFormat(FileFormat): -# """ -# FileFormat for JSON files. -# -# Parameters -# ---------- -# default_fragment_scan_options : JsonFragmentScanOptions -# Default options for fragments scan. -# parse_options : pyarrow.json.ParseOptions -# Options regarding json parsing. 
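Illustrative sketch (not part of the stubs) of the CSV classes stubbed above: a CsvFileFormat with custom ParseOptions driving dataset discovery. The data/ directory and the ';' delimiter are assumptions.

import pyarrow.dataset as ds
from pyarrow import csv

csv_format = ds.CsvFileFormat(parse_options=csv.ParseOptions(delimiter=";"))
dataset = ds.dataset("data/", format=csv_format)
for fragment in dataset.get_fragments():
    # Each FileFragment exposes the file it views and the format used to read it.
    print(fragment.path, fragment.format)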
-# read_options : pyarrow.json.ReadOptions -# General read options. -# """ -# def __init__( -# self, -# default_fragment_scan_options: JsonFragmentScanOptions | None = None, -# parse_options: _json.ParseOptions | None = None, -# read_options: _json.ReadOptions | None = None, -# ) -> None: ... -# def equals(self, other: JsonFileFormat) -> bool: ... -# -# class JsonFragmentScanOptions(FragmentScanOptions): -# """ -# Scan-specific options for JSON fragments. -# -# Parameters -# ---------- -# parse_options : pyarrow.json.ParseOptions -# Options regarding JSON parsing. -# read_options : pyarrow.json.ReadOptions -# General read options. -# """ -# -# parse_options: _json.ParseOptions -# read_options: _json.ReadOptions -# def __init__( -# self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions -# ) -> None: ... -# def equals(self, other: JsonFragmentScanOptions) -> bool: ... -# -# class Partitioning(lib._Weakrefable): -# def parse(self, path: str) -> Expression: -# """ -# Parse a path into a partition expression. -# -# Parameters -# ---------- -# path : str -# -# Returns -# ------- -# pyarrow.dataset.Expression -# """ -# def format(self, expr: Expression) -> tuple[str, str]: -# """ -# Convert a filter expression into a tuple of (directory, filename) using -# the current partitioning scheme -# -# Parameters -# ---------- -# expr : pyarrow.dataset.Expression -# -# Returns -# ------- -# tuple[str, str] -# -# Examples -# -------- -# -# Specify the Schema for paths like "/2009/June": -# -# >>> import pyarrow as pa -# >>> import pyarrow.dataset as ds -# >>> import pyarrow.compute as pc -# >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) -# >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) -# ('1862/Jan', '') -# """ -# @property -# def schema(self) -> lib.Schema: -# """The arrow Schema attached to the partitioning.""" -# -# class PartitioningFactory(lib._Weakrefable): -# @property -# def type_name(self) -> str: ... -# -# class KeyValuePartitioning(Partitioning): -# @property -# def dictionaries(self) -> list[lib.Array | None]: -# """ -# The unique values for each partition field, if available. -# -# Those values are only available if the Partitioning object was -# created through dataset discovery from a PartitioningFactory, or -# if the dictionaries were manually specified in the constructor. -# If no dictionary field is available, this returns an empty list. -# """ -# -# class DirectoryPartitioning(KeyValuePartitioning): -# """ -# A Partitioning based on a specified Schema. -# -# The DirectoryPartitioning expects one segment in the file path for each -# field in the schema (all fields are required to be present). -# For example given schema the path "/2009/11" would -# be parsed to ("year"_ == 2009 and "month"_ == 11). -# -# Parameters -# ---------- -# schema : Schema -# The schema that describes the partitions present in the file path. -# dictionaries : dict[str, Array] -# If the type of any field of `schema` is a dictionary type, the -# corresponding entry of `dictionaries` must be an array containing -# every value which may be taken by the corresponding column or an -# error will be raised in parsing. -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). 
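A sketch of the Partitioning.parse()/format() pair described above (illustrative only); the schema and the /2009/June path follow the docstring's own example.

import pyarrow as pa
import pyarrow.dataset as ds

part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())]))
expr = part.parse("/2009/June")          # path -> partition Expression
directory, filename = part.format(expr)  # Expression -> ("2009/June", "") expected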
-# -# Returns -# ------- -# DirectoryPartitioning -# -# Examples -# -------- -# >>> from pyarrow.dataset import DirectoryPartitioning -# >>> partitioning = DirectoryPartitioning( -# ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) -# ... ) -# >>> print(partitioning.parse("/2009/11/")) -# ((year == 2009) and (month == 11)) -# """ -# -# @staticmethod -# def discover( -# field_names: list[str] | None = None, -# infer_dictionary: bool = False, -# max_partition_dictionary_size: int = 0, -# schema: lib.Schema | None = None, -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> PartitioningFactory: -# """ -# Discover a DirectoryPartitioning. -# -# Parameters -# ---------- -# field_names : list of str -# The names to associate with the values from the subdirectory names. -# If schema is given, will be populated from the schema. -# infer_dictionary : bool, default False -# When inferring a schema for partition fields, yield dictionary -# encoded types instead of plain types. This can be more efficient -# when materializing virtual columns, and Expressions parsed by the -# finished Partitioning will include dictionaries of all unique -# inspected values for each field. -# max_partition_dictionary_size : int, default 0 -# Synonymous with infer_dictionary for backwards compatibility with -# 1.0: setting this to -1 or None is equivalent to passing -# infer_dictionary=True. -# schema : Schema, default None -# Use this schema instead of inferring a schema from partition -# values. Partition values will be validated against this schema -# before accumulation into the Partitioning's dictionary. -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). -# -# Returns -# ------- -# PartitioningFactory -# To be used in the FileSystemFactoryOptions. -# """ -# def __init__( -# self, -# schema: lib.Schema, -# dictionaries: dict[str, lib.Array] | None = None, -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> None: ... -# -# class HivePartitioning(KeyValuePartitioning): -# """ -# A Partitioning for "/$key=$value/" nested directories as found in -# Apache Hive. -# -# Multi-level, directory based partitioning scheme originating from -# Apache Hive with all data files stored in the leaf directories. Data is -# partitioned by static values of a particular column in the schema. -# Partition keys are represented in the form $key=$value in directory names. -# Field order is ignored, as are missing or unrecognized field names. -# -# For example, given schema, a possible -# path would be "/year=2009/month=11/day=15". -# -# Parameters -# ---------- -# schema : Schema -# The schema that describes the partitions present in the file path. -# dictionaries : dict[str, Array] -# If the type of any field of `schema` is a dictionary type, the -# corresponding entry of `dictionaries` must be an array containing -# every value which may be taken by the corresponding column or an -# error will be raised in parsing. -# null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" -# If any field is None then this fallback will be used as a label -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). 
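Illustrative sketch (not part of the stubs) of DirectoryPartitioning.discover(): the returned PartitioningFactory is handed to dataset discovery. A data/<year>/<month>/... layout is assumed.

import pyarrow.dataset as ds

factory = ds.DirectoryPartitioning.discover(field_names=["year", "month"])
dataset = ds.dataset("data/", format="parquet", partitioning=factory)
print(dataset.schema)  # partition fields are appended to the file schema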
-# -# Returns -# ------- -# HivePartitioning -# -# Examples -# -------- -# >>> from pyarrow.dataset import HivePartitioning -# >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) -# >>> print(partitioning.parse("/year=2009/month=11/")) -# ((year == 2009) and (month == 11)) -# -# """ -# def __init__( -# self, -# schema: lib.Schema, -# dictionaries: dict[str, lib.Array] | None = None, -# null_fallback: str = "__HIVE_DEFAULT_PARTITION__", -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> None: ... -# @staticmethod -# def discover( -# infer_dictionary: bool = False, -# max_partition_dictionary_size: int = 0, -# null_fallback="__HIVE_DEFAULT_PARTITION__", -# schema: lib.Schema | None = None, -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> PartitioningFactory: -# """ -# Discover a HivePartitioning. -# -# Parameters -# ---------- -# infer_dictionary : bool, default False -# When inferring a schema for partition fields, yield dictionary -# encoded types instead of plain. This can be more efficient when -# materializing virtual columns, and Expressions parsed by the -# finished Partitioning will include dictionaries of all unique -# inspected values for each field. -# max_partition_dictionary_size : int, default 0 -# Synonymous with infer_dictionary for backwards compatibility with -# 1.0: setting this to -1 or None is equivalent to passing -# infer_dictionary=True. -# null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" -# When inferring a schema for partition fields this value will be -# replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ -# for compatibility with Spark -# schema : Schema, default None -# Use this schema instead of inferring a schema from partition -# values. Partition values will be validated against this schema -# before accumulation into the Partitioning's dictionary. -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). -# -# Returns -# ------- -# PartitioningFactory -# To be used in the FileSystemFactoryOptions. -# """ -# -# class FilenamePartitioning(KeyValuePartitioning): -# """ -# A Partitioning based on a specified Schema. -# -# The FilenamePartitioning expects one segment in the file name for each -# field in the schema (all fields are required to be present) separated -# by '_'. For example given schema the name -# ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). -# -# Parameters -# ---------- -# schema : Schema -# The schema that describes the partitions present in the file path. -# dictionaries : dict[str, Array] -# If the type of any field of `schema` is a dictionary type, the -# corresponding entry of `dictionaries` must be an array containing -# every value which may be taken by the corresponding column or an -# error will be raised in parsing. -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). -# -# Returns -# ------- -# FilenamePartitioning -# -# Examples -# -------- -# >>> from pyarrow.dataset import FilenamePartitioning -# >>> partitioning = FilenamePartitioning( -# ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) -# ... 
) -# >>> print(partitioning.parse("2009_11_data.parquet")) -# ((year == 2009) and (month == 11)) -# """ -# -# def __init__( -# self, -# schema: lib.Schema, -# dictionaries: dict[str, lib.Array] | None = None, -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> None: ... -# @staticmethod -# def discover( -# field_names: list[str] | None = None, -# infer_dictionary: bool = False, -# schema: lib.Schema | None = None, -# segment_encoding: Literal["uri", "none"] = "uri", -# ) -> PartitioningFactory: -# """ -# Discover a FilenamePartitioning. -# -# Parameters -# ---------- -# field_names : list of str -# The names to associate with the values from the subdirectory names. -# If schema is given, will be populated from the schema. -# infer_dictionary : bool, default False -# When inferring a schema for partition fields, yield dictionary -# encoded types instead of plain types. This can be more efficient -# when materializing virtual columns, and Expressions parsed by the -# finished Partitioning will include dictionaries of all unique -# inspected values for each field. -# schema : Schema, default None -# Use this schema instead of inferring a schema from partition -# values. Partition values will be validated against this schema -# before accumulation into the Partitioning's dictionary. -# segment_encoding : str, default "uri" -# After splitting paths into segments, decode the segments. Valid -# values are "uri" (URI-decode segments) and "none" (leave as-is). -# -# Returns -# ------- -# PartitioningFactory -# To be used in the FileSystemFactoryOptions. -# """ -# -# class DatasetFactory(lib._Weakrefable): -# """ -# DatasetFactory is used to create a Dataset, inspect the Schema -# of the fragments contained in it, and declare a partitioning. -# """ -# -# root_partition: Expression -# def finish(self, schema: lib.Schema | None = None) -> Dataset: -# """ -# Create a Dataset using the inspected schema or an explicit schema -# (if given). -# -# Parameters -# ---------- -# schema : Schema, default None -# The schema to conform the source to. If None, the inspected -# schema is used. -# -# Returns -# ------- -# Dataset -# """ -# def inspect(self) -> lib.Schema: -# """ -# Inspect all data fragments and return a common Schema. -# -# Returns -# ------- -# Schema -# """ -# def inspect_schemas(self) -> list[lib.Schema]: ... -# -# class FileSystemFactoryOptions(lib._Weakrefable): -# """ -# Influences the discovery of filesystem paths. -# -# Parameters -# ---------- -# partition_base_dir : str, optional -# For the purposes of applying the partitioning, paths will be -# stripped of the partition_base_dir. Files not matching the -# partition_base_dir prefix will be skipped for partitioning discovery. -# The ignored files will still be part of the Dataset, but will not -# have partition information. -# partitioning : Partitioning/PartitioningFactory, optional -# Apply the Partitioning to every discovered Fragment. See Partitioning or -# PartitioningFactory documentation. -# exclude_invalid_files : bool, optional (default True) -# If True, invalid files will be excluded (file format specific check). -# This will incur IO for each files in a serial and single threaded -# fashion. Disabling this feature will skip the IO, but unsupported -# files may be present in the Dataset (resulting in an error at scan -# time). -# selector_ignore_prefixes : list, optional -# When discovering from a Selector (and not from an explicit file list), -# ignore files and directories matching any of these prefixes. 
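A hedged sketch of HivePartitioning.discover() as typed above (illustrative only); data/ is an assumed /year=.../month=... layout.

import pyarrow.dataset as ds

# Infer the partition schema from key=value directory names and dictionary-encode it.
factory = ds.HivePartitioning.discover(infer_dictionary=True)
dataset = ds.dataset("data/", format="parquet", partitioning=factory)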
-# By default this is ['.', '_']. -# """ -# -# partitioning: Partitioning -# partitioning_factory: PartitioningFactory -# partition_base_dir: str -# exclude_invalid_files: bool -# selector_ignore_prefixes: list[str] -# -# def __init__( -# self, -# artition_base_dir: str | None = None, -# partitioning: Partitioning | PartitioningFactory | None = None, -# exclude_invalid_files: bool = True, -# selector_ignore_prefixes: list[str] | None = None, -# ) -> None: ... -# -# class FileSystemDatasetFactory(DatasetFactory): -# """ -# Create a DatasetFactory from a list of paths with schema inspection. -# -# Parameters -# ---------- -# filesystem : pyarrow.fs.FileSystem -# Filesystem to discover. -# paths_or_selector : pyarrow.fs.FileSelector or list of path-likes -# Either a Selector object or a list of path-like objects. -# format : FileFormat -# Currently only ParquetFileFormat and IpcFileFormat are supported. -# options : FileSystemFactoryOptions, optional -# Various flags influencing the discovery of filesystem paths. -# """ -# -# def __init__( -# self, -# filesystem: SupportedFileSystem, -# paths_or_selector: FileSelector, -# format: FileFormat, -# options: FileSystemFactoryOptions | None = None, -# ) -> None: ... -# -# class UnionDatasetFactory(DatasetFactory): -# """ -# Provides a way to inspect/discover a Dataset's expected schema before -# materialization. -# -# Parameters -# ---------- -# factories : list of DatasetFactory -# """ -# def __init__(self, factories: list[DatasetFactory]) -> None: ... -# -# _RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) -# -# class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): -# """An iterator over a sequence of record batches.""" -# def __iter__(self) -> Self: ... -# def __next__(self) -> _RecordBatchT: ... -# -# class TaggedRecordBatch(NamedTuple): -# """ -# A combination of a record batch and the fragment it came from. -# -# Parameters -# ---------- -# record_batch : RecordBatch -# The record batch. -# fragment : Fragment -# Fragment of the record batch. -# """ -# -# record_batch: lib.RecordBatch -# fragment: Fragment -# -# class TaggedRecordBatchIterator(lib._Weakrefable): -# """An iterator over a sequence of record batches with fragments.""" -# def __iter__(self) -> Self: ... -# def __next__(self) -> TaggedRecordBatch: ... -# -# class Scanner(lib._Weakrefable): -# """A materialized scan operation with context and options bound. -# -# A scanner is the class that glues the scan tasks, data fragments and data -# sources together. -# """ -# @staticmethod -# def from_dataset( -# dataset: Dataset, -# *, -# columns: list[str] | dict[str, Expression] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: -# """ -# Create Scanner from Dataset, -# -# Parameters -# ---------- -# dataset : Dataset -# Dataset to scan. -# columns : list[str] or dict[str, Expression], default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. 
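Illustrative sketch (not part of the stubs) of the factory path described above: a FileSystemDatasetFactory discovers files via a FileSelector, inspects a common schema, then materializes the Dataset. All paths are assumptions.

import pyarrow.dataset as ds
from pyarrow.fs import FileSelector, LocalFileSystem

factory = ds.FileSystemDatasetFactory(
    LocalFileSystem(),
    FileSelector("data/", recursive=True),
    ds.ParquetFileFormat(),
    ds.FileSystemFactoryOptions(partition_base_dir="data/"),
)
print(factory.inspect())    # common schema inferred from the discovered files
dataset = factory.finish()  # Dataset built with that schema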
-# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# """ -# @staticmethod -# def from_fragment( -# fragment: Fragment, -# *, -# schema: lib.Schema | None = None, -# columns: list[str] | dict[str, Expression] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: -# """ -# Create Scanner from Fragment, -# -# Parameters -# ---------- -# fragment : Fragment -# fragment to scan. -# schema : Schema, optional -# The schema of the fragment. -# columns : list[str] or dict[str, Expression], default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). 
-# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# """ -# @overload -# @staticmethod -# def from_batches( -# source: Iterator[lib.RecordBatch], -# *, -# schema: lib.Schema, -# columns: list[str] | dict[str, Expression] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: ... -# @overload -# @staticmethod -# def from_batches( -# source: RecordBatchReader, -# *, -# columns: list[str] | dict[str, Expression] | None = None, -# filter: Expression | None = None, -# batch_size: int = ..., -# batch_readahead: int = 16, -# fragment_readahead: int = 4, -# fragment_scan_options: FragmentScanOptions | None = None, -# use_threads: bool = True, -# cache_metadata: bool = True, -# memory_pool: lib.MemoryPool | None = None, -# ) -> Scanner: ... -# @staticmethod -# def from_batches(*args, **kwargs): -# """ -# Create a Scanner from an iterator of batches. -# -# This creates a scanner which can be used only once. It is -# intended to support writing a dataset (which takes a scanner) -# from a source which can be read only once (e.g. a -# RecordBatchReader or generator). -# -# Parameters -# ---------- -# source : Iterator or Arrow-compatible stream object -# The iterator of Batches. This can be a pyarrow RecordBatchReader, -# any object that implements the Arrow PyCapsule Protocol for -# streams, or an actual Python iterator of RecordBatches. -# schema : Schema -# The schema of the batches (required when passing a Python -# iterator). 
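A minimal sketch of Scanner.from_dataset() as typed above (illustrative only), reusing the assumed dataset_scanner.parquet file; the n_legs_doubled projection name is invented for the example.

import pyarrow.dataset as ds
import pyarrow.compute as pc

dataset = ds.dataset("dataset_scanner.parquet", format="parquet")
scanner = ds.Scanner.from_dataset(
    dataset,
    columns={"n_legs_doubled": pc.field("n_legs") * 2},  # projection via expressions
    filter=pc.field("year") > 2020,
    batch_size=64 * 1024,
)
table = scanner.to_table()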
-# columns : list[str] or dict[str, Expression], default None -# The columns to project. This can be a list of column names to -# include (order and duplicates will be preserved), or a dictionary -# with {new_column_name: expression} values for more advanced -# projections. -# -# The list of columns or expressions may use the special fields -# `__batch_index` (the index of the batch within the fragment), -# `__fragment_index` (the index of the fragment within the dataset), -# `__last_in_fragment` (whether the batch is last in fragment), and -# `__filename` (the name of the source file or a description of the -# source fragment). -# -# The columns will be passed down to Datasets and corresponding data -# fragments to avoid loading, copying, and deserializing columns -# that will not be required further down the compute chain. -# By default all of the available columns are projected. Raises -# an exception if any of the referenced column names does not exist -# in the dataset's Schema. -# filter : Expression, default None -# Scan will return only the rows matching the filter. -# If possible the predicate will be pushed down to exploit the -# partition information or internal metadata found in the data -# source, e.g. Parquet statistics. Otherwise filters the loaded -# RecordBatches before yielding them. -# batch_size : int, default 131_072 -# The maximum row count for scanned record batches. If scanned -# record batches are overflowing memory then this method can be -# called to reduce their size. -# batch_readahead : int, default 16 -# The number of batches to read ahead in a file. This might not work -# for all file formats. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_readahead : int, default 4 -# The number of files to read ahead. Increasing this number will increase -# RAM usage but could also improve IO utilization. -# fragment_scan_options : FragmentScanOptions, default None -# Options specific to a particular scan and fragment type, which -# can change between different scans of the same dataset. -# use_threads : bool, default True -# If enabled, then maximum parallelism will be used determined by -# the number of available CPU cores. -# cache_metadata : bool, default True -# If enabled, metadata may be cached when scanning to speed up -# repeated scans. -# memory_pool : MemoryPool, default None -# For memory allocations, if required. If not specified, uses the -# default pool. -# """ -# @property -# def dataset_schema(self) -> lib.Schema: -# """The schema with which batches will be read from fragments.""" -# @property -# def projected_schema(self) -> lib.Schema: -# """ -# The materialized schema of the data, accounting for projections. -# -# This is the schema of any data returned from the scanner. -# """ -# def to_batches(self) -> Iterator[lib.RecordBatch]: -# """ -# Consume a Scanner in record batches. -# -# Returns -# ------- -# record_batches : iterator of RecordBatch -# """ -# def scan_batches(self) -> TaggedRecordBatchIterator: -# """ -# Consume a Scanner in record batches with corresponding fragments. -# -# Returns -# ------- -# record_batches : iterator of TaggedRecordBatch -# """ -# def to_table(self) -> lib.Table: -# """ -# Convert a Scanner into a Table. -# -# Use this convenience utility with care. This will serially materialize -# the Scan result in memory before creating the Table. -# -# Returns -# ------- -# Table -# """ -# def take(self, indices: Indices) -> lib.Table: -# """ -# Select rows of data by index. 
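A hedged sketch of the one-shot Scanner.from_batches() path described above (not part of the stubs), feeding a Python iterator of record batches into ds.write_dataset(); the out/ directory and the x column are assumptions.

import pyarrow as pa
import pyarrow.dataset as ds

schema = pa.schema([("x", pa.int64())])
batches = (pa.record_batch([pa.array([i, i + 1])], schema=schema) for i in range(3))
scanner = ds.Scanner.from_batches(batches, schema=schema)  # consumable only once
ds.write_dataset(scanner, "out/", format="parquet")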
-# -# Will only consume as many batches of the underlying dataset as -# needed. Otherwise, this is equivalent to -# ``to_table().take(indices)``. -# -# Parameters -# ---------- -# indices : Array or array-like -# indices of rows to select in the dataset. -# -# Returns -# ------- -# Table -# """ -# def head(self, num_rows: int) -> lib.Table: -# """ -# Load the first N rows of the dataset. -# -# Parameters -# ---------- -# num_rows : int -# The number of rows to load. -# -# Returns -# ------- -# Table -# """ -# def count_rows(self) -> int: -# """ -# Count rows matching the scanner filter. -# -# Returns -# ------- -# count : int -# """ -# def to_reader(self) -> RecordBatchReader: -# """Consume this scanner as a RecordBatchReader. -# -# Returns -# ------- -# RecordBatchReader -# """ -# -# def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: -# """ -# Extract partition keys (equality constraints between a field and a scalar) -# from an expression as a dict mapping the field's name to its value. -# -# NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning -# will be conjunctions of equality conditions and are accessible through this -# function. Other subexpressions will be ignored. -# -# Parameters -# ---------- -# partition_expression : pyarrow.dataset.Expression -# -# Returns -# ------- -# dict -# -# Examples -# -------- -# -# For example, an expression of -# -# is converted to {'part': 'A', 'year': 2016} -# """ -# -# class WrittenFile(lib._Weakrefable): -# """ -# Metadata information about files written as -# part of a dataset write operation -# -# Parameters -# ---------- -# path : str -# Path to the file. -# metadata : pyarrow.parquet.FileMetaData, optional -# For Parquet files, the Parquet file metadata. -# size : int -# The size of the file in bytes. -# """ -# def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ... -# -# def _filesystemdataset_write( -# data: Scanner, -# base_dir: StrPath, -# basename_template: str, -# filesystem: SupportedFileSystem, -# partitioning: Partitioning, -# file_options: FileWriteOptions, -# max_partitions: int, -# file_visitor: Callable[[str], None], -# existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"], -# max_open_files: int, -# max_rows_per_file: int, -# min_rows_per_group: int, -# max_rows_per_group: int, -# create_dir: bool, -# ): ... -# -# class _ScanNodeOptions(ExecNodeOptions): -# def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ... -# -# class ScanNodeOptions(_ScanNodeOptions): -# """ -# A Source node which yields batches from a Dataset scan. -# -# This is the option class for the "scan" node factory. -# -# This node is capable of applying pushdown projections or filters -# to the file readers which reduce the amount of data that needs to -# be read (if supported by the file format). But note that this does not -# construct associated filter or project nodes to perform the final -# filtering or projection. Rather, you may supply the same filter -# expression or projection to the scan node that you also supply -# to the filter or project node. -# -# Yielded batches will be augmented with fragment/batch indices when -# implicit_ordering=True to enable stable ordering for simple ExecPlans. -# -# Parameters -# ---------- -# dataset : pyarrow.dataset.Dataset -# The table which acts as the data source. -# **kwargs : dict, optional -# Scan options. See `Scanner.from_dataset` for possible arguments. 
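Illustrative sketch (not part of the stubs) of get_partition_keys() as described above, applied to the partition expressions of a hive-partitioned dataset; data/ is an assumed /year=.../month=... layout.

import pyarrow.dataset as ds

dataset = ds.dataset("data/", format="parquet", partitioning="hive")
for fragment in dataset.get_fragments():
    keys = ds.get_partition_keys(fragment.partition_expression)
    print(fragment.path, keys)  # e.g. {'year': 2009, 'month': 11}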
-# require_sequenced_output : bool, default False -# Batches are yielded sequentially, like single-threaded -# implicit_ordering : bool, default False -# Preserve implicit ordering of data. -# """ -# -# def __init__( -# self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs -# ) -> None: ... +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import ( + IO, + Any, + Callable, + Generic, + Iterator, + Literal, + NamedTuple, + TypeVar, + overload, +) + +from _typeshed import StrPath + +from . import csv, _json, _parquet, lib +from ._fs import FileSelector, FileSystem, SupportedFileSystem +from ._stubs_typing import Indices, JoinType, Order +from .acero import ExecNodeOptions +from .compute import Expression +from .ipc import IpcWriteOptions, RecordBatchReader + +class Dataset(lib._Weakrefable): + """ + Collection of data fragments and potentially child datasets. + + Arrow Datasets allow you to query against data that has been split across + multiple files. This sharding of data may indicate partitioning, which + can accelerate queries that only touch some partitions (files). + """ + + @property + def partition_expression(self) -> Expression: + """ + An Expression which evaluates to true for all data viewed by this + Dataset. + """ + def replace_schema(self, schema: lib.Schema) -> None: + """ + Return a copy of this Dataset with a different schema. + + The copy will view the same Fragments. If the new schema is not + compatible with the original dataset's schema then an error will + be raised. + + Parameters + ---------- + schema : Schema + The new dataset schema. + """ + def get_fragments(self, filter: Expression | None = None): + """Returns an iterator over the fragments in this dataset. + + Parameters + ---------- + filter : Expression, default None + Return fragments matching the optional filter, either using the + partition_expression or internal information like Parquet's + statistics. + + Returns + ------- + fragments : iterator of Fragment + """ + def scanner( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the dataset. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + See the :meth:`Scanner.from_dataset` method for further information. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. 
+ By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "dataset_scanner.parquet") + + >>> import pyarrow.dataset as ds + >>> dataset = ds.dataset("dataset_scanner.parquet") + + Selecting a subset of the columns: + + >>> dataset.scanner(columns=["year", "n_legs"]).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + ---- + year: [[2020,2022,2021,2022,2019,2021]] + n_legs: [[2,2,4,4,5,100]] + + Projecting selected columns using an expression: + + >>> dataset.scanner( + ... columns={ + ... "n_legs_uint": ds.field("n_legs").cast("uint8"), + ... } + ... ).to_table() + pyarrow.Table + n_legs_uint: uint8 + ---- + n_legs_uint: [[2,2,4,4,5,100]] + + Filtering rows while scanning: + + >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() + pyarrow.Table + year: int64 + n_legs: int64 + animal: string + ---- + year: [[2022,2021,2022,2021]] + n_legs: [[2,4,4,100]] + animal: [["Parrot","Dog","Horse","Centipede"]] + """ + def to_batches( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the dataset as materialized record batches. + + Parameters + ---------- + columns : list of str, default None + The columns to project. 
This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Read the dataset to an Arrow table. + + Note that this method reads all the selected data from the dataset + into memory. + + Parameters + ---------- + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
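A hedged sketch of Dataset.to_batches() as typed here (illustrative only, not part of the stubs), streaming instead of materializing; it reuses the dataset_scanner.parquet example from the scanner() docstring above.

import pyarrow.dataset as ds
import pyarrow.compute as pc

dataset = ds.dataset("dataset_scanner.parquet", format="parquet")
total = 0
for batch in dataset.to_batches(columns=["n_legs"], filter=pc.field("year") > 2020):
    total += batch.num_rows  # batches are produced lazily, one at a time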
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. + + Parameters + ---------- + indices : Array or array-like + indices of rows to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. 
+ By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the dataset. + + Parameters + ---------- + num_rows : int + The number of rows to load. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. 
If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def count_rows( + self, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + count : int + """ + @property + def schema(self) -> lib.Schema: + """The common schema of the full Dataset""" + def filter(self, expression: Expression) -> Self: + """ + Apply a row filter to the dataset. + + Parameters + ---------- + expression : Expression + The filter that should be applied to the dataset. + + Returns + ------- + Dataset + """ + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: + """ + Sort the Dataset by one or multiple columns. 
+
+        Parameters
+        ----------
+        sorting : str or list[tuple(name, order)]
+            Name of the column to use to sort (ascending), or
+            a list of multiple sorting conditions where
+            each entry is a tuple with column name
+            and sorting order ("ascending" or "descending")
+        **kwargs : dict, optional
+            Additional sorting options.
+            As allowed by :class:`SortOptions`
+
+        Returns
+        -------
+        InMemoryDataset
+            A new dataset sorted according to the sort keys.
+        """
+    def join(
+        self,
+        right_dataset: Dataset,
+        keys: str | list[str],
+        right_keys: str | list[str] | None = None,
+        join_type: JoinType = "left outer",
+        left_suffix: str | None = None,
+        right_suffix: str | None = None,
+        coalesce_keys: bool = True,
+        use_threads: bool = True,
+    ) -> InMemoryDataset:
+        """
+        Perform a join between this dataset and another one.
+
+        Result of the join will be a new dataset, where further
+        operations can be applied.
+
+        Parameters
+        ----------
+        right_dataset : dataset
+            The dataset to join to the current one, acting as the right dataset
+            in the join operation.
+        keys : str or list[str]
+            The columns from current dataset that should be used as keys
+            of the join operation left side.
+        right_keys : str or list[str], default None
+            The columns from the right_dataset that should be used as keys
+            on the join operation right side.
+            When ``None`` use the same key names as the left dataset.
+        join_type : str, default "left outer"
+            The kind of join that should be performed, one of
+            ("left semi", "right semi", "left anti", "right anti",
+            "inner", "left outer", "right outer", "full outer")
+        left_suffix : str, default None
+            Which suffix to add to left column names. This prevents confusion
+            when the columns in left and right datasets have colliding names.
+        right_suffix : str, default None
+            Which suffix to add to the right column names. This prevents confusion
+            when the columns in left and right datasets have colliding names.
+        coalesce_keys : bool, default True
+            If the duplicated keys should be omitted from one of the sides
+            in the join result.
+        use_threads : bool, default True
+            Whether to use multithreading or not.
+
+        Returns
+        -------
+        InMemoryDataset
+        """
+    def join_asof(
+        self,
+        right_dataset: Dataset,
+        on: str,
+        by: str | list[str],
+        tolerance: int,
+        right_on: str | list[str] | None = None,
+        right_by: str | list[str] | None = None,
+    ) -> InMemoryDataset:
+        """
+        Perform an asof join between this dataset and another one.
+
+        This is similar to a left-join except that we match on nearest key rather
+        than equal keys. Both datasets must be sorted by the key. This type of join
+        is most useful for time series data that are not perfectly aligned.
+
+        Optionally match on equivalent keys with "by" before searching with "on".
+
+        Result of the join will be a new Dataset, where further
+        operations can be applied.
+
+        Parameters
+        ----------
+        right_dataset : dataset
+            The dataset to join to the current one, acting as the right dataset
+            in the join operation.
+        on : str
+            The column from current dataset that should be used as the "on" key
+            of the join operation left side.
+
+            An inexact match is used on the "on" key, i.e. a row is considered a
+            match if and only if left_on - tolerance <= right_on <= left_on.
+
+            The input table must be sorted by the "on" key. Must be a single
+            field of a common type.
+
+            Currently, the "on" key must be an integer, date, or timestamp type.
+        by : str or list[str]
+            The columns from current dataset that should be used as the keys
+            of the join operation left side.
+            The join operation is then done only for the matches in these
+            columns.
+        tolerance : int
+            The tolerance for inexact "on" key matching. A right row is considered
+            a match with the left row if `right.on - left.on <= tolerance`. The
+            `tolerance` may be:
+
+            - negative, in which case a past-as-of-join occurs;
+            - or positive, in which case a future-as-of-join occurs;
+            - or zero, in which case an exact-as-of-join occurs.
+
+            The tolerance is interpreted in the same units as the "on" key.
+        right_on : str or list[str], default None
+            The columns from the right_dataset that should be used as the on key
+            on the join operation right side.
+            When ``None`` use the same key name as the left dataset.
+        right_by : str or list[str], default None
+            The columns from the right_dataset that should be used as by keys
+            on the join operation right side.
+            When ``None`` use the same key names as the left dataset.
+
+        Returns
+        -------
+        InMemoryDataset
+        """
+
+class InMemoryDataset(Dataset):
+    """
+    A Dataset wrapping in-memory data.
+
+    Parameters
+    ----------
+    source : RecordBatch, Table, list, tuple
+        The data for this dataset. Can be a RecordBatch, Table, list of
+        RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
+        If an iterable is provided, the schema must also be provided.
+    schema : Schema, optional
+        Only required if passing an iterable as the source.
+    """
+
+class UnionDataset(Dataset):
+    """
+    A Dataset wrapping child datasets.
+
+    Children's schemas must agree with the provided schema.
+
+    Parameters
+    ----------
+    schema : Schema
+        A known schema to conform to.
+    children : list of Dataset
+        One or more input children
+    """
+
+    @property
+    def children(self) -> list[Dataset]: ...
+
+class FileSystemDataset(Dataset):
+    """
+    A Dataset of file fragments.
+
+    A FileSystemDataset is composed of one or more FileFragment.
+
+    Parameters
+    ----------
+    fragments : list[Fragment]
+        List of fragments to consume.
+    schema : Schema
+        The top-level schema of the Dataset.
+    format : FileFormat
+        File format of the fragments, currently only ParquetFileFormat,
+        IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported.
+    filesystem : FileSystem
+        FileSystem of the fragments.
+    root_partition : Expression, optional
+        The top-level partition of the Dataset.
+    """
+
+    def __init__(
+        self,
+        fragments: list[Fragment],
+        schema: lib.Schema,
+        format: FileFormat,
+        filesystem: SupportedFileSystem | None = None,
+        root_partition: Expression | None = None,
+    ) -> None: ...
+    @classmethod
+    def from_paths(
+        cls,
+        paths: list[str],
+        schema: lib.Schema | None = None,
+        format: FileFormat | None = None,
+        filesystem: SupportedFileSystem | None = None,
+        partitions: list[Expression] | None = None,
+        root_partition: Expression | None = None,
+    ) -> FileSystemDataset:
+        """
+        A Dataset created from a list of paths on a particular filesystem.
+
+        Parameters
+        ----------
+        paths : list of str
+            List of file paths to create the fragments from.
+        schema : Schema
+            The top-level schema of the Dataset.
+        format : FileFormat
+            File format to create fragments from, currently only
+            ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat
+            are supported.
+        filesystem : FileSystem
+            The filesystem which files are from.
+        partitions : list[Expression], optional
+            Attach additional partition information for the file paths.
+        root_partition : Expression, optional
+            The top-level partition of the Dataset.
+        """
+    @property
+    def filesystem(self) -> FileSystem: ...
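A minimal usage sketch of the InMemoryDataset and Dataset.join APIs documented above; the table contents, column names, and key choice are invented for illustration and are not part of this patch:

import pyarrow as pa
import pyarrow.dataset as ds

# Two small in-memory datasets built from literal tables (illustrative data only).
left = ds.InMemoryDataset(pa.table({"id": [1, 2, 3], "value": [10, 20, 30]}))
right = ds.InMemoryDataset(pa.table({"id": [2, 3, 4], "label": ["b", "c", "d"]}))

# Dataset.join returns a new InMemoryDataset to which further operations can be applied.
joined = left.join(right, keys="id", join_type="inner")
print(joined.to_table())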
+ @property + def partitioning(self) -> Partitioning | None: + """ + The partitioning of the Dataset source, if discovered. + + If the FileSystemDataset is created using the ``dataset()`` factory + function with a partitioning specified, this will return the + finalized Partitioning object from the dataset discovery. In all + other cases, this returns None. + """ + @property + def files(self) -> list[str]: + """List of the files""" + @property + def format(self) -> FileFormat: + """The FileFormat of this source.""" + +class FileWriteOptions(lib._Weakrefable): + @property + def format(self) -> FileFormat: ... + +class FileFormat(lib._Weakrefable): + def inspect( + self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None + ) -> lib.Schema: + """ + Infer the schema of a file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to infer a schema from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + + Returns + ------- + schema : Schema + The schema inferred from the file + """ + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + *, + file_size: int | None = None, + ) -> Fragment: + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. + file_size : int, optional + The size of the file in bytes. Can improve performance with high-latency filesystems + when file size needs to be known before reading. + + Returns + ------- + fragment : Fragment + The file fragment + """ + def make_write_options(self) -> FileWriteOptions: ... + @property + def default_extname(self) -> str: ... + @property + def default_fragment_scan_options(self) -> FragmentScanOptions: ... + @default_fragment_scan_options.setter + def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... + +class Fragment(lib._Weakrefable): + """Fragment of data from a Dataset.""" + @property + def physical_schema(self) -> lib.Schema: + """Return the physical schema of this Fragment. This schema can be + different from the dataset read schema.""" + @property + def partition_expression(self) -> Expression: + """An Expression which evaluates to true for all data viewed by this + Fragment. + """ + def scanner( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Build a scan operation against the fragment. + + Data is not loaded immediately. Instead, this produces a Scanner, + which exposes further operations (e.g. loading all data as a + table, counting rows). + + Parameters + ---------- + schema : Schema + Schema to use for scanning. 
This is used to unify a Fragment to + its Dataset's schema. If not specified this will use the + Fragment's physical schema which might differ for each Fragment. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + scanner : Scanner + """ + def to_batches( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Iterator[lib.RecordBatch]: + """ + Read the fragment as materialized record batches. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. 
+ + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + record_batches : iterator of RecordBatch + """ + def to_table( + self, + schema: lib.Schema | None = None, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Convert this Fragment into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Parameters + ---------- + schema : Schema, optional + Concrete schema to use for scanning. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). 
+ + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + table : Table + """ + def take( + self, + indices: Indices, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Select rows of data by index. + + Parameters + ---------- + indices : Array or array-like + The indices of row to select in the dataset. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. 
+ If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def head( + self, + num_rows: int, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> lib.Table: + """ + Load the first N rows of the fragment. + + Parameters + ---------- + num_rows : int + The number of rows to load. + columns : list of str, default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. 
Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + Table + """ + def count_rows( + self, + columns: list[str] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> int: + """ + Count rows matching the scanner filter. + + Parameters + ---------- + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + + Returns + ------- + count : int + """ + +class FileFragment(Fragment): + """A Fragment representing a data file.""" + + def open(self) -> lib.NativeFile: + """ + Open a NativeFile of the buffer or file viewed by this fragment. + """ + @property + def path(self) -> str: + """ + The path of the data file viewed by this fragment, if it views a + file. If instead it views a buffer, this will be "". + """ + @property + def filesystem(self) -> FileSystem: + """ + The FileSystem containing the data file viewed by this fragment, if + it views a file. If instead it views a buffer, this will be None. + """ + @property + def buffer(self) -> lib.Buffer: + """ + The buffer viewed by this fragment, if it views a buffer. If + instead it views a file, this will be None. 
+ """ + @property + def format(self) -> FileFormat: + """ + The format of the data file viewed by this fragment. + """ + +class FragmentScanOptions(lib._Weakrefable): + """Scan options specific to a particular fragment and scan operation.""" + + @property + def type_name(self) -> str: ... + +class IpcFileWriteOptions(FileWriteOptions): + @property + def write_options(self) -> IpcWriteOptions: ... + @write_options.setter + def write_options(self, write_options: IpcWriteOptions) -> None: ... + +class IpcFileFormat(FileFormat): + def equals(self, other: IpcFileFormat) -> bool: ... + def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... + @property + def default_extname(self) -> str: ... + +class FeatherFileFormat(IpcFileFormat): ... + +class CsvFileFormat(FileFormat): + """ + FileFormat for CSV files. + + Parameters + ---------- + parse_options : pyarrow.csv.ParseOptions + Options regarding CSV parsing. + default_fragment_scan_options : CsvFragmentScanOptions + Default options for fragments scan. + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. + read_options : pyarrow.csv.ReadOptions + General read options. + """ + def __init__( + self, + parse_options: csv.ParseOptions | None = None, + default_fragment_scan_options: CsvFragmentScanOptions | None = None, + convert_options: csv.ConvertOptions | None = None, + read_options: csv.ReadOptions | None = None, + ) -> None: ... + def make_write_options(self) -> csv.WriteOptions: ... # type: ignore[override] + @property + def parse_options(self) -> csv.ParseOptions: ... + @parse_options.setter + def parse_options(self, parse_options: csv.ParseOptions) -> None: ... + def equals(self, other: CsvFileFormat) -> bool: ... + +class CsvFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for CSV fragments. + + Parameters + ---------- + convert_options : pyarrow.csv.ConvertOptions + Options regarding value conversion. + read_options : pyarrow.csv.ReadOptions + General read options. + """ + + convert_options: csv.ConvertOptions + read_options: csv.ReadOptions + + def __init__( + self, convert_options: csv.ConvertOptions, read_options: csv.ReadOptions + ) -> None: ... + def equals(self, other: CsvFragmentScanOptions) -> bool: ... + +class CsvFileWriteOptions(FileWriteOptions): + write_options: csv.WriteOptions + +class JsonFileFormat(FileFormat): + """ + FileFormat for JSON files. + + Parameters + ---------- + default_fragment_scan_options : JsonFragmentScanOptions + Default options for fragments scan. + parse_options : pyarrow.json.ParseOptions + Options regarding json parsing. + read_options : pyarrow.json.ReadOptions + General read options. + """ + def __init__( + self, + default_fragment_scan_options: JsonFragmentScanOptions | None = None, + parse_options: _json.ParseOptions | None = None, + read_options: _json.ReadOptions | None = None, + ) -> None: ... + def equals(self, other: JsonFileFormat) -> bool: ... + +class JsonFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for JSON fragments. + + Parameters + ---------- + parse_options : pyarrow.json.ParseOptions + Options regarding JSON parsing. + read_options : pyarrow.json.ReadOptions + General read options. + """ + + parse_options: _json.ParseOptions + read_options: _json.ReadOptions + def __init__( + self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions + ) -> None: ... + def equals(self, other: JsonFragmentScanOptions) -> bool: ... 
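A hedged sketch of how the CSV classes above are typically combined with the dataset API; the "my_csv_data/" directory, the delimiter, and the option values are assumptions for the example, not part of this patch:

import pyarrow.csv as csv
import pyarrow.dataset as ds

# A CSV format with explicit parse behaviour, attached at dataset construction time.
csv_format = ds.CsvFileFormat(parse_options=csv.ParseOptions(delimiter=";"))
dataset = ds.dataset("my_csv_data/", format=csv_format)

# Per-scan options can be supplied separately without rebuilding the format.
scan_options = ds.CsvFragmentScanOptions(
    convert_options=csv.ConvertOptions(strings_can_be_null=True),
    read_options=csv.ReadOptions(block_size=1 << 20),
)
table = dataset.to_table(fragment_scan_options=scan_options)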
+ +class Partitioning(lib._Weakrefable): + def parse(self, path: str) -> Expression: + """ + Parse a path into a partition expression. + + Parameters + ---------- + path : str + + Returns + ------- + pyarrow.dataset.Expression + """ + def format(self, expr: Expression) -> tuple[str, str]: + """ + Convert a filter expression into a tuple of (directory, filename) using + the current partitioning scheme + + Parameters + ---------- + expr : pyarrow.dataset.Expression + + Returns + ------- + tuple[str, str] + + Examples + -------- + + Specify the Schema for paths like "/2009/June": + + >>> import pyarrow as pa + >>> import pyarrow.dataset as ds + >>> import pyarrow.compute as pc + >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) + >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) + ('1862/Jan', '') + """ + @property + def schema(self) -> lib.Schema: + """The arrow Schema attached to the partitioning.""" + +class PartitioningFactory(lib._Weakrefable): + @property + def type_name(self) -> str: ... + +class KeyValuePartitioning(Partitioning): + @property + def dictionaries(self) -> list[lib.Array | None]: + """ + The unique values for each partition field, if available. + + Those values are only available if the Partitioning object was + created through dataset discovery from a PartitioningFactory, or + if the dictionaries were manually specified in the constructor. + If no dictionary field is available, this returns an empty list. + """ + +class DirectoryPartitioning(KeyValuePartitioning): + """ + A Partitioning based on a specified Schema. + + The DirectoryPartitioning expects one segment in the file path for each + field in the schema (all fields are required to be present). + For example given schema the path "/2009/11" would + be parsed to ("year"_ == 2009 and "month"_ == 11). + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. + dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + DirectoryPartitioning + + Examples + -------- + >>> from pyarrow.dataset import DirectoryPartitioning + >>> partitioning = DirectoryPartitioning( + ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) + ... ) + >>> print(partitioning.parse("/2009/11/")) + ((year == 2009) and (month == 11)) + """ + + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a DirectoryPartitioning. + + Parameters + ---------- + field_names : list of str + The names to associate with the values from the subdirectory names. + If schema is given, will be populated from the schema. + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain types. 
This can be more efficient + when materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + max_partition_dictionary_size : int, default 0 + Synonymous with infer_dictionary for backwards compatibility with + 1.0: setting this to -1 or None is equivalent to passing + infer_dictionary=True. + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + +class HivePartitioning(KeyValuePartitioning): + """ + A Partitioning for "/$key=$value/" nested directories as found in + Apache Hive. + + Multi-level, directory based partitioning scheme originating from + Apache Hive with all data files stored in the leaf directories. Data is + partitioned by static values of a particular column in the schema. + Partition keys are represented in the form $key=$value in directory names. + Field order is ignored, as are missing or unrecognized field names. + + For example, given schema, a possible + path would be "/year=2009/month=11/day=15". + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. + dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. + null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" + If any field is None then this fallback will be used as a label + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + HivePartitioning + + Examples + -------- + >>> from pyarrow.dataset import HivePartitioning + >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) + >>> print(partitioning.parse("/year=2009/month=11/")) + ((year == 2009) and (month == 11)) + + """ + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + null_fallback: str = "__HIVE_DEFAULT_PARTITION__", + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + @staticmethod + def discover( + infer_dictionary: bool = False, + max_partition_dictionary_size: int = 0, + null_fallback="__HIVE_DEFAULT_PARTITION__", + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a HivePartitioning. + + Parameters + ---------- + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain. This can be more efficient when + materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. 
+ max_partition_dictionary_size : int, default 0 + Synonymous with infer_dictionary for backwards compatibility with + 1.0: setting this to -1 or None is equivalent to passing + infer_dictionary=True. + null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" + When inferring a schema for partition fields this value will be + replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ + for compatibility with Spark + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + PartitioningFactory + To be used in the FileSystemFactoryOptions. + """ + +class FilenamePartitioning(KeyValuePartitioning): + """ + A Partitioning based on a specified Schema. + + The FilenamePartitioning expects one segment in the file name for each + field in the schema (all fields are required to be present) separated + by '_'. For example given schema the name + ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). + + Parameters + ---------- + schema : Schema + The schema that describes the partitions present in the file path. + dictionaries : dict[str, Array] + If the type of any field of `schema` is a dictionary type, the + corresponding entry of `dictionaries` must be an array containing + every value which may be taken by the corresponding column or an + error will be raised in parsing. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). + + Returns + ------- + FilenamePartitioning + + Examples + -------- + >>> from pyarrow.dataset import FilenamePartitioning + >>> partitioning = FilenamePartitioning( + ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) + ... ) + >>> print(partitioning.parse("2009_11_data.parquet")) + ((year == 2009) and (month == 11)) + """ + + def __init__( + self, + schema: lib.Schema, + dictionaries: dict[str, lib.Array] | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> None: ... + @staticmethod + def discover( + field_names: list[str] | None = None, + infer_dictionary: bool = False, + schema: lib.Schema | None = None, + segment_encoding: Literal["uri", "none"] = "uri", + ) -> PartitioningFactory: + """ + Discover a FilenamePartitioning. + + Parameters + ---------- + field_names : list of str + The names to associate with the values from the subdirectory names. + If schema is given, will be populated from the schema. + infer_dictionary : bool, default False + When inferring a schema for partition fields, yield dictionary + encoded types instead of plain types. This can be more efficient + when materializing virtual columns, and Expressions parsed by the + finished Partitioning will include dictionaries of all unique + inspected values for each field. + schema : Schema, default None + Use this schema instead of inferring a schema from partition + values. Partition values will be validated against this schema + before accumulation into the Partitioning's dictionary. + segment_encoding : str, default "uri" + After splitting paths into segments, decode the segments. Valid + values are "uri" (URI-decode segments) and "none" (leave as-is). 
+
+        Returns
+        -------
+        PartitioningFactory
+            To be used in the FileSystemFactoryOptions.
+        """
+
+class DatasetFactory(lib._Weakrefable):
+    """
+    DatasetFactory is used to create a Dataset, inspect the Schema
+    of the fragments contained in it, and declare a partitioning.
+    """
+
+    root_partition: Expression
+    def finish(self, schema: lib.Schema | None = None) -> Dataset:
+        """
+        Create a Dataset using the inspected schema or an explicit schema
+        (if given).
+
+        Parameters
+        ----------
+        schema : Schema, default None
+            The schema to conform the source to. If None, the inspected
+            schema is used.
+
+        Returns
+        -------
+        Dataset
+        """
+    def inspect(self) -> lib.Schema:
+        """
+        Inspect all data fragments and return a common Schema.
+
+        Returns
+        -------
+        Schema
+        """
+    def inspect_schemas(self) -> list[lib.Schema]: ...
+
+class FileSystemFactoryOptions(lib._Weakrefable):
+    """
+    Influences the discovery of filesystem paths.
+
+    Parameters
+    ----------
+    partition_base_dir : str, optional
+        For the purposes of applying the partitioning, paths will be
+        stripped of the partition_base_dir. Files not matching the
+        partition_base_dir prefix will be skipped for partitioning discovery.
+        The ignored files will still be part of the Dataset, but will not
+        have partition information.
+    partitioning : Partitioning/PartitioningFactory, optional
+        Apply the Partitioning to every discovered Fragment. See Partitioning or
+        PartitioningFactory documentation.
+    exclude_invalid_files : bool, optional (default True)
+        If True, invalid files will be excluded (file format specific check).
+        This will incur IO for each file in a serial and single threaded
+        fashion. Disabling this feature will skip the IO, but unsupported
+        files may be present in the Dataset (resulting in an error at scan
+        time).
+    selector_ignore_prefixes : list, optional
+        When discovering from a Selector (and not from an explicit file list),
+        ignore files and directories matching any of these prefixes.
+        By default this is ['.', '_'].
+    """
+
+    partitioning: Partitioning
+    partitioning_factory: PartitioningFactory
+    partition_base_dir: str
+    exclude_invalid_files: bool
+    selector_ignore_prefixes: list[str]
+
+    def __init__(
+        self,
+        partition_base_dir: str | None = None,
+        partitioning: Partitioning | PartitioningFactory | None = None,
+        exclude_invalid_files: bool = True,
+        selector_ignore_prefixes: list[str] | None = None,
+    ) -> None: ...
+
+class FileSystemDatasetFactory(DatasetFactory):
+    """
+    Create a DatasetFactory from a list of paths with schema inspection.
+
+    Parameters
+    ----------
+    filesystem : pyarrow.fs.FileSystem
+        Filesystem to discover.
+    paths_or_selector : pyarrow.fs.FileSelector or list of path-likes
+        Either a Selector object or a list of path-like objects.
+    format : FileFormat
+        Currently only ParquetFileFormat and IpcFileFormat are supported.
+    options : FileSystemFactoryOptions, optional
+        Various flags influencing the discovery of filesystem paths.
+    """
+
+    def __init__(
+        self,
+        filesystem: SupportedFileSystem,
+        paths_or_selector: FileSelector,
+        format: FileFormat,
+        options: FileSystemFactoryOptions | None = None,
+    ) -> None: ...
+
+class UnionDatasetFactory(DatasetFactory):
+    """
+    Provides a way to inspect/discover a Dataset's expected schema before
+    materialization.
+
+    Parameters
+    ----------
+    factories : list of DatasetFactory
+    """
+    def __init__(self, factories: list[DatasetFactory]) -> None: ...
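To make the factory workflow above concrete, a hedged sketch of explicit discovery with FileSystemDatasetFactory; the local filesystem, the "my_data" directory, and the Hive-style partitioning are assumptions for the example, not part of this patch:

import pyarrow.dataset as ds
from pyarrow import fs

filesystem = fs.LocalFileSystem()
selector = fs.FileSelector("my_data", recursive=True)

# Options controlling discovery, including partitioning inference.
options = ds.FileSystemFactoryOptions(partition_base_dir="my_data")
options.partitioning_factory = ds.HivePartitioning.discover()

factory = ds.FileSystemDatasetFactory(
    filesystem, selector, ds.ParquetFileFormat(), options
)
print(factory.inspect())    # common schema inferred across the discovered fragments
dataset = factory.finish()  # materialize the FileSystemDataset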
+ +_RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) + +class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): + """An iterator over a sequence of record batches.""" + def __iter__(self) -> Self: ... + def __next__(self) -> _RecordBatchT: ... + +class TaggedRecordBatch(NamedTuple): + """ + A combination of a record batch and the fragment it came from. + + Parameters + ---------- + record_batch : RecordBatch + The record batch. + fragment : Fragment + Fragment of the record batch. + """ + + record_batch: lib.RecordBatch + fragment: Fragment + +class TaggedRecordBatchIterator(lib._Weakrefable): + """An iterator over a sequence of record batches with fragments.""" + def __iter__(self) -> Self: ... + def __next__(self) -> TaggedRecordBatch: ... + +class Scanner(lib._Weakrefable): + """A materialized scan operation with context and options bound. + + A scanner is the class that glues the scan tasks, data fragments and data + sources together. + """ + @staticmethod + def from_dataset( + dataset: Dataset, + *, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create Scanner from Dataset, + + Parameters + ---------- + dataset : Dataset + Dataset to scan. + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. 
+ fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @staticmethod + def from_fragment( + fragment: Fragment, + *, + schema: lib.Schema | None = None, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create Scanner from Fragment, + + Parameters + ---------- + fragment : Fragment + fragment to scan. + schema : Schema, optional + The schema of the fragment. + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. + cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. 
+ memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @staticmethod + def from_batches( + source: Iterator[lib.RecordBatch] | RecordBatchReader, + *, + schema: lib.Schema | None = None, + columns: list[str] | dict[str, Expression] | None = None, + filter: Expression | None = None, + batch_size: int = ..., + batch_readahead: int = 16, + fragment_readahead: int = 4, + fragment_scan_options: FragmentScanOptions | None = None, + use_threads: bool = True, + cache_metadata: bool = True, + memory_pool: lib.MemoryPool | None = None, + ) -> Scanner: + """ + Create a Scanner from an iterator of batches. + + This creates a scanner which can be used only once. It is + intended to support writing a dataset (which takes a scanner) + from a source which can be read only once (e.g. a + RecordBatchReader or generator). + + Parameters + ---------- + source : Iterator or Arrow-compatible stream object + The iterator of Batches. This can be a pyarrow RecordBatchReader, + any object that implements the Arrow PyCapsule Protocol for + streams, or an actual Python iterator of RecordBatches. + schema : Schema + The schema of the batches (required when passing a Python + iterator). + columns : list[str] or dict[str, Expression], default None + The columns to project. This can be a list of column names to + include (order and duplicates will be preserved), or a dictionary + with {new_column_name: expression} values for more advanced + projections. + + The list of columns or expressions may use the special fields + `__batch_index` (the index of the batch within the fragment), + `__fragment_index` (the index of the fragment within the dataset), + `__last_in_fragment` (whether the batch is last in fragment), and + `__filename` (the name of the source file or a description of the + source fragment). + + The columns will be passed down to Datasets and corresponding data + fragments to avoid loading, copying, and deserializing columns + that will not be required further down the compute chain. + By default all of the available columns are projected. Raises + an exception if any of the referenced column names does not exist + in the dataset's Schema. + filter : Expression, default None + Scan will return only the rows matching the filter. + If possible the predicate will be pushed down to exploit the + partition information or internal metadata found in the data + source, e.g. Parquet statistics. Otherwise filters the loaded + RecordBatches before yielding them. + batch_size : int, default 131_072 + The maximum row count for scanned record batches. If scanned + record batches are overflowing memory then this method can be + called to reduce their size. + batch_readahead : int, default 16 + The number of batches to read ahead in a file. This might not work + for all file formats. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_readahead : int, default 4 + The number of files to read ahead. Increasing this number will increase + RAM usage but could also improve IO utilization. + fragment_scan_options : FragmentScanOptions, default None + Options specific to a particular scan and fragment type, which + can change between different scans of the same dataset. + use_threads : bool, default True + If enabled, then maximum parallelism will be used determined by + the number of available CPU cores. 
+ cache_metadata : bool, default True + If enabled, metadata may be cached when scanning to speed up + repeated scans. + memory_pool : MemoryPool, default None + For memory allocations, if required. If not specified, uses the + default pool. + """ + @property + def dataset_schema(self) -> lib.Schema: + """The schema with which batches will be read from fragments.""" + @property + def projected_schema(self) -> lib.Schema: + """ + The materialized schema of the data, accounting for projections. + + This is the schema of any data returned from the scanner. + """ + def to_batches(self) -> Iterator[lib.RecordBatch]: + """ + Consume a Scanner in record batches. + + Returns + ------- + record_batches : iterator of RecordBatch + """ + def scan_batches(self) -> TaggedRecordBatchIterator: + """ + Consume a Scanner in record batches with corresponding fragments. + + Returns + ------- + record_batches : iterator of TaggedRecordBatch + """ + def to_table(self) -> lib.Table: + """ + Convert a Scanner into a Table. + + Use this convenience utility with care. This will serially materialize + the Scan result in memory before creating the Table. + + Returns + ------- + Table + """ + def take(self, indices: Indices) -> lib.Table: + """ + Select rows of data by index. + + Will only consume as many batches of the underlying dataset as + needed. Otherwise, this is equivalent to + ``to_table().take(indices)``. + + Parameters + ---------- + indices : Array or array-like + indices of rows to select in the dataset. + + Returns + ------- + Table + """ + def head(self, num_rows: int) -> lib.Table: + """ + Load the first N rows of the dataset. + + Parameters + ---------- + num_rows : int + The number of rows to load. + + Returns + ------- + Table + """ + def count_rows(self) -> int: + """ + Count rows matching the scanner filter. + + Returns + ------- + count : int + """ + def to_reader(self) -> RecordBatchReader: + """Consume this scanner as a RecordBatchReader. + + Returns + ------- + RecordBatchReader + """ + +def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: + """ + Extract partition keys (equality constraints between a field and a scalar) + from an expression as a dict mapping the field's name to its value. + + NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning + will be conjunctions of equality conditions and are accessible through this + function. Other subexpressions will be ignored. + + Parameters + ---------- + partition_expression : pyarrow.dataset.Expression + + Returns + ------- + dict + + Examples + -------- + + For example, an expression of + + is converted to {'part': 'A', 'year': 2016} + """ + +class WrittenFile(lib._Weakrefable): + """ + Metadata information about files written as + part of a dataset write operation + + Parameters + ---------- + path : str + Path to the file. + metadata : pyarrow.parquet.FileMetaData, optional + For Parquet files, the Parquet file metadata. + size : int + The size of the file in bytes. + """ + def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ... 
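+
+# A minimal usage sketch (illustrative only, not part of the stubs): it shows the
+# calls the Scanner annotations above are meant to type-check. The "data/" path
+# and the column/field names are hypothetical.
+#
+#   import pyarrow.dataset as ds
+#
+#   dataset = ds.dataset("data/", format="parquet")
+#   scanner = ds.Scanner.from_dataset(
+#       dataset,
+#       columns=["id", "value"],
+#       filter=ds.field("year") == 2016,
+#       batch_size=65_536,
+#   )
+#   table = scanner.to_table()  # serially materializes the scan into a Table
+#
+#   # get_partition_keys() recovers the equality constraints of a fragment's
+#   # partition expression, e.g. {'year': 2016} for Hive-style partitioning.
+#   for fragment in dataset.get_fragments():
+#       keys = ds.get_partition_keys(fragment.partition_expression)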
+ +def _filesystemdataset_write( + data: Scanner, + base_dir: StrPath, + basename_template: str, + filesystem: SupportedFileSystem, + partitioning: Partitioning, + file_options: FileWriteOptions, + max_partitions: int, + file_visitor: Callable[[str], None], + existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"], + max_open_files: int, + max_rows_per_file: int, + min_rows_per_group: int, + max_rows_per_group: int, + create_dir: bool, +): ... + +class _ScanNodeOptions(ExecNodeOptions): + def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ... + +class ScanNodeOptions(_ScanNodeOptions): + """ + A Source node which yields batches from a Dataset scan. + + This is the option class for the "scan" node factory. + + This node is capable of applying pushdown projections or filters + to the file readers which reduce the amount of data that needs to + be read (if supported by the file format). But note that this does not + construct associated filter or project nodes to perform the final + filtering or projection. Rather, you may supply the same filter + expression or projection to the scan node that you also supply + to the filter or project node. + + Yielded batches will be augmented with fragment/batch indices when + implicit_ordering=True to enable stable ordering for simple ExecPlans. + + Parameters + ---------- + dataset : pyarrow.dataset.Dataset + The table which acts as the data source. + **kwargs : dict, optional + Scan options. See `Scanner.from_dataset` for possible arguments. + require_sequenced_output : bool, default False + Batches are yielded sequentially, like single-threaded + implicit_ordering : bool, default False + Preserve implicit ordering of data. + """ + + def __init__( + self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs + ) -> None: ... diff --git a/python/pyarrow-stubs/_dataset_orc.pyi b/python/pyarrow-stubs/_dataset_orc.pyi new file mode 100644 index 00000000000..9c4ac04198f --- /dev/null +++ b/python/pyarrow-stubs/_dataset_orc.pyi @@ -0,0 +1,6 @@ +from ._dataset import FileFormat + +class OrcFileFormat(FileFormat): + def equals(self, other: OrcFileFormat) -> bool: ... + @property + def default_extname(self): ... diff --git a/python/pyarrow-stubs/_dataset_parquet.pyi b/python/pyarrow-stubs/_dataset_parquet.pyi new file mode 100644 index 00000000000..cbcc17235f1 --- /dev/null +++ b/python/pyarrow-stubs/_dataset_parquet.pyi @@ -0,0 +1,314 @@ +from dataclasses import dataclass +from typing import IO, Any, Iterable, TypedDict + +from _typeshed import StrPath + +from ._compute import Expression +from ._dataset import ( + DatasetFactory, + FileFormat, + FileFragment, + FileWriteOptions, + Fragment, + FragmentScanOptions, + Partitioning, + PartitioningFactory, +) +from ._dataset_parquet_encryption import ParquetDecryptionConfig +from ._fs import SupportedFileSystem +from ._parquet import FileDecryptionProperties, FileMetaData +from .lib import CacheOptions, Schema, _Weakrefable + +parquet_encryption_enabled: bool + +class ParquetFileFormat(FileFormat): + """ + FileFormat for Parquet + + Parameters + ---------- + read_options : ParquetReadOptions + Read options for the file. + default_fragment_scan_options : ParquetFragmentScanOptions + Scan Options for the file. + **kwargs : dict + Additional options for read option or scan option + """ + def __init__( + self, + read_options: ParquetReadOptions | None = None, + default_fragment_scan_options: ParquetFragmentScanOptions | None = None, + **kwargs, + ) -> None: ... 
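+    # Illustrative sketch (assuming a hypothetical local "data/" directory of
+    # Parquet files); it shows how this format class is typically constructed
+    # and handed to pyarrow.dataset:
+    #
+    #   import pyarrow.dataset as ds
+    #
+    #   fmt = ds.ParquetFileFormat(
+    #       read_options=ds.ParquetReadOptions(
+    #           dictionary_columns=None, coerce_int96_timestamp_unit="ms"
+    #       ),
+    #       default_fragment_scan_options=ds.ParquetFragmentScanOptions(pre_buffer=True),
+    #   )
+    #   dataset = ds.dataset("data/", format=fmt)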
+ @property + def read_options(self) -> ParquetReadOptions: ... + def make_write_options(self) -> ParquetFileWriteOptions: ... # type: ignore[override] + def equals(self, other: ParquetFileFormat) -> bool: ... + @property + def default_extname(self) -> str: ... + def make_fragment( + self, + file: StrPath | IO, + filesystem: SupportedFileSystem | None = None, + partition_expression: Expression | None = None, + row_groups: Iterable[int] | None = None, + *, + file_size: int | None = None, + ) -> Fragment: + """ + Make a FileFragment from a given file. + + Parameters + ---------- + file : file-like object, path-like or str + The file or file path to make a fragment from. + filesystem : Filesystem, optional + If `filesystem` is given, `file` must be a string and specifies + the path of the file to read from the filesystem. + partition_expression : Expression, optional + An expression that is guaranteed true for all rows in the fragment. Allows + fragment to be potentially skipped while scanning with a filter. + row_groups : Iterable, optional + The indices of the row groups to include + file_size : int, optional + The size of the file in bytes. Can improve performance with high-latency filesystems + when file size needs to be known before reading. + + Returns + ------- + fragment : Fragment + The file fragment + """ + +class _NameStats(TypedDict): + min: Any + max: Any + +class RowGroupInfo: + """ + A wrapper class for RowGroup information + + Parameters + ---------- + id : integer + The group ID. + metadata : FileMetaData + The rowgroup metadata. + schema : Schema + Schema of the rows. + """ + + id: int + metadata: FileMetaData + schema: Schema + + def __init__(self, id: int, metadata: FileMetaData, schema: Schema) -> None: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... + @property + def statistics(self) -> dict[str, _NameStats]: ... + +class ParquetFileFragment(FileFragment): + """A Fragment representing a parquet file.""" + + def ensure_complete_metadata(self) -> None: ... + @property + def row_groups(self) -> list[RowGroupInfo]: ... + @property + def metadata(self) -> FileMetaData: ... + @property + def num_row_groups(self) -> int: + """ + Return the number of row groups viewed by this fragment (not the + number of row groups in the origin file). + """ + def split_by_row_group( + self, filter: Expression | None = None, schema: Schema | None = None + ) -> list[Fragment]: + """ + Split the fragment into multiple fragments. + + Yield a Fragment wrapping each row group in this ParquetFileFragment. + Row groups will be excluded whose metadata contradicts the optional + filter. + + Parameters + ---------- + filter : Expression, default None + Only include the row groups which satisfy this predicate (using + the Parquet RowGroup statistics). + schema : Schema, default None + Schema to use when filtering row groups. Defaults to the + Fragment's physical schema + + Returns + ------- + A list of Fragments + """ + def subset( + self, + filter: Expression | None = None, + schema: Schema | None = None, + row_group_ids: list[int] | None = None, + ) -> ParquetFileFormat: + """ + Create a subset of the fragment (viewing a subset of the row groups). + + Subset can be specified by either a filter predicate (with optional + schema) or by a list of row group IDs. Note that when using a filter, + the resulting fragment can be empty (viewing no row groups). 
+ + Parameters + ---------- + filter : Expression, default None + Only include the row groups which satisfy this predicate (using + the Parquet RowGroup statistics). + schema : Schema, default None + Schema to use when filtering row groups. Defaults to the + Fragment's physical schema + row_group_ids : list of ints + The row group IDs to include in the subset. Can only be specified + if `filter` is None. + + Returns + ------- + ParquetFileFragment + """ + +class ParquetReadOptions(_Weakrefable): + """ + Parquet format specific options for reading. + + Parameters + ---------- + dictionary_columns : list of string, default None + Names of columns which should be dictionary encoded as + they are read + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds + """ + def __init__( + self, dictionary_columns: list[str] | None, coerce_int96_timestamp_unit: str | None = None + ) -> None: ... + @property + def coerce_int96_timestamp_unit(self) -> str: ... + @coerce_int96_timestamp_unit.setter + def coerce_int96_timestamp_unit(self, unit: str) -> None: ... + def equals(self, other: ParquetReadOptions) -> bool: ... + +class ParquetFileWriteOptions(FileWriteOptions): + def update(self, **kwargs) -> None: ... + def _set_properties(self) -> None: ... + def _set_arrow_properties(self) -> None: ... + def _set_encryption_config(self) -> None: ... + +@dataclass(kw_only=True) +class ParquetFragmentScanOptions(FragmentScanOptions): + """ + Scan-specific options for Parquet fragments. + + Parameters + ---------- + use_buffered_stream : bool, default False + Read files through buffered input streams rather than loading entire + row groups at once. This may be enabled to reduce memory overhead. + Disabled by default. + buffer_size : int, default 8192 + Size of buffered stream, if enabled. Default is 8KB. + pre_buffer : bool, default True + If enabled, pre-buffer the raw Parquet data instead of issuing one + read per column chunk. This can improve performance on high-latency + filesystems (e.g. S3, GCS) by coalescing and issuing file reads in + parallel using a background I/O thread pool. + Set to False if you want to prioritize minimal memory usage + over maximum speed. + cache_options : pyarrow.CacheOptions, default None + Cache options used when pre_buffer is enabled. The default values should + be good for most use cases. You may want to adjust these for example if + you have exceptionally high latency to the file system. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None + If not None, use the provided ParquetDecryptionConfig to decrypt the + Parquet file. + decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None + If not None, use the provided FileDecryptionProperties to decrypt encrypted + Parquet file. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file. 
+ """ + + use_buffered_stream: bool = False + buffer_size: int = 8192 + pre_buffer: bool = True + cache_options: CacheOptions | None = None + thrift_string_size_limit: int | None = None + thrift_container_size_limit: int | None = None + decryption_config: ParquetDecryptionConfig | None = None + decryption_properties: FileDecryptionProperties | None = None + page_checksum_verification: bool = False + + def equals(self, other: ParquetFragmentScanOptions) -> bool: ... + +@dataclass +class ParquetFactoryOptions(_Weakrefable): + """ + Influences the discovery of parquet dataset. + + Parameters + ---------- + partition_base_dir : str, optional + For the purposes of applying the partitioning, paths will be + stripped of the partition_base_dir. Files not matching the + partition_base_dir prefix will be skipped for partitioning discovery. + The ignored files will still be part of the Dataset, but will not + have partition information. + partitioning : Partitioning, PartitioningFactory, optional + The partitioning scheme applied to fragments, see ``Partitioning``. + validate_column_chunk_paths : bool, default False + Assert that all ColumnChunk paths are consistent. The parquet spec + allows for ColumnChunk data to be stored in multiple files, but + ParquetDatasetFactory supports only a single file with all ColumnChunk + data. If this flag is set construction of a ParquetDatasetFactory will + raise an error if ColumnChunk data is not resident in a single file. + """ + + partition_base_dir: str | None = None + partitioning: Partitioning | PartitioningFactory | None = None + validate_column_chunk_paths: bool = False + +class ParquetDatasetFactory(DatasetFactory): + """ + Create a ParquetDatasetFactory from a Parquet `_metadata` file. + + Parameters + ---------- + metadata_path : str + Path to the `_metadata` parquet metadata-only file generated with + `pyarrow.parquet.write_metadata`. + filesystem : pyarrow.fs.FileSystem + Filesystem to read the metadata_path from, and subsequent parquet + files. + format : ParquetFileFormat + Parquet format options. + options : ParquetFactoryOptions, optional + Various flags influencing the discovery of filesystem paths. + """ + def __init__( + self, + metadata_path: str, + filesystem: SupportedFileSystem, + format: FileFormat, + options: ParquetFactoryOptions | None = None, + ) -> None: ... diff --git a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi new file mode 100644 index 00000000000..7623275b865 --- /dev/null +++ b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi @@ -0,0 +1,85 @@ +from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions +from ._parquet import FileDecryptionProperties +from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig +from .lib import _Weakrefable + +class ParquetEncryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level encryption + within the Parquet framework. + + The ParquetEncryptionConfig class serves as a bridge for passing encryption-related + parameters to the appropriate components within the Parquet library. It maintains references + to objects that define the encryption strategy, Key Management Service (KMS) configuration, + and specific encryption configurations for Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object. 
The `CryptoFactory` is responsible for + creating cryptographic components, such as encryptors and decryptors. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object. This object holds the configuration + parameters necessary for connecting to a Key Management Service (KMS). + encryption_config : pyarrow.parquet.encryption.EncryptionConfiguration + Shared pointer to an `EncryptionConfiguration` object. This object defines specific + encryption settings for Parquet data, including the keys assigned to different columns. + + Raises + ------ + ValueError + Raised if `encryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +class ParquetDecryptionConfig(_Weakrefable): + """ + Core configuration class encapsulating parameters for high-level decryption + within the Parquet framework. + + ParquetDecryptionConfig is designed to pass decryption-related parameters to + the appropriate decryption components within the Parquet library. It holds references to + objects that define the decryption strategy, Key Management Service (KMS) configuration, + and specific decryption configurations for reading encrypted Parquet data. + + Parameters + ---------- + crypto_factory : pyarrow.parquet.encryption.CryptoFactory + Shared pointer to a `CryptoFactory` object, pivotal in creating cryptographic + components for the decryption process. + kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig + Shared pointer to a `KmsConnectionConfig` object, containing parameters necessary + for connecting to a Key Management Service (KMS) during decryption. + decryption_config : pyarrow.parquet.encryption.DecryptionConfiguration + Shared pointer to a `DecryptionConfiguration` object, specifying decryption settings + for reading encrypted Parquet data. + + Raises + ------ + ValueError + Raised if `decryption_config` is None. + """ + def __init__( + self, + crypto_factory: CryptoFactory, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> None: ... + +def set_encryption_config( + opts: ParquetFileWriteOptions, + config: ParquetEncryptionConfig, +) -> None: ... +def set_decryption_properties( + opts: ParquetFragmentScanOptions, + config: FileDecryptionProperties, +): ... +def set_decryption_config( + opts: ParquetFragmentScanOptions, + config: ParquetDecryptionConfig, +): ... diff --git a/python/pyarrow-stubs/_feather.pyi b/python/pyarrow-stubs/_feather.pyi new file mode 100644 index 00000000000..8bb914ba45d --- /dev/null +++ b/python/pyarrow-stubs/_feather.pyi @@ -0,0 +1,29 @@ +from typing import IO + +from _typeshed import StrPath + +from .lib import Buffer, NativeFile, Table, _Weakrefable + +class FeatherError(Exception): ... + +def write_feather( + table: Table, + dest: StrPath | IO | NativeFile, + compression: str | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: int = 2, +): ... + +class FeatherReader(_Weakrefable): + def __init__( + self, + source: StrPath | IO | NativeFile | Buffer, + use_memory_map: bool, + use_threads: bool, + ) -> None: ... + @property + def version(self) -> str: ... + def read(self) -> Table: ... + def read_indices(self, indices: list[int]) -> Table: ... + def read_names(self, names: list[str]) -> Table: ... 
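+
+# A minimal usage sketch (illustrative; the file name is hypothetical). The public
+# entry points live in pyarrow.feather and are typed against the signatures above:
+#
+#   import pyarrow as pa
+#   import pyarrow.feather as feather
+#
+#   table = pa.table({"a": [1, 2, 3]})
+#   feather.write_feather(table, "example.feather", compression="zstd")
+#   roundtripped = feather.read_table("example.feather")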
diff --git a/python/pyarrow-stubs/_flight.pyi b/python/pyarrow-stubs/_flight.pyi new file mode 100644 index 00000000000..4450c42df49 --- /dev/null +++ b/python/pyarrow-stubs/_flight.pyi @@ -0,0 +1,1380 @@ +import asyncio +import enum +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import Generator, Generic, Iterable, Iterator, NamedTuple, TypeVar + +from typing_extensions import deprecated + +from .ipc import _ReadPandasMixin +from .lib import ( + ArrowCancelled, + ArrowException, + ArrowInvalid, + Buffer, + IpcReadOptions, + IpcWriteOptions, + RecordBatch, + RecordBatchReader, + Schema, + Table, + TimestampScalar, + _CRecordBatchWriter, + _Weakrefable, +) + +_T = TypeVar("_T") + +class FlightCallOptions(_Weakrefable): + """RPC-layer options for a Flight call.""" + + def __init__( + self, + timeout: float | None = None, + write_options: IpcWriteOptions | None = None, + headers: list[tuple[str, str]] | None = None, + read_options: IpcReadOptions | None = None, + ) -> None: + """Create call options. + + Parameters + ---------- + timeout : float, None + A timeout for the call, in seconds. None means that the + timeout defaults to an implementation-specific value. + write_options : pyarrow.ipc.IpcWriteOptions, optional + IPC write options. The default options can be controlled + by environment variables (see pyarrow.ipc). + headers : List[Tuple[str, str]], optional + A list of arbitrary headers as key, value tuples + read_options : pyarrow.ipc.IpcReadOptions, optional + Serialization options for reading IPC format. + """ + +class CertKeyPair(NamedTuple): + """A TLS certificate and key for use in Flight.""" + + cert: str + key: str + +class FlightError(Exception): + """ + The base class for Flight-specific errors. + + A server may raise this class or one of its subclasses to provide + a more detailed error to clients. + + Parameters + ---------- + message : str, optional + The error message. + extra_info : bytes, optional + Extra binary error details that were provided by the + server/will be sent to the client. + + Attributes + ---------- + extra_info : bytes + Extra binary error details that were provided by the + server/will be sent to the client. + """ + + extra_info: bytes + +class FlightInternalError(FlightError, ArrowException): + """An error internal to the Flight server occurred.""" + +class FlightTimedOutError(FlightError, ArrowException): + """The Flight RPC call timed out.""" + +class FlightCancelledError(FlightError, ArrowCancelled): + """The operation was cancelled.""" + +class FlightServerError(FlightError, ArrowException): + """A server error occurred.""" + +class FlightUnauthenticatedError(FlightError, ArrowException): + """The client is not authenticated.""" + +class FlightUnauthorizedError(FlightError, ArrowException): + """The client is not authorized to perform the given operation.""" + +class FlightUnavailableError(FlightError, ArrowException): + """The server is not reachable or available.""" + +class FlightWriteSizeExceededError(ArrowInvalid): + """A write operation exceeded the client-configured limit.""" + + limit: int + actual: int + +class Action(_Weakrefable): + """An action executable on a Flight service.""" + + def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: + """Create an action from a type and a buffer. 
+ + Parameters + ---------- + action_type : bytes or str + buf : Buffer or bytes-like object + """ + @property + def type(self) -> str: + """The action type.""" + @property + def body(self) -> Buffer: + """The action body (arguments for the action).""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + +class ActionType(NamedTuple): + """A type of action that is executable on a Flight service.""" + + type: str + description: str + + def make_action(self, buf: Buffer | bytes) -> Action: + """Create an Action with this type. + + Parameters + ---------- + buf : obj + An Arrow buffer or Python bytes or bytes-like object. + """ + +class Result(_Weakrefable): + """A result from executing an Action.""" + def __init__(self, buf: Buffer | bytes) -> None: + """Create a new result. + + Parameters + ---------- + buf : Buffer or bytes-like object + """ + @property + def body(self) -> Buffer: + """Get the Buffer containing the result.""" + def serialize(self) -> bytes: + """Get the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + @classmethod + def deserialize(cls, serialized: bytes) -> Self: + """Parse the wire-format representation of this type. + + Useful when interoperating with non-Flight systems (e.g. REST + services) that may want to return Flight types. + + """ + +class BasicAuth(_Weakrefable): + """A container for basic auth.""" + def __init__( + self, username: str | bytes | None = None, password: str | bytes | None = None + ) -> None: + """Create a new basic auth object. + + Parameters + ---------- + username : string + password : string + """ + @property + def username(self) -> bytes: ... + @property + def password(self) -> bytes: ... + def serialize(self) -> str: ... + @staticmethod + def deserialize(serialized: str | bytes) -> BasicAuth: ... + +class DescriptorType(enum.Enum): + """ + The type of a FlightDescriptor. + + Attributes + ---------- + + UNKNOWN + An unknown descriptor type. + + PATH + A Flight stream represented by a path. + + CMD + A Flight stream represented by an application-defined command. 
+ + """ + + UNKNOWN = 0 + PATH = 1 + CMD = 2 + +class FlightMethod(enum.Enum): + """The implemented methods in Flight.""" + + INVALID = 0 + HANDSHAKE = 1 + LIST_FLIGHTS = 2 + GET_FLIGHT_INFO = 3 + GET_SCHEMA = 4 + DO_GET = 5 + DO_PUT = 6 + DO_ACTION = 7 + LIST_ACTIONS = 8 + DO_EXCHANGE = 9 + +class FlightDescriptor(_Weakrefable): + """A description of a data stream available from a Flight service.""" + @staticmethod + def for_path(*path: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for a resource path.""" + + @staticmethod + def for_command(command: str | bytes) -> FlightDescriptor: + """Create a FlightDescriptor for an opaque command.""" + @property + def descriptor_type(self) -> DescriptorType: + """Get the type of this descriptor.""" + @property + def path(self) -> list[bytes] | None: + """Get the path for this descriptor.""" + @property + def command(self) -> bytes | None: + """Get the command for this descriptor.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Ticket(_Weakrefable): + """A ticket for requesting a Flight stream.""" + def __init__(self, ticket: str | bytes) -> None: ... + @property + def ticket(self) -> bytes: ... + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class Location(_Weakrefable): + """The location of a Flight service.""" + def __init__(self, uri: str | bytes) -> None: ... + @property + def uri(self) -> bytes: ... + def equals(self, other: Location) -> bool: ... + @staticmethod + def for_grpc_tcp(host: str | bytes, port: int) -> Location: + """Create a Location for a TCP-based gRPC service.""" + @staticmethod + def for_grpc_tls(host: str | bytes, port: int) -> Location: + """Create a Location for a TLS-based gRPC service.""" + @staticmethod + def for_grpc_unix(path: str | bytes) -> Location: + """Create a Location for a domain socket-based gRPC service.""" + +class FlightEndpoint(_Weakrefable): + """A Flight stream, along with the ticket and locations to access it.""" + def __init__( + self, + ticket: Ticket | str | bytes, + locations: list[str | Location], + expiration_time: TimestampScalar | None = ..., + app_metadata: bytes | str = ..., + ): + """Create a FlightEndpoint from a ticket and list of locations. + + Parameters + ---------- + ticket : Ticket or bytes + the ticket needed to access this flight + locations : list of string URIs + locations where this flight is available + expiration_time : TimestampScalar, default None + Expiration time of this stream. If present, clients may assume + they can retry DoGet requests. Otherwise, clients should avoid + retrying DoGet requests. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. + + Raises + ------ + ArrowException + If one of the location URIs is not a valid URI. + """ + @property + def ticket(self) -> Ticket: + """Get the ticket in this endpoint.""" + @property + def locations(self) -> list[Location]: + """Get locations where this flight is available.""" + def serialize(self) -> bytes: ... + @property + def expiration_time(self) -> TimestampScalar | None: + """Get the expiration time of this stream. + + If present, clients may assume they can retry DoGet requests. + Otherwise, clients should avoid retrying DoGet requests. + + """ + @property + def app_metadata(self) -> bytes | str: + """Get application-defined opaque metadata.""" + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... 
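+
+# A minimal sketch (illustrative only; the path, host, port, and ticket payload
+# are hypothetical) of building the Flight descriptors and endpoints annotated
+# above and round-tripping them through their wire format:
+#
+#   import pyarrow.flight as flight
+#
+#   descriptor = flight.FlightDescriptor.for_path("my_dataset")
+#   ticket = flight.Ticket(b"ticket-bytes")
+#   location = flight.Location.for_grpc_tcp("localhost", 8815)
+#   endpoint = flight.FlightEndpoint(ticket, [location])
+#
+#   payload = descriptor.serialize()                     # -> bytes
+#   same = flight.FlightDescriptor.deserialize(payload)  # round-trip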
+ +class SchemaResult(_Weakrefable): + """The serialized schema returned from a GetSchema request.""" + def __init__(self, schema: Schema) -> None: + """Create a SchemaResult from a schema. + + Parameters + ---------- + schema: Schema + the schema of the data in this flight. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightInfo(_Weakrefable): + """A description of a Flight stream.""" + def __init__( + self, + schema: Schema, + descriptor: FlightDescriptor, + endpoints: list[FlightEndpoint], + total_records: int = ..., + total_bytes: int = ..., + ordered: bool = ..., + app_metadata: bytes | str = ..., + ) -> None: + """Create a FlightInfo object from a schema, descriptor, and endpoints. + + Parameters + ---------- + schema : Schema + the schema of the data in this flight. + descriptor : FlightDescriptor + the descriptor for this flight. + endpoints : list of FlightEndpoint + a list of endpoints where this flight is available. + total_records : int, default None + the total records in this flight, -1 or None if unknown. + total_bytes : int, default None + the total bytes in this flight, -1 or None if unknown. + ordered : boolean, default False + Whether endpoints are in the same order as the data. + app_metadata : bytes or str, default "" + Application-defined opaque metadata. + """ + @property + def schema(self) -> Schema: + """The schema of the data in this flight.""" + @property + def descriptor(self) -> FlightDescriptor: + """The descriptor of the data in this flight.""" + @property + def endpoints(self) -> list[FlightEndpoint]: + """The endpoints where this flight is available.""" + @property + def total_records(self) -> int: + """The total record count of this flight, or -1 if unknown.""" + @property + def total_bytes(self) -> int: + """The size in bytes of the data in this flight, or -1 if unknown.""" + @property + def ordered(self) -> bool: + """Whether endpoints are in the same order as the data.""" + @property + def app_metadata(self) -> bytes | str: + """ + Application-defined opaque metadata. + + There is no inherent or required relationship between this and the + app_metadata fields in the FlightEndpoints or resulting FlightData + messages. Since this metadata is application-defined, a given + application could define there to be a relationship, but there is + none required by the spec. + + """ + def serialize(self) -> bytes: ... + @classmethod + def deserialize(cls, serialized: bytes) -> Self: ... + +class FlightStreamChunk(_Weakrefable): + """A RecordBatch with application metadata on the side.""" + @property + def data(self) -> RecordBatch | None: ... + @property + def app_metadata(self) -> Buffer | None: ... + def __iter__(self): ... + +class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): + """A reader for Flight streams.""" + + # Needs to be separate class so the "real" class can subclass the + # pure-Python mixin class + + def __iter__(self) -> Self: ... + def __next__(self) -> FlightStreamChunk: ... + @property + def schema(self) -> Schema: + """Get the schema for this reader.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + def read_chunk(self) -> FlightStreamChunk: + """Read the next FlightStreamChunk along with any metadata. + + Returns + ------- + chunk : FlightStreamChunk + The next FlightStreamChunk in the stream. 
+ + Raises + ------ + StopIteration + when the stream is finished + """ + def to_reader(self) -> RecordBatchReader: + """Convert this reader into a regular RecordBatchReader. + + This may fail if the schema cannot be read from the remote end. + + Returns + ------- + RecordBatchReader + """ + +class MetadataRecordBatchReader(_MetadataRecordBatchReader): + """The base class for readers for Flight streams. + + See Also + -------- + FlightStreamReader + """ + +class FlightStreamReader(MetadataRecordBatchReader): + """A reader that can also be canceled.""" + def cancel(self) -> None: + """Cancel the read operation.""" + def read_all(self) -> Table: + """Read the entire contents of the stream as a Table.""" + +class MetadataRecordBatchWriter(_CRecordBatchWriter): + """A RecordBatchWriter that also allows writing application metadata. + + This class is a context manager; on exit, close() will be called. + """ + + def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: + """Prepare to write data to this stream with the given schema.""" + def write_metadata(self, buf: Buffer) -> None: + """Write Flight metadata by itself.""" + def write_batch(self, batch: RecordBatch) -> None: # type: ignore[override] + """ + Write RecordBatch to stream. + + Parameters + ---------- + batch : RecordBatch + """ + def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: + """ + Write Table to stream in (contiguous) RecordBatch objects. + + Parameters + ---------- + table : Table + max_chunksize : int, default None + Maximum number of rows for RecordBatch chunks. Individual chunks may + be smaller depending on the chunk layout of individual columns. + """ + def close(self) -> None: + """ + Close stream and write end-of-stream 0 marker. + """ + def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: + """Write a RecordBatch along with Flight metadata. + + Parameters + ---------- + batch : RecordBatch + The next RecordBatch in the stream. + buf : Buffer + Application-specific metadata for the batch as defined by + Flight. + """ + +class FlightStreamWriter(MetadataRecordBatchWriter): + """A writer that also allows closing the write side of a stream.""" + def done_writing(self) -> None: + """Indicate that the client is done writing, but not done reading.""" + +class FlightMetadataReader(_Weakrefable): + """A reader for Flight metadata messages sent during a DoPut.""" + def read(self) -> Buffer | None: + """Read the next metadata message.""" + +class FlightMetadataWriter(_Weakrefable): + """A sender for Flight metadata messages during a DoPut.""" + def write(self, message: Buffer) -> None: + """Write the next metadata message. + + Parameters + ---------- + message : Buffer + """ + +class AsyncioCall(Generic[_T]): + """State for an async RPC using asyncio.""" + + _future: asyncio.Future[_T] + + def as_awaitable(self) -> asyncio.Future[_T]: ... + def wakeup(self, result_or_exception: BaseException | _T) -> None: ... + +class AsyncioFlightClient: + """ + A FlightClient with an asyncio-based async interface. + + This interface is EXPERIMENTAL. + """ + + def __init__(self, client: FlightClient) -> None: ... + async def get_flight_info( + self, + descriptor: FlightDescriptor, + *, + options: FlightCallOptions | None = None, + ): ... + +class FlightClient(_Weakrefable): + """A client to a Flight service. + + Connect to a Flight service on the given host and port. + + Parameters + ---------- + location : str, tuple or Location + Location to connect to. 
Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. + tls_root_certs : bytes or None + PEM-encoded + cert_chain: bytes or None + Client certificate if using mutual TLS + private_key: bytes or None + Client private key for cert_chain is using mutual TLS + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list optional, default None + A list of ClientMiddlewareFactory instances. + write_size_limit_bytes : int optional, default None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean optional, default False + A flag that indicates that, if the client is connecting + with TLS, that it skips server verification. If this is + enabled, all other TLS settings are overridden. + generic_options : list optional, default None + A list of generic (string, int or string) option tuples passed + to the underlying transport. Effect is implementation + dependent. + """ + def __init__( + self, + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, + ): ... + @property + def supports_async(self) -> bool: ... + def as_async(self) -> AsyncioFlightClient: ... + def wait_for_available(self, timeout: int = 5) -> None: + """Block until the server can be contacted. + + Parameters + ---------- + timeout : int, default 5 + The maximum seconds to wait. + """ + @deprecated( + "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." + ) + @classmethod + def connect( + cls, + location: str | tuple[str, int] | Location, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + disable_server_verification: bool = False, + ) -> FlightClient: + """Connect to a Flight server. + + .. deprecated:: 0.15.0 + Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead. + """ + def authenticate( + self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None + ) -> None: + """Authenticate to the server. + + Parameters + ---------- + auth_handler : ClientAuthHandler + The authentication mechanism to use. + options : FlightCallOptions + Options for this call. + """ + def authenticate_basic_token( + self, username: str, password: str, options: FlightCallOptions | None = None + ) -> tuple[str, str]: + """Authenticate to the server with HTTP basic authentication. + + Parameters + ---------- + username : string + Username to authenticate with + password : string + Password to authenticate with + options : FlightCallOptions + Options for this call + + Returns + ------- + tuple : Tuple[str, str] + A tuple representing the FlightCallOptions authorization + header entry of a bearer token. 
+ """ + def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: + """List the actions available on a service.""" + def do_action( + self, action: Action, options: FlightCallOptions | None = None + ) -> Iterator[Result]: + """ + Execute an action on a service. + + Parameters + ---------- + action : str, tuple, or Action + Can be action type name (no body), type and body, or any Action + object + options : FlightCallOptions + RPC options + + Returns + ------- + results : iterator of Result values + """ + def list_flights( + self, criteria: str | None = None, options: FlightCallOptions | None = None + ) -> Generator[FlightInfo, None, None]: + """List the flights available on a service.""" + def get_flight_info( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> FlightInfo: + """Request information about an available flight.""" + def get_schema( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> Schema: + """Request schema for an available flight.""" + def do_get( + self, ticket: Ticket, options: FlightCallOptions | None = None + ) -> FlightStreamReader: + """Request the data for a flight. + + Returns + ------- + reader : FlightStreamReader + """ + def do_put( + self, + descriptor: FlightDescriptor, + schema: Schema, + options: FlightCallOptions | None = None, + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Upload data to a flight. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightMetadataReader + """ + def do_exchange( + self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None + ) -> tuple[FlightStreamWriter, FlightStreamReader]: + """Start a bidirectional data exchange with a server. + + Parameters + ---------- + descriptor : FlightDescriptor + A descriptor for the flight. + options : FlightCallOptions + RPC options. + + Returns + ------- + writer : FlightStreamWriter + reader : FlightStreamReader + """ + def close(self) -> None: + """Close the client and disconnect.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... + +class FlightDataStream(_Weakrefable): + """ + Abstract base class for Flight data streams. + + See Also + -------- + RecordBatchStream + GeneratorStream + """ + +class RecordBatchStream(FlightDataStream): + """A Flight data stream backed by RecordBatches. + + The remainder of this DoGet request will be handled in C++, + without having to acquire the GIL. + + """ + def __init__( + self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None + ) -> None: + """Create a RecordBatchStream from a data source. + + Parameters + ---------- + data_source : RecordBatchReader or Table + The data to stream to the client. + options : pyarrow.ipc.IpcWriteOptions, optional + Optional IPC options to control how to write the data. + """ + +class GeneratorStream(FlightDataStream): + """A Flight data stream backed by a Python generator.""" + def __init__( + self, + schema: Schema, + generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], + options: IpcWriteOptions | None = None, + ) -> None: + """Create a GeneratorStream from a Python generator. + + Parameters + ---------- + schema : Schema + The schema for the data to be returned. + + generator : iterator or iterable + The generator should yield other FlightDataStream objects, + Tables, RecordBatches, or RecordBatchReaders. 
+ + options : pyarrow.ipc.IpcWriteOptions, optional + """ + +class ServerCallContext(_Weakrefable): + """Per-call state/context.""" + def peer_identity(self) -> bytes: + """Get the identity of the authenticated peer. + + May be the empty string. + """ + def peer(self) -> str: + """Get the address of the peer.""" + # Set safe=True as gRPC on Windows sometimes gives garbage bytes + def is_cancelled(self) -> bool: + """Check if the current RPC call has been canceled by the client.""" + def add_header(self, key: str, value: str) -> None: + """Add a response header.""" + def add_trailer(self, key: str, value: str) -> None: + """Add a response trailer.""" + def get_middleware(self, key: str) -> ServerMiddleware | None: + """ + Get a middleware instance by key. + + Returns None if the middleware was not found. + """ + +class ServerAuthReader(_Weakrefable): + """A reader for messages from the client during an auth handshake.""" + def read(self) -> str: ... + +class ServerAuthSender(_Weakrefable): + """A writer for messages to the client during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ClientAuthReader(_Weakrefable): + """A reader for messages from the server during an auth handshake.""" + def read(self) -> str: ... + +class ClientAuthSender(_Weakrefable): + """A writer for messages to the server during an auth handshake.""" + def write(self, message: str) -> None: ... + +class ServerAuthHandler(_Weakrefable): + """Authentication middleware for a server. + + To implement an authentication mechanism, subclass this class and + override its methods. + + """ + def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): + """Conduct the handshake with the client. + + May raise an error if the client cannot authenticate. + + Parameters + ---------- + outgoing : ServerAuthSender + A channel to send messages to the client. + incoming : ServerAuthReader + A channel to read messages from the client. + """ + def is_valid(self, token: str) -> bool: + """Validate a client token, returning their identity. + + May return an empty string (if the auth mechanism does not + name the peer) or raise an exception (if the token is + invalid). + + Parameters + ---------- + token : bytes + The authentication token from the client. + + """ + +class ClientAuthHandler(_Weakrefable): + """Authentication plugin for a client.""" + def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): + """Conduct the handshake with the server. + + Parameters + ---------- + outgoing : ClientAuthSender + A channel to send messages to the server. + incoming : ClientAuthReader + A channel to read messages from the server. + """ + def get_token(self) -> str: + """Get the auth token for a call.""" + +class CallInfo(NamedTuple): + """Information about a particular RPC for Flight middleware.""" + + method: FlightMethod + +class ClientMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + client are accessible from the middleware itself. + + """ + def start_call(self, info: CallInfo) -> ClientMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe and must not raise exceptions. + + Parameters + ---------- + info : CallInfo + Information about the call. 
+ + Returns + ------- + instance : ClientMiddleware + An instance of ClientMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + """ + +class ClientMiddleware(_Weakrefable): + """Client-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. + + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the request, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + + def received_headers(self, headers: dict[str, list[str] | list[bytes]]): + """A callback when headers are received. + + The default implementation does nothing. + + Parameters + ---------- + headers : dict + A dictionary of headers from the server. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). + + """ + + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + The default implementation does nothing. + + Parameters + ---------- + exception : ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class ServerMiddlewareFactory(_Weakrefable): + """A factory for new middleware instances. + + All middleware methods will be called from the same thread as the + RPC method implementation. That is, thread-locals set in the + middleware are accessible from the method itself. + + """ + + def start_call( + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> ServerMiddleware | None: + """Called at the start of an RPC. + + This must be thread-safe. + + Parameters + ---------- + info : CallInfo + Information about the call. + headers : dict + A dictionary of headers from the client. Keys are strings + and values are lists of strings (for text headers) or + bytes (for binary headers). + + Returns + ------- + instance : ServerMiddleware + An instance of ServerMiddleware (the instance to use for + the call), or None if this call is not intercepted. + + Raises + ------ + exception : pyarrow.ArrowException + If an exception is raised, the call will be rejected with + the given error. + + """ + +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): + """A factory for tracing middleware instances. + + This enables OpenTelemetry support in Arrow (if Arrow was compiled + with OpenTelemetry support enabled). A new span will be started on + each RPC call. The TracingServerMiddleware instance can then be + retrieved within an RPC handler to get the propagated context, + which can be used to start a new span on the Python side. + + Because the Python/C++ OpenTelemetry libraries do not + interoperate, spans on the C++ side are not directly visible to + the Python side and vice versa. + + """ + +class ServerMiddleware(_Weakrefable): + """Server-side middleware for a call, instantiated per RPC. + + Methods here should be fast and must be infallible: they should + not raise exceptions or stall indefinitely. 
+ + """ + + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: + """A callback before headers are sent. + + Returns + ------- + headers : dict + A dictionary of header values to add to the response, or + None if no headers are to be added. The dictionary should + have string keys and string or list-of-string values. + + Bytes values are allowed, but the underlying transport may + not support them or may restrict them. For gRPC, binary + values are only allowed on headers ending in "-bin". + + Header names must be lowercase ASCII. + + """ + def call_completed(self, exception: ArrowException): + """A callback when the call finishes. + + Parameters + ---------- + exception : pyarrow.ArrowException + If the call errored, this is the equivalent + exception. Will be None if the call succeeded. + + """ + +class TracingServerMiddleware(ServerMiddleware): + trace_context: dict + def __init__(self, trace_context: dict) -> None: ... + +class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory): + """Wrapper to bundle server middleware into a single C++ one.""" + + def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ... + def start_call( # type: ignore[override] + self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] + ) -> _ServerMiddlewareFactoryWrapper | None: ... + +class _ServerMiddlewareWrapper(ServerMiddleware): + def __init__(self, middleware: dict[str, ServerMiddleware]) -> None: ... + def send_headers(self) -> dict[str, dict[str, list[str] | list[bytes]]]: ... + def call_completed(self, exception: ArrowException) -> None: ... + +class _FlightServerFinalizer(_Weakrefable): + """ + A finalizer that shuts down the server on destruction. + + See ARROW-16597. If the server is still active at interpreter + exit, the process may segfault. + """ + + def finalize(self) -> None: ... + +class FlightServerBase(_Weakrefable): + """A Flight service definition. + + To start the server, create an instance of this class with an + appropriate location. The server will be running as soon as the + instance is created; it is not required to call :meth:`serve`. + + Override methods to define your Flight service. + + Parameters + ---------- + location : str, tuple or Location optional, default None + Location to serve on. Either a gRPC URI like `grpc://localhost:port`, + a tuple of (host, port) pair, or a Location instance. + If None is passed then the server will be started on localhost with a + system provided random port. + auth_handler : ServerAuthHandler optional, default None + An authentication mechanism to use. May be None. + tls_certificates : list optional, default None + A list of (certificate, key) pairs. + verify_client : boolean optional, default False + If True, then enable mutual TLS: require the client to present + a client certificate, and validate the certificate. + root_certificates : bytes optional, default None + If enabling mutual TLS, this specifies the PEM-encoded root + certificate used to validate client certificates. + middleware : dict optional, default None + A dictionary of :class:`ServerMiddlewareFactory` instances. The + string keys can be used to retrieve the middleware instance within + RPC handlers (see :meth:`ServerCallContext.get_middleware`). 
+ + """ + def __init__( + self, + location: str | tuple[str, int] | Location | None = None, + auth_handler: ServerAuthHandler | None = None, + tls_certificates: list[tuple[str, str]] | None = None, + verify_client: bool = False, + root_certificates: str | None = None, + middleware: dict[str, ServerMiddlewareFactory] | None = None, + ): ... + @property + def port(self) -> int: + """ + Get the port that this server is listening on. + + Returns a non-positive value if the operation is invalid + (e.g. init() was not called or server is listening on a domain + socket). + """ + def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: + """List flights available on this service. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + criteria : bytes + Filter criteria provided by the client. + + Returns + ------- + iterator of FlightInfo + + """ + def get_flight_info( + self, context: ServerCallContext, descriptor: FlightDescriptor + ) -> FlightInfo: + """Get information about a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + FlightInfo + + """ + def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: + """Get the schema of a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + + Returns + ------- + Schema + + """ + def do_put( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: FlightMetadataWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : FlightMetadataWriter + A writer to send responses to the client. + + """ + def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + ticket : Ticket + The ticket for the flight. + + Returns + ------- + FlightDataStream + A stream of data to send back to the client. + + """ + def do_exchange( + self, + context: ServerCallContext, + descriptor: FlightDescriptor, + reader: MetadataRecordBatchReader, + writer: MetadataRecordBatchWriter, + ) -> None: + """Write data to a flight. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. 
+ + Parameters + ---------- + context : ServerCallContext + Common contextual information. + descriptor : FlightDescriptor + The descriptor for the flight provided by the client. + reader : MetadataRecordBatchReader + A reader for data uploaded by the client. + writer : MetadataRecordBatchWriter + A writer to send responses to the client. + + """ + def list_actions(self, context: ServerCallContext) -> Iterable[Action]: + """List custom actions available on this server. + + Applications should override this method to implement their + own behavior. The default method raises a NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + + Returns + ------- + iterator of ActionType or tuple + + """ + def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: + """Execute a custom action. + + This method should return an iterator, or it should be a + generator. Applications should override this method to + implement their own behavior. The default method raises a + NotImplementedError. + + Parameters + ---------- + context : ServerCallContext + Common contextual information. + action : Action + The action to execute. + + Returns + ------- + iterator of bytes + + """ + def serve(self) -> None: + """Block until the server shuts down. + + This method only returns if shutdown() is called or a signal is + received. + """ + def run(self) -> None: + """Block until the server shuts down. + + .. deprecated:: 0.15.0 + Use the ``FlightServer.serve`` method instead + """ + def shutdown(self) -> None: + """Shut down the server, blocking until current requests finish. + + Do not call this directly from the implementation of a Flight + method, as then the server will block forever waiting for that + request to finish. Instead, call this method from a background + thread. + + This method should only be called once. + """ + def wait(self) -> None: + """Block until server is terminated with shutdown.""" + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, traceback): ... + +def connect( + location: str | tuple[str, int] | Location, + *, + tls_root_certs: str | None = None, + cert_chain: str | None = None, + private_key: str | None = None, + override_hostname: str | None = None, + middleware: list[ClientMiddlewareFactory] | None = None, + write_size_limit_bytes: int | None = None, + disable_server_verification: bool = False, + generic_options: list[tuple[str, int | str]] | None = None, +) -> FlightClient: + """ + Connect to a Flight server. + + Parameters + ---------- + location : str, tuple, or Location + Location to connect to. Either a URI like "grpc://localhost:port", + a tuple of (host, port), or a Location instance. + tls_root_certs : bytes or None + PEM-encoded. + cert_chain: str or None + If provided, enables TLS mutual authentication. + private_key: str or None + If provided, enables TLS mutual authentication. + override_hostname : str or None + Override the hostname checked by TLS. Insecure, use with caution. + middleware : list or None + A list of ClientMiddlewareFactory instances to apply. + write_size_limit_bytes : int or None + A soft limit on the size of a data payload sent to the + server. Enabled if positive. If enabled, writing a record + batch that (when serialized) exceeds this limit will raise an + exception; the client can retry the write with a smaller + batch. + disable_server_verification : boolean or None + Disable verifying the server when using TLS. + Insecure, use with caution. 
+ generic_options : list or None + A list of generic (string, int or string) options to pass to + the underlying transport. + + Returns + ------- + client : FlightClient + """ diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi new file mode 100644 index 00000000000..9b0f0ceaa20 --- /dev/null +++ b/python/pyarrow-stubs/_fs.pyi @@ -0,0 +1,1001 @@ +import datetime as dt +import enum +import sys + +from abc import ABC, abstractmethod + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from typing import Union, overload + +from fsspec import AbstractFileSystem # type: ignore[import-untyped] + +from .lib import NativeFile, _Weakrefable + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] + +class FileType(enum.IntFlag): + NotFound = enum.auto() + Unknown = enum.auto() + File = enum.auto() + Directory = enum.auto() + +class FileInfo(_Weakrefable): + """ + FileSystem entry info. + + Parameters + ---------- + path : str + The full path to the filesystem entry. + type : FileType + The type of the filesystem entry. + mtime : datetime or float, default None + If given, the modification time of the filesystem entry. + If a float is given, it is the number of seconds since the + Unix epoch. + mtime_ns : int, default None + If given, the modification time of the filesystem entry, + in nanoseconds since the Unix epoch. + `mtime` and `mtime_ns` are mutually exclusive. + size : int, default None + If given, the filesystem entry size in bytes. This should only + be given if `type` is `FileType.File`. + + Examples + -------- + Generate a file: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> path_fs = local_path + "/pyarrow-fs-example.dat" + >>> with local.open_output_stream(path_fs) as stream: + ... stream.write(b"data") + 4 + + Get FileInfo object using ``get_file_info()``: + + >>> file_info = local.get_file_info(path_fs) + >>> file_info + + + Inspect FileInfo attributes: + + >>> file_info.type + + + >>> file_info.is_file + True + + >>> file_info.path + '/.../pyarrow-fs-example.dat' + + >>> file_info.base_name + 'pyarrow-fs-example.dat' + + >>> file_info.size + 4 + + >>> file_info.extension + 'dat' + + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + + def __init__( + self, + path: str, + type: FileType = FileType.Unknown, + *, + mtime: dt.datetime | float | None = None, + mtime_ns: int | None = None, + size: int | None = None, + ): ... + @property + def type(self) -> FileType: + """ + Type of the file. + + The returned enum values can be the following: + + - FileType.NotFound: target does not exist + - FileType.Unknown: target exists but its type is unknown (could be a + special file such as a Unix socket or character device, or + Windows NUL / CON / ...) + - FileType.File: target is a regular file + - FileType.Directory: target is a regular directory + + Returns + ------- + type : FileType + """ + @property + def is_file(self) -> bool: ... + @property + def path(self) -> str: + """ + The full file path in the filesystem. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.path + '/.../pyarrow-fs-example.dat' + """ + @property + def base_name(self) -> str: + """ + The file base name. 
+ + Component after the last directory separator. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.base_name + 'pyarrow-fs-example.dat' + """ + @property + def size(self) -> int: + """ + The size in bytes, if available. + + Only regular files are guaranteed to have a size. + + Returns + ------- + size : int or None + """ + @property + def extension(self) -> str: + """ + The file extension. + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.extension + 'dat' + """ + @property + def mtime(self) -> dt.datetime | None: + """ + The time of last modification, if available. + + Returns + ------- + mtime : datetime.datetime or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime # doctest: +SKIP + datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) + """ + @property + def mtime_ns(self) -> int | None: + """ + The time of last modification, if available, expressed in nanoseconds + since the Unix epoch. + + Returns + ------- + mtime_ns : int or None + + Examples + -------- + >>> file_info = local.get_file_info(path) + >>> file_info.mtime_ns # doctest: +SKIP + 1656489370873922073 + """ + +class FileSelector(_Weakrefable): + """ + File and directory selector. + + It contains a set of options that describes how to search for files and + directories. + + Parameters + ---------- + base_dir : str + The directory in which to select files. Relative paths also work, use + '.' for the current directory and '..' for the parent. + allow_not_found : bool, default False + The behavior if `base_dir` doesn't exist in the filesystem. + If false, an error is returned. + If true, an empty selection is returned. + recursive : bool, default False + Whether to recurse into subdirectories. + + Examples + -------- + List the contents of a directory and subdirectories: + + >>> selector_1 = fs.FileSelector(local_path, recursive=True) + >>> local.get_file_info(selector_1) # doctest: +SKIP + [, + , + ] + + List only the contents of the base directory: + + >>> selector_2 = fs.FileSelector(local_path) + >>> local.get_file_info(selector_2) # doctest: +SKIP + [, + ] + + Return empty selection if the directory doesn't exist: + + >>> selector_not_found = fs.FileSelector( + ... local_path + "/missing", recursive=True, allow_not_found=True + ... ) + >>> local.get_file_info(selector_not_found) + [] + """ + + base_dir: str + allow_not_found: bool + recursive: bool + def __init__(self, base_dir: str, allow_not_found: bool = False, recursive: bool = False): ... + +class FileSystem(_Weakrefable): + """ + Abstract file system API. + """ + + @classmethod + def from_uri(cls, uri: str) -> tuple[Self, str]: + """ + Create a new FileSystem from URI or Path. + + Recognized URI schemes are "file", "mock", "s3fs", "gs", "gcs", "hdfs" and "viewfs". + In addition, the argument can be a pathlib.Path object, or a string + describing an absolute local path. + + Parameters + ---------- + uri : string + URI-based path, for example: file:///some/local/path. + + Returns + ------- + tuple of (FileSystem, str path) + With (filesystem, path) tuple where path is the abstract path + inside the FileSystem instance. 
+ + Examples + -------- + Create a new FileSystem subclass from a URI: + + >>> uri = "file:///{}/pyarrow-fs-example.dat".format(local_path) + >>> local_new, path_new = fs.FileSystem.from_uri(uri) + >>> local_new + >> path_new + '/.../pyarrow-fs-example.dat' + + Or from a s3 bucket: + + >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/") + (, 'usgs-landsat/collection02') + """ + def equals(self, other: FileSystem) -> bool: + """ + Parameters + ---------- + other : pyarrow.fs.FileSystem + + Returns + ------- + bool + """ + @property + def type_name(self) -> str: + """ + The filesystem's type name. + """ + def get_file_info(self, paths_or_selector: str | FileSelector | list[str]) -> FileInfo | list[FileInfo]: + """ + Get info for the given files. + + Any symlink is automatically dereferenced, recursively. A non-existing + or unreachable file returns a FileStat object and has a FileType of + value NotFound. An exception indicates a truly exceptional condition + (low-level I/O error, etc.). + + Parameters + ---------- + paths_or_selector : FileSelector, path-like or list of path-likes + Either a selector object, a path-like object or a list of + path-like objects. The selector's base directory will not be + part of the results, even if it exists. If it doesn't exist, + use `allow_not_found`. + + Returns + ------- + FileInfo or list of FileInfo + Single FileInfo object is returned for a single path, otherwise + a list of FileInfo objects is returned. + + Examples + -------- + >>> local + + >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) + + """ + def create_dir(self, path: str, *, recursive: bool = True) -> None: + """ + Create a directory and subdirectories. + + This function succeeds if the directory already exists. + + Parameters + ---------- + path : str + The path of the new directory. + recursive : bool, default True + Create nested directories as well. + """ + def delete_dir(self, path: str) -> None: + """ + Delete a directory and its contents, recursively. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + """ + def delete_dir_contents( + self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False + ) -> None: + """ + Delete a directory's contents, recursively. + + Like delete_dir, but doesn't delete the directory itself. + + Parameters + ---------- + path : str + The path of the directory to be deleted. + accept_root_dir : boolean, default False + Allow deleting the root directory's contents + (if path is empty or "/") + missing_dir_ok : boolean, default False + If False then an error is raised if path does + not exist + """ + def move(self, src: str, dest: str) -> None: + """ + Move / rename a file or directory. + + If the destination exists: + - if it is a non-empty directory, an error is returned + - otherwise, if it has the same type as the source, it is replaced + - otherwise, behavior is unspecified (implementation-dependent). + + Parameters + ---------- + src : str + The path of the file or the directory to be moved. + dest : str + The destination path where the file or directory is moved to. 
+ + Examples + -------- + Create a new folder with a file: + + >>> local.create_dir("/tmp/other_dir") + >>> local.copy_file(path, "/tmp/move_example.dat") + + Move the file: + + >>> local.move("/tmp/move_example.dat", "/tmp/other_dir/move_example_2.dat") + + Inspect the file info: + + >>> local.get_file_info("/tmp/other_dir/move_example_2.dat") + + >>> local.get_file_info("/tmp/move_example.dat") + + + Delete the folder: + >>> local.delete_dir("/tmp/other_dir") + """ + def copy_file(self, src: str, dest: str) -> None: + """ + Copy a file. + + If the destination exists and is a directory, an error is returned. + Otherwise, it is replaced. + + Parameters + ---------- + src : str + The path of the file to be copied from. + dest : str + The destination path where the file is copied to. + + Examples + -------- + >>> local.copy_file(path, local_path + "/pyarrow-fs-example_copy.dat") + + Inspect the file info: + + >>> local.get_file_info(local_path + "/pyarrow-fs-example_copy.dat") + + >>> local.get_file_info(path) + + """ + def delete_file(self, path: str) -> None: + """ + Delete a file. + + Parameters + ---------- + path : str + The path of the file to be deleted. + """ + def open_input_file(self, path: str) -> NativeFile: + """ + Open an input file for random access reading. + + Parameters + ---------- + path : str + The source to open for reading. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_file()`: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data' + """ + def open_input_stream( + self, path: str, compression: str | None = "detect", buffer_size: int | None = None + ) -> NativeFile: + """ + Open an input stream for sequential reading. + + Parameters + ---------- + path : str + The source to open for reading. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly decompression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Print the data from the file with `open_input_stream()`: + + >>> with local.open_input_stream(path) as f: + ... print(f.readall()) + b'data' + """ + def open_output_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ) -> NativeFile: + """ + Open an output stream for sequential writing. + + If the target already exists, existing data is truncated. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). 
+ Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream(path) as stream: + ... stream.write(b"data") + 4 + """ + def open_append_stream( + self, + path: str, + compression: str | None = "detect", + buffer_size: int | None = None, + metadata: dict[str, str] | None = None, + ): + """ + Open an output stream for appending. + + If the target doesn't exist, a new empty file is created. + + .. note:: + Some filesystem implementations do not support efficient + appending to an existing file, in which case this method will + raise NotImplementedError. + Consider writing to multiple files (using e.g. the dataset layer) + instead. + + Parameters + ---------- + path : str + The source to open for writing. + compression : str optional, default 'detect' + The compression algorithm to use for on-the-fly compression. + If "detect" and source is a file path, then compression will be + chosen based on the file extension. + If None, no compression will be applied. Otherwise, a well-known + algorithm name must be supplied (e.g. "gzip"). + buffer_size : int optional, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. + metadata : dict optional, default None + If not None, a mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + Unsupported metadata keys will be ignored. + + Returns + ------- + stream : NativeFile + + Examples + -------- + Append new data to a FileSystem subclass with nonempty file: + + >>> with local.open_append_stream(path) as f: + ... f.write(b"+newly added") + 12 + + Print out the content to the file: + + >>> with local.open_input_file(path) as f: + ... print(f.readall()) + b'data+newly added' + """ + def normalize_path(self, path: str) -> str: + """ + Normalize filesystem path. + + Parameters + ---------- + path : str + The path to normalize + + Returns + ------- + normalized_path : str + The normalized path + """ + +class LocalFileSystem(FileSystem): + """ + A FileSystem implementation accessing files on the local machine. + + Details such as symlinks are abstracted away (symlinks are always followed, + except when deleting an entry). + + Parameters + ---------- + use_mmap : bool, default False + Whether open_input_stream and open_input_file should return + a mmap'ed file or a regular file. + + Examples + -------- + Create a FileSystem object with LocalFileSystem constructor: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> local + + + and write data on to the file: + + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + >>> with local.open_input_stream("/tmp/local_fs.dat") as stream: + ... print(stream.readall()) + b'data' + + Create a FileSystem object inferred from a URI of the saved file: + + >>> local_new, path = fs.LocalFileSystem().from_uri("/tmp/local_fs.dat") + >>> local_new + >> path + '/tmp/local_fs.dat' + + Check if FileSystems `local` and `local_new` are equal: + + >>> local.equals(local_new) + True + + Compare two different FileSystems: + + >>> local2 = fs.LocalFileSystem(use_mmap=True) + >>> local.equals(local2) + False + + Copy a file and print out the data: + + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/local_fs-copy.dat") + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as stream: + ... 
print(stream.readall()) + b'data' + + Open an output stream for appending, add text and print the new data: + + >>> with local.open_append_stream("/tmp/local_fs-copy.dat") as f: + ... f.write(b"+newly added") + 12 + + >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as f: + ... print(f.readall()) + b'data+newly added' + + Create a directory, copy a file into it and then delete the whole directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.delete_dir("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + + Create a directory, copy a file into it and then delete + the content of the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.delete_dir_contents("/tmp/new_folder") + >>> local.get_file_info("/tmp/new_folder") + + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + + Create a directory, copy a file into it and then delete + the file from the directory: + + >>> local.create_dir("/tmp/new_folder") + >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") + >>> local.delete_file("/tmp/new_folder/local_fs.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs.dat") + + >>> local.get_file_info("/tmp/new_folder") + + + Move the file: + + >>> local.move("/tmp/local_fs-copy.dat", "/tmp/new_folder/local_fs-copy.dat") + >>> local.get_file_info("/tmp/new_folder/local_fs-copy.dat") + + >>> local.get_file_info("/tmp/local_fs-copy.dat") + + + To finish delete the file left: + >>> local.delete_file("/tmp/local_fs.dat") + """ + + def __init__(self, *, use_mmap: bool = False) -> None: ... + +class SubTreeFileSystem(FileSystem): + """ + Delegates to another implementation after prepending a fixed base path. + + This is useful to expose a logical view of a subtree of a filesystem, + for example a directory in a LocalFileSystem. + + Note, that this makes no security guarantee. For example, symlinks may + allow to "escape" the subtree and access other parts of the underlying + filesystem. + + Parameters + ---------- + base_path : str + The root of the subtree. + base_fs : FileSystem + FileSystem object the operations delegated to. + + Examples + -------- + Create a LocalFileSystem instance: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: + ... stream.write(b"data") + 4 + + Create a directory and a SubTreeFileSystem instance: + + >>> local.create_dir("/tmp/sub_tree") + >>> subtree = fs.SubTreeFileSystem("/tmp/sub_tree", local) + + Write data into the existing file: + + >>> with subtree.open_append_stream("sub_tree_fs.dat") as f: + ... f.write(b"+newly added") + 12 + + Print out the attributes: + + >>> subtree.base_fs + + >>> subtree.base_path + '/tmp/sub_tree/' + + Get info for the given directory or given file: + + >>> subtree.get_file_info("") + + >>> subtree.get_file_info("sub_tree_fs.dat") + + + Delete the file and directory: + + >>> subtree.delete_file("sub_tree_fs.dat") + >>> local.delete_dir("/tmp/sub_tree") + >>> local.delete_file("/tmp/local_fs.dat") + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__(self, base_path: str, base_fs: FileSystem): ... + @property + def base_path(self) -> str: ... 
+ @property + def base_fs(self) -> FileSystem: ... + +class _MockFileSystem(FileSystem): + def __init__(self, current_time: dt.datetime | None = None) -> None: ... + +class PyFileSystem(FileSystem): + """ + A FileSystem with behavior implemented in Python. + + Parameters + ---------- + handler : FileSystemHandler + The handler object implementing custom filesystem behavior. + + Examples + -------- + Create an fsspec-based filesystem object for GitHub: + + >>> from fsspec.implementations import github + >>> gfs = github.GithubFileSystem("apache", "arrow") # doctest: +SKIP + + Get a PyArrow FileSystem object: + + >>> from pyarrow.fs import PyFileSystem, FSSpecHandler + >>> pa_fs = PyFileSystem(FSSpecHandler(gfs)) # doctest: +SKIP + + Use :func:`~pyarrow.fs.FileSystem` functionality ``get_file_info()``: + + >>> pa_fs.get_file_info("README.md") # doctest: +SKIP + + """ + def __init__(self, handler: FileSystemHandler) -> None: ... + @property + def handler(self) -> FileSystemHandler: + """ + The filesystem's underlying handler. + + Returns + ------- + handler : FileSystemHandler + """ + +class FileSystemHandler(ABC): + """ + An abstract class exposing methods to implement PyFileSystem's behavior. + """ + @abstractmethod + def get_type_name(self) -> str: + """ + Implement PyFileSystem.type_name. + """ + @abstractmethod + def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(paths). + + Parameters + ---------- + paths : list of str + paths for which we want to retrieve the info. + """ + @abstractmethod + def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: + """ + Implement PyFileSystem.get_file_info(selector). + + Parameters + ---------- + selector : FileSelector + selector for which we want to retrieve the info. + """ + + @abstractmethod + def create_dir(self, path: str, recursive: bool) -> None: + """ + Implement PyFileSystem.create_dir(...). + + Parameters + ---------- + path : str + path of the directory. + recursive : bool + if the parent directories should be created too. + """ + @abstractmethod + def delete_dir(self, path: str) -> None: + """ + Implement PyFileSystem.delete_dir(...). + + Parameters + ---------- + path : str + path of the directory. + """ + @abstractmethod + def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: + """ + Implement PyFileSystem.delete_dir_contents(...). + + Parameters + ---------- + path : str + path of the directory. + missing_dir_ok : bool + if False an error should be raised if path does not exist + """ + @abstractmethod + def delete_root_dir_contents(self) -> None: + """ + Implement PyFileSystem.delete_dir_contents("/", accept_root_dir=True). + """ + @abstractmethod + def delete_file(self, path: str) -> None: + """ + Implement PyFileSystem.delete_file(...). + + Parameters + ---------- + path : str + path of the file. + """ + @abstractmethod + def move(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.move(...). + + Parameters + ---------- + src : str + path of what should be moved. + dest : str + path of where it should be moved to. + """ + + @abstractmethod + def copy_file(self, src: str, dest: str) -> None: + """ + Implement PyFileSystem.copy_file(...). + + Parameters + ---------- + src : str + path of what should be copied. + dest : str + path of where it should be copied to. + """ + @abstractmethod + def open_input_stream(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_stream(...). 
+ + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_input_file(self, path: str) -> NativeFile: + """ + Implement PyFileSystem.open_input_file(...). + + Parameters + ---------- + path : str + path of what should be opened. + """ + @abstractmethod + def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_output_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + + @abstractmethod + def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: + """ + Implement PyFileSystem.open_append_stream(...). + + Parameters + ---------- + path : str + path of what should be opened. + metadata : mapping + Mapping of string keys to string values. + Some filesystems support storing metadata along the file + (such as "Content-Type"). + """ + @abstractmethod + def normalize_path(self, path: str) -> str: + """ + Implement PyFileSystem.normalize_path(...). + + Parameters + ---------- + path : str + path of what should be normalized. + """ diff --git a/python/pyarrow-stubs/_gcsfs.pyi b/python/pyarrow-stubs/_gcsfs.pyi new file mode 100644 index 00000000000..4fc7ea68e48 --- /dev/null +++ b/python/pyarrow-stubs/_gcsfs.pyi @@ -0,0 +1,83 @@ +import datetime as dt + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class GcsFileSystem(FileSystem): + """ + Google Cloud Storage (GCS) backed FileSystem implementation + + By default uses the process described in https://google.aip.dev/auth/4110 + to resolve credentials. If not running on Google Cloud Platform (GCP), + this generally requires the environment variable + GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file + containing credentials. + + Note: GCS buckets are special and the operations available on them may be + limited or more expensive than expected compared to local file systems. + + Note: When pickling a GcsFileSystem that uses default credentials, resolution + credentials are not stored in the serialized data. Therefore, when unpickling + it is assumed that the necessary credentials are in place for the target + process. + + Parameters + ---------- + anonymous : boolean, default False + Whether to connect anonymously. + If true, will not attempt to look up credentials using standard GCP + configuration methods. + access_token : str, default None + GCP access token. If provided, temporary credentials will be fetched by + assuming this role; also, a `credential_token_expiration` must be + specified as well. + target_service_account : str, default None + An optional service account to try to impersonate when accessing GCS. This + requires the specified credential user or service account to have the necessary + permissions. + credential_token_expiration : datetime, default None + Expiration for credential generated with an access token. Must be specified + if `access_token` is specified. + default_bucket_location : str, default 'US' + GCP region to create buckets in. + scheme : str, default 'https' + GCS connection transport scheme. + endpoint_override : str, default None + Override endpoint with a connect string such as "localhost:9000" + default_metadata : mapping or pyarrow.KeyValueMetadata, default None + Default metadata for `open_output_stream`. 
This will be ignored if + non-empty metadata is passed to `open_output_stream`. + retry_time_limit : timedelta, default None + Set the maximum amount of time the GCS client will attempt to retry + transient errors. Subsecond granularity is ignored. + project_id : str, default None + The GCP project identifier to use for creating buckets. + If not set, the library uses the GOOGLE_CLOUD_PROJECT environment + variable. Most I/O operations do not need a project id, only applications + that create new buckets need a project id. + """ + + def __init__( + self, + *, + anonymous: bool = False, + access_token: str | None = None, + target_service_account: str | None = None, + credential_token_expiration: dt.datetime | None = None, + default_bucket_location: str = "US", + scheme: str = "https", + endpoint_override: str | None = None, + default_metadata: dict | KeyValueMetadata | None = None, + retry_time_limit: dt.timedelta | None = None, + project_id: str | None = None, + ): ... + @property + def default_bucket_location(self) -> str: + """ + The GCP location this filesystem will write to. + """ + @property + def project_id(self) -> str: + """ + The GCP project id this filesystem will use. + """ diff --git a/python/pyarrow-stubs/_hdfs.pyi b/python/pyarrow-stubs/_hdfs.pyi new file mode 100644 index 00000000000..200f669379b --- /dev/null +++ b/python/pyarrow-stubs/_hdfs.pyi @@ -0,0 +1,75 @@ +from _typeshed import StrPath + +from ._fs import FileSystem + +class HadoopFileSystem(FileSystem): + """ + HDFS backed FileSystem implementation + + Parameters + ---------- + host : str + HDFS host to connect to. Set to "default" for fs.defaultFS from + core-site.xml. + port : int, default 8020 + HDFS port to connect to. Set to 0 for default or logical (HA) nodes. + user : str, default None + Username when connecting to HDFS; None implies login user. + replication : int, default 3 + Number of copies each block will have. + buffer_size : int, default 0 + If 0, no buffering will happen otherwise the size of the temporary read + and write buffer. + default_block_size : int, default None + None means the default configuration for HDFS, a typical block size is + 128 MB. + kerb_ticket : string or path, default None + If not None, the path to the Kerberos ticket cache. + extra_conf : dict, default None + Extra key/value pairs for configuration; will override any + hdfs-site.xml properties. + + Examples + -------- + >>> from pyarrow import fs + >>> hdfs = fs.HadoopFileSystem( + ... host, port, user=user, kerb_ticket=ticket_cache_path + ... ) # doctest: +SKIP + + For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. + """ + def __init__( + self, + host: str, + port: int = 8020, + *, + user: str | None = None, + replication: int = 3, + buffer_size: int = 0, + default_block_size: int | None = None, + kerb_ticket: StrPath | None = None, + extra_conf: dict | None = None, + ): ... + @staticmethod + def from_uri(uri: str) -> HadoopFileSystem: # type: ignore[override] + """ + Instantiate HadoopFileSystem object from an URI string. + + The following two calls are equivalent + + * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\ +&replication=1')`` + * ``HadoopFileSystem('localhost', port=8020, user='test', \ +replication=1)`` + + Parameters + ---------- + uri : str + A string URI describing the connection to HDFS. + In order to change the user, replication, buffer_size or + default_block_size pass the values as query parts. 
+ + Returns + ------- + HadoopFileSystem + """ diff --git a/python/pyarrow-stubs/_json.pyi b/python/pyarrow-stubs/_json.pyi new file mode 100644 index 00000000000..43d2ae83cd8 --- /dev/null +++ b/python/pyarrow-stubs/_json.pyi @@ -0,0 +1,169 @@ +from typing import IO, Any, Literal + +from _typeshed import StrPath + +from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable + +class ReadOptions(_Weakrefable): + """ + Options for reading JSON files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual chunks in the Table. + """ + + use_threads: bool + """ + Whether to use multiple threads to accelerate reading. + """ + block_size: int + """ + How much bytes to process at a time from the input stream. + + This will determine multi-threading granularity as well as the size of + individual chunks in the Table. + """ + def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... + def equals(self, other: ReadOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ReadOptions + + Returns + ------- + bool + """ + +class ParseOptions(_Weakrefable): + """ + Options for parsing JSON files. + + Parameters + ---------- + explicit_schema : Schema, optional (default None) + Optional explicit schema (no type inference, ignores other fields). + newlines_in_values : bool, optional (default False) + Whether objects may be printed across multiple lines (for example + pretty printed). If false, input must end with an empty line. + unexpected_field_behavior : str, default "infer" + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + """ + + explicit_schema: Schema + """ + Optional explicit schema (no type inference, ignores other fields) + """ + newlines_in_values: bool + """ + Whether newline characters are allowed in JSON values. + Setting this to True reduces the performance of multi-threaded + JSON reading. + """ + unexpected_field_behavior: Literal["ignore", "error", "infer"] + """ + How JSON fields outside of explicit_schema (if given) are treated. + + Possible behaviors: + + - "ignore": unexpected JSON fields are ignored + - "error": error out on unexpected JSON fields + - "infer": unexpected JSON fields are type-inferred and included in + the output + + Set to "infer" by default. + """ + def __init__( + self, + explicit_schema: Schema | None = None, + newlines_in_values: bool | None = None, + unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", + ): ... + def equals(self, other: ParseOptions) -> bool: + """ + Parameters + ---------- + other : pyarrow.json.ParseOptions + + Returns + ------- + bool + """ + +class JSONStreamingReader(RecordBatchReader): + """An object that reads record batches incrementally from a JSON file. + + Should not be instantiated directly by user code. + """ + +def read_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> Table: + """ + Read a Table from a stream of JSON data. 
+ + Parameters + ---------- + input_file : str, path or file-like object + The location of JSON data. Currently only the line-delimited JSON + format is supported. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see ReadOptions constructor for defaults). + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see ParseOptions constructor for defaults). + memory_pool : MemoryPool, optional + Pool to allocate Table memory from. + + Returns + ------- + :class:`pyarrow.Table` + Contents of the JSON file as a in-memory table. + """ + +def open_json( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + memory_pool: MemoryPool | None = None, +) -> JSONStreamingReader: + """ + Open a streaming reader of JSON data. + + Reading using this function is always single-threaded. + + Parameters + ---------- + input_file : string, path or file-like object + The location of JSON data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.json.ReadOptions, optional + Options for the JSON reader (see pyarrow.json.ReadOptions constructor + for defaults) + parse_options : pyarrow.json.ParseOptions, optional + Options for the JSON parser + (see pyarrow.json.ParseOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from + + Returns + ------- + :class:`pyarrow.json.JSONStreamingReader` + """ diff --git a/python/pyarrow-stubs/_orc.pyi b/python/pyarrow-stubs/_orc.pyi new file mode 100644 index 00000000000..71bf0dde9ba --- /dev/null +++ b/python/pyarrow-stubs/_orc.pyi @@ -0,0 +1,56 @@ +from typing import IO, Literal + +from .lib import ( + Buffer, + KeyValueMetadata, + MemoryPool, + NativeFile, + RecordBatch, + Schema, + Table, + _Weakrefable, +) + +class ORCReader(_Weakrefable): + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open(self, source: str | NativeFile | Buffer, use_memory_map: bool = True): ... + def metadata(self) -> KeyValueMetadata: ... + def schema(self) -> Schema: ... + def nrows(self) -> int: ... + def nstripes(self) -> int: ... + def file_version(self) -> str: ... + def software_version(self) -> str: ... + def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + def compression_size(self) -> int: ... + def row_index_stride(self) -> int: ... + def writer(self) -> str: ... + def writer_version(self) -> str: ... + def nstripe_statistics(self) -> int: ... + def content_length(self) -> int: ... + def stripe_statistics_length(self) -> int: ... + def file_footer_length(self) -> int: ... + def file_postscript_length(self) -> int: ... + def file_length(self) -> int: ... + def serialized_file_tail(self) -> int: ... + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + def read(self, columns: list[str] | None = None) -> Table: ... 
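
The JSON reading entry points stubbed above (read_json and open_json, together with ReadOptions and ParseOptions) are easiest to follow with a short usage sketch. The file name data.jsonl and the explicit schema below are illustrative assumptions, not part of the patch:

import pyarrow as pa
import pyarrow.json as pj

read_opts = pj.ReadOptions(use_threads=True, block_size=1 << 20)
parse_opts = pj.ParseOptions(
    explicit_schema=pa.schema([("id", pa.int64()), ("name", pa.string())]),
    unexpected_field_behavior="ignore",
)

# One-shot read of line-delimited JSON into a Table.
table = pj.read_json("data.jsonl", read_options=read_opts, parse_options=parse_opts)

# Incremental read via the streaming reader.
reader = pj.open_json("data.jsonl", read_options=read_opts, parse_options=parse_opts)
for batch in reader:  # yields RecordBatch objects
    print(batch.num_rows)

open_json returns a JSONStreamingReader, so batches arrive incrementally instead of materializing a single Table.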
+
+class ORCWriter(_Weakrefable):
+    def open(
+        self,
+        where: str | NativeFile | IO,
+        *,
+        file_version: str | None = None,
+        batch_size: int | None = None,
+        stripe_size: int | None = None,
+        compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] | None = None,
+        compression_block_size: int | None = None,
+        compression_strategy: Literal["COMPRESSION", "SPEED"] | None = None,
+        row_index_stride: int | None = None,
+        padding_tolerance: float | None = None,
+        dictionary_key_size_threshold: float | None = None,
+        bloom_filter_columns: list[int] | None = None,
+        bloom_filter_fpp: float | None = None,
+    ) -> None: ...
+    def write(self, table: Table) -> None: ...
+    def close(self) -> None: ...
diff --git a/python/pyarrow-stubs/_parquet.pyi b/python/pyarrow-stubs/_parquet.pyi
new file mode 100644
index 00000000000..a9187df0428
--- /dev/null
+++ b/python/pyarrow-stubs/_parquet.pyi
@@ -0,0 +1,445 @@
+from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict
+
+from _typeshed import StrPath
+
+from ._stubs_typing import Order
+from .lib import (
+    Buffer,
+    ChunkedArray,
+    KeyValueMetadata,
+    MemoryPool,
+    NativeFile,
+    RecordBatch,
+    Schema,
+    Table,
+    _Weakrefable,
+)
+
+_PhysicalType: TypeAlias = Literal[
+    "BOOLEAN",
+    "INT32",
+    "INT64",
+    "INT96",
+    "FLOAT",
+    "DOUBLE",
+    "BYTE_ARRAY",
+    "FIXED_LEN_BYTE_ARRAY",
+    "UNKNOWN",
+]
+_LogicTypeName: TypeAlias = Literal[
+    "UNDEFINED",
+    "STRING",
+    "MAP",
+    "LIST",
+    "ENUM",
+    "DECIMAL",
+    "DATE",
+    "TIME",
+    "TIMESTAMP",
+    "INT",
+    "FLOAT16",
+    "JSON",
+    "BSON",
+    "UUID",
+    "NONE",
+    "UNKNOWN",
+]
+_ConvertedType: TypeAlias = Literal[
+    "NONE",
+    "UTF8",
+    "MAP",
+    "MAP_KEY_VALUE",
+    "LIST",
+    "ENUM",
+    "DECIMAL",
+    "DATE",
+    "TIME_MILLIS",
+    "TIME_MICROS",
+    "TIMESTAMP_MILLIS",
+    "TIMESTAMP_MICROS",
+    "UINT_8",
+    "UINT_16",
+    "UINT_32",
+    "UINT_64",
+    "INT_8",
+    "INT_16",
+    "INT_32",
+    "INT_64",
+    "JSON",
+    "BSON",
+    "INTERVAL",
+    "UNKNOWN",
+]
+_Encoding: TypeAlias = Literal[
+    "PLAIN",
+    "PLAIN_DICTIONARY",
+    "RLE",
+    "BIT_PACKED",
+    "DELTA_BINARY_PACKED",
+    "DELTA_LENGTH_BYTE_ARRAY",
+    "DELTA_BYTE_ARRAY",
+    "RLE_DICTIONARY",
+    "BYTE_STREAM_SPLIT",
+    "UNKNOWN",
+]
+_Compression: TypeAlias = Literal[
+    "UNCOMPRESSED",
+    "SNAPPY",
+    "GZIP",
+    "LZO",
+    "BROTLI",
+    "LZ4",
+    "ZSTD",
+    "UNKNOWN",
+]
+
+class _Statistics(TypedDict):
+    has_min_max: bool
+    min: Any | None
+    max: Any | None
+    null_count: int | None
+    distinct_count: int | None
+    num_values: int
+    physical_type: _PhysicalType
+
+class Statistics(_Weakrefable):
+    def to_dict(self) -> _Statistics: ...
+    def equals(self, other: Statistics) -> bool: ...
+    @property
+    def has_min_max(self) -> bool: ...
+    @property
+    def has_null_count(self) -> bool: ...
+    @property
+    def has_distinct_count(self) -> bool: ...
+    @property
+    def min_raw(self) -> Any | None: ...
+    @property
+    def max_raw(self) -> Any | None: ...
+    @property
+    def min(self) -> Any | None: ...
+    @property
+    def max(self) -> Any | None: ...
+    @property
+    def null_count(self) -> int | None: ...
+    @property
+    def distinct_count(self) -> int | None: ...
+    @property
+    def num_values(self) -> int: ...
+    @property
+    def physical_type(self) -> _PhysicalType: ...
+    @property
+    def logical_type(self) -> ParquetLogicalType: ...
+    @property
+    def converted_type(self) -> _ConvertedType | None: ...
+
+class ParquetLogicalType(_Weakrefable):
+    def to_json(self) -> str: ...
+    @property
+    def type(self) -> _LogicTypeName: ...
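
The Statistics and ParquetLogicalType stubs above describe objects normally reached through the public pyarrow.parquet metadata accessors. A minimal sketch, assuming an existing file named example.parquet (the file name is an illustration, not part of the patch):

import pyarrow.parquet as pq

meta = pq.read_metadata("example.parquet")   # FileMetaData
col = meta.row_group(0).column(0)            # ColumnChunkMetaData
print(col.physical_type, col.compression)

stats = col.statistics                       # Statistics, or None if not written
if stats is not None and stats.has_min_max:
    print(stats.min, stats.max)
if stats is not None and stats.has_null_count:
    print(stats.null_count)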
+ +class _ColumnChunkMetaData(TypedDict): + file_offset: int + file_path: str | None + physical_type: _PhysicalType + num_values: int + path_in_schema: str + is_stats_set: bool + statistics: Statistics | None + compression: _Compression + encodings: tuple[_Encoding, ...] + has_dictionary_page: bool + dictionary_page_offset: int | None + data_page_offset: int + total_compressed_size: int + total_uncompressed_size: int + +class ColumnChunkMetaData(_Weakrefable): + def to_dict(self) -> _ColumnChunkMetaData: ... + def equals(self, other: ColumnChunkMetaData) -> bool: ... + @property + def file_offset(self) -> int: ... + @property + def file_path(self) -> str | None: ... + @property + def physical_type(self) -> _PhysicalType: ... + @property + def num_values(self) -> int: ... + @property + def path_in_schema(self) -> str: ... + @property + def is_stats_set(self) -> bool: ... + @property + def statistics(self) -> Statistics | None: ... + @property + def compression(self) -> _Compression: ... + @property + def encodings(self) -> tuple[_Encoding, ...]: ... + @property + def has_dictionary_page(self) -> bool: ... + @property + def dictionary_page_offset(self) -> int | None: ... + @property + def data_page_offset(self) -> int: ... + @property + def has_index_page(self) -> bool: ... + @property + def index_page_offset(self) -> int: ... + @property + def total_compressed_size(self) -> int: ... + @property + def total_uncompressed_size(self) -> int: ... + @property + def has_offset_index(self) -> bool: ... + @property + def has_column_index(self) -> bool: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + +class _SortingColumn(TypedDict): + column_index: int + descending: bool + nulls_first: bool + +class SortingColumn: + def __init__( + self, column_index: int, descending: bool = False, nulls_first: bool = False + ) -> None: ... + @classmethod + def from_ordering( + cls, + schema: Schema, + sort_keys: Sequence[tuple[str, Order]], + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> tuple[SortingColumn, ...]: ... + @staticmethod + def to_ordering( + schema: Schema, sorting_columns: tuple[SortingColumn, ...] + ) -> tuple[Sequence[tuple[str, Order]], Literal["at_start", "at_end"]]: ... + def __hash__(self) -> int: ... + @property + def column_index(self) -> int: ... + @property + def descending(self) -> bool: ... + @property + def nulls_first(self) -> bool: ... + def to_dict(self) -> _SortingColumn: ... + +class _RowGroupMetaData(TypedDict): + num_columns: int + num_rows: int + total_byte_size: int + columns: list[ColumnChunkMetaData] + sorting_columns: list[SortingColumn] + +class RowGroupMetaData(_Weakrefable): + def __init__(self, parent: FileMetaData, index: int) -> None: ... + def equals(self, other: RowGroupMetaData) -> bool: ... + def column(self, i: int) -> ColumnChunkMetaData: ... + def to_dict(self) -> _RowGroupMetaData: ... + @property + def num_columns(self) -> int: ... + @property + def num_rows(self) -> int: ... + @property + def total_byte_size(self) -> int: ... + @property + def sorting_columns(self) -> list[SortingColumn]: ... + +class _FileMetaData(TypedDict): + created_by: str + num_columns: int + num_rows: int + num_row_groups: int + format_version: str + serialized_size: int + +class FileMetaData(_Weakrefable): + def __hash__(self) -> int: ... + def to_dict(self) -> _FileMetaData: ... + def equals(self, other: FileMetaData) -> bool: ... + @property + def schema(self) -> ParquetSchema: ... + @property + def serialized_size(self) -> int: ... 
+
+    @property
+    def num_columns(self) -> int: ...
+    @property
+    def num_rows(self) -> int: ...
+    @property
+    def num_row_groups(self) -> int: ...
+    @property
+    def format_version(self) -> str: ...
+    @property
+    def created_by(self) -> str: ...
+    @property
+    def metadata(self) -> dict[bytes, bytes] | None: ...
+    def row_group(self, i: int) -> RowGroupMetaData: ...
+    def set_file_path(self, path: str) -> None: ...
+    def append_row_groups(self, other: FileMetaData) -> None: ...
+    def write_metadata_file(self, where: StrPath | Buffer | NativeFile | IO) -> None: ...
+
+class ParquetSchema(_Weakrefable):
+    def __init__(self, container: FileMetaData) -> None: ...
+    def __getitem__(self, i: int) -> ColumnSchema: ...
+    def __hash__(self) -> int: ...
+    def __len__(self) -> int: ...
+    @property
+    def names(self) -> list[str]: ...
+    def to_arrow_schema(self) -> Schema: ...
+    def equals(self, other: ParquetSchema) -> bool: ...
+    def column(self, i: int) -> ColumnSchema: ...
+
+class ColumnSchema(_Weakrefable):
+    def __init__(self, schema: ParquetSchema, index: int) -> None: ...
+    def equals(self, other: ColumnSchema) -> bool: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def path(self) -> str: ...
+    @property
+    def max_definition_level(self) -> int: ...
+    @property
+    def max_repetition_level(self) -> int: ...
+    @property
+    def physical_type(self) -> _PhysicalType: ...
+    @property
+    def logical_type(self) -> ParquetLogicalType: ...
+    @property
+    def converted_type(self) -> _ConvertedType | None: ...
+    @property
+    def length(self) -> int | None: ...
+    @property
+    def precision(self) -> int | None: ...
+    @property
+    def scale(self) -> int | None: ...
+
+class ParquetReader(_Weakrefable):
+    def __init__(self, memory_pool: MemoryPool | None = None) -> None: ...
+    def open(
+        self,
+        source: StrPath | NativeFile | IO,
+        *,
+        use_memory_map: bool = False,
+        read_dictionary: Iterable[int] | Iterable[str] | None = None,
+        metadata: FileMetaData | None = None,
+        buffer_size: int = 0,
+        pre_buffer: bool = False,
+        coerce_int96_timestamp_unit: str | None = None,
+        decryption_properties: FileDecryptionProperties | None = None,
+        thrift_string_size_limit: int | None = None,
+        thrift_container_size_limit: int | None = None,
+        page_checksum_verification: bool = False,
+    ): ...
+    @property
+    def column_paths(self) -> list[str]: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def schema_arrow(self) -> Schema: ...
+    @property
+    def num_row_groups(self) -> int: ...
+    def set_use_threads(self, use_threads: bool) -> None: ...
+    def set_batch_size(self, batch_size: int) -> None: ...
+    def iter_batches(
+        self,
+        batch_size: int,
+        row_groups: list[int],
+        column_indices: list[int] | None = None,
+        use_threads: bool = True,
+    ) -> Iterator[RecordBatch]: ...
+    def read_row_group(
+        self, i: int, column_indices: list[int] | None = None, use_threads: bool = True
+    ) -> Table: ...
+    def read_row_groups(
+        self,
+        row_groups: list[int],
+        column_indices: list[int] | None = None,
+        use_threads: bool = True,
+    ) -> Table: ...
+    def read_all(
+        self, column_indices: list[int] | None = None, use_threads: bool = True
+    ) -> Table: ...
+    def scan_contents(self, column_indices: list[int] | None = None, batch_size: int = 65536): ...
+    def column_name_idx(self, column_name: str) -> int: ...
+    def read_column(self, column_index: int) -> ChunkedArray: ...
+    def close(self) -> None: ...
+    @property
+    def closed(self) -> bool: ...
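
ParquetReader above is the low-level backing class; user code normally reaches it through pyarrow.parquet.ParquetFile. A minimal sketch of that public path (the file name and column names are illustrative assumptions):

import pyarrow.parquet as pq

pf = pq.ParquetFile("example.parquet")   # uses ParquetReader.open() under the hood
print(pf.schema_arrow)                   # Arrow schema derived from the Parquet schema
print(pf.metadata.num_row_groups)

table = pf.read_row_group(0)             # one row group as a Table
for batch in pf.iter_batches(batch_size=64_000, columns=["id", "name"]):
    print(batch.num_rows)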
+
+class ParquetWriter(_Weakrefable):
+    def __init__(
+        self,
+        where: StrPath | NativeFile | IO,
+        schema: Schema,
+        use_dictionary: bool | list[str] | None = None,
+        compression: _Compression | dict[str, _Compression] | None = None,
+        version: str | None = None,
+        write_statistics: bool | list[str] | None = None,
+        memory_pool: MemoryPool | None = None,
+        use_deprecated_int96_timestamps: bool = False,
+        coerce_timestamps: Literal["ms", "us"] | None = None,
+        data_page_size: int | None = None,
+        allow_truncated_timestamps: bool = False,
+        compression_level: int | dict[str, int] | None = None,
+        use_byte_stream_split: bool | list[str] = False,
+        column_encoding: _Encoding | dict[str, _Encoding] | None = None,
+        writer_engine_version: str | None = None,
+        data_page_version: str | None = None,
+        use_compliant_nested_type: bool = True,
+        encryption_properties: FileEncryptionProperties | None = None,
+        write_batch_size: int | None = None,
+        dictionary_pagesize_limit: int | None = None,
+        store_schema: bool = True,
+        write_page_index: bool = False,
+        write_page_checksum: bool = False,
+        sorting_columns: tuple[SortingColumn, ...] | None = None,
+        store_decimal_as_integer: bool = False,
+    ): ...
+    def close(self) -> None: ...
+    def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...
+    def add_key_value_metadata(self, key_value_metadata: KeyValueMetadata) -> None: ...
+    @property
+    def metadata(self) -> FileMetaData: ...
+    @property
+    def use_dictionary(self) -> bool | list[str] | None: ...
+    @property
+    def use_deprecated_int96_timestamps(self) -> bool: ...
+    @property
+    def use_byte_stream_split(self) -> bool | list[str]: ...
+    @property
+    def column_encoding(self) -> _Encoding | dict[str, _Encoding] | None: ...
+    @property
+    def coerce_timestamps(self) -> Literal["ms", "us"] | None: ...
+    @property
+    def allow_truncated_timestamps(self) -> bool: ...
+    @property
+    def compression(self) -> _Compression | dict[str, _Compression] | None: ...
+    @property
+    def compression_level(self) -> int | dict[str, int] | None: ...
+    @property
+    def data_page_version(self) -> str | None: ...
+    @property
+    def use_compliant_nested_type(self) -> bool: ...
+    @property
+    def version(self) -> str | None: ...
+    @property
+    def write_statistics(self) -> bool | list[str] | None: ...
+    @property
+    def writer_engine_version(self) -> str: ...
+    @property
+    def row_group_size(self) -> int: ...
+    @property
+    def data_page_size(self) -> int: ...
+    @property
+    def encryption_properties(self) -> FileEncryptionProperties: ...
+    @property
+    def write_batch_size(self) -> int: ...
+    @property
+    def dictionary_pagesize_limit(self) -> int: ...
+    @property
+    def store_schema(self) -> bool: ...
+    @property
+    def store_decimal_as_integer(self) -> bool: ...
+
+class FileEncryptionProperties: ...
+class FileDecryptionProperties: ...
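
To round out the _parquet stubs, a small sketch of the writer side through the public pyarrow.parquet.ParquetWriter wrapper (the path, column data and options are illustrative assumptions):

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# The writer stays open, so several tables/batches with the same schema
# can be appended as separate row groups before close().
with pq.ParquetWriter("example.parquet", table.schema, compression="ZSTD") as writer:
    writer.write_table(table)
    writer.write_table(table)  # second row group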
diff --git a/python/pyarrow-stubs/_parquet_encryption.pyi b/python/pyarrow-stubs/_parquet_encryption.pyi new file mode 100644 index 00000000000..c707edb844a --- /dev/null +++ b/python/pyarrow-stubs/_parquet_encryption.pyi @@ -0,0 +1,67 @@ +import datetime as dt + +from typing import Callable + +from ._parquet import FileDecryptionProperties, FileEncryptionProperties +from .lib import _Weakrefable + +class EncryptionConfiguration(_Weakrefable): + footer_key: str + column_keys: dict[str, list[str]] + encryption_algorithm: str + plaintext_footer: bool + double_wrapping: bool + cache_lifetime: dt.timedelta + internal_key_material: bool + data_key_length_bits: int + + def __init__( + self, + footer_key: str, + *, + column_keys: dict[str, str | list[str]] | None = None, + encryption_algorithm: str | None = None, + plaintext_footer: bool | None = None, + double_wrapping: bool | None = None, + cache_lifetime: dt.timedelta | None = None, + internal_key_material: bool | None = None, + data_key_length_bits: int | None = None, + ) -> None: ... + +class DecryptionConfiguration(_Weakrefable): + cache_lifetime: dt.timedelta + def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... + +class KmsConnectionConfig(_Weakrefable): + kms_instance_id: str + kms_instance_url: str + key_access_token: str + custom_kms_conf: dict[str, str] + def __init__( + self, + *, + kms_instance_id: str | None = None, + kms_instance_url: str | None = None, + key_access_token: str | None = None, + custom_kms_conf: dict[str, str] | None = None, + ) -> None: ... + def refresh_key_access_token(self, value: str) -> None: ... + +class KmsClient(_Weakrefable): + def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... + def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... + +class CryptoFactory(_Weakrefable): + def __init__(self, kms_client_factory: Callable[[KmsConnectionConfig], KmsClient]): ... + def file_encryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + encryption_config: EncryptionConfiguration, + ) -> FileEncryptionProperties: ... + def file_decryption_properties( + self, + kms_connection_config: KmsConnectionConfig, + decryption_config: DecryptionConfiguration | None = None, + ) -> FileDecryptionProperties: ... + def remove_cache_entries_for_token(self, access_token: str) -> None: ... + def remove_cache_entries_for_all_tokens(self) -> None: ... diff --git a/python/pyarrow-stubs/_s3fs.pyi b/python/pyarrow-stubs/_s3fs.pyi new file mode 100644 index 00000000000..50f63cd7e32 --- /dev/null +++ b/python/pyarrow-stubs/_s3fs.pyi @@ -0,0 +1,75 @@ +import enum + +from typing import Literal, TypedDict +from typing_extensions import Required, NotRequired + +from ._fs import FileSystem +from .lib import KeyValueMetadata + +class _ProxyOptions(TypedDict): + scheme: Required[Literal["http", "https"]] + host: Required[str] + port: Required[int] + username: NotRequired[str] + password: NotRequired[str] + +class S3LogLevel(enum.IntEnum): + Off = enum.auto() + Fatal = enum.auto() + Error = enum.auto() + Warn = enum.auto() + Info = enum.auto() + Debug = enum.auto() + Trace = enum.auto() + +Off = S3LogLevel.Off +Fatal = S3LogLevel.Fatal +Error = S3LogLevel.Error +Warn = S3LogLevel.Warn +Info = S3LogLevel.Info +Debug = S3LogLevel.Debug +Trace = S3LogLevel.Trace + +def initialize_s3( + log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1 +) -> None: ... +def ensure_s3_initialized() -> None: ... +def finalize_s3() -> None: ...
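For reference, a minimal sketch of how the S3 helpers above and the S3FileSystem declared just below are typically used; the region, proxy host, and port are placeholders:

    from pyarrow import fs

    # Optional: S3 support is initialized lazily, but the log level can be set explicitly.
    fs.initialize_s3(fs.S3LogLevel.Error)

    # proxy_options takes the keys described by _ProxyOptions ('scheme', 'host', 'port', ...).
    s3 = fs.S3FileSystem(
        region="us-east-1",
        anonymous=True,
        proxy_options={"scheme": "http", "host": "proxy.internal", "port": 8080},
    )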
+def ensure_s3_finalized() -> None: ... +def resolve_s3_region(bucket: str) -> str: ... + +class S3RetryStrategy: + max_attempts: int + def __init__(self, max_attempts=3) -> None: ... + +class AwsStandardS3RetryStrategy(S3RetryStrategy): ... +class AwsDefaultS3RetryStrategy(S3RetryStrategy): ... + +class S3FileSystem(FileSystem): + def __init__( + self, + *, + access_key: str | None = None, + secret_key: str | None = None, + session_token: str | None = None, + anonymous: bool = False, + region: str | None = None, + request_timeout: float | None = None, + connect_timeout: float | None = None, + scheme: Literal["http", "https"] = "https", + endpoint_override: str | None = None, + background_writes: bool = True, + default_metadata: dict | KeyValueMetadata | None = None, + role_arn: str | None = None, + session_name: str | None = None, + external_id: str | None = None, + load_frequency: int = 900, + proxy_options: _ProxyOptions | str | None = None, + allow_bucket_creation: bool = False, + allow_bucket_deletion: bool = False, + check_directory_existence_before_creation: bool = False, + retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3), + force_virtual_addressing: bool = False, + ): ... + @property + def region(self) -> str: ... diff --git a/python/pyarrow-stubs/_substrait.pyi b/python/pyarrow-stubs/_substrait.pyi new file mode 100644 index 00000000000..ff226e9521b --- /dev/null +++ b/python/pyarrow-stubs/_substrait.pyi @@ -0,0 +1,39 @@ +from typing import Any, Callable + +from ._compute import Expression +from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable + +def run_query( + plan: Buffer | int, + *, + table_provider: Callable[[list[str], Schema], Table] | None = None, + use_threads: bool = True, +) -> RecordBatchReader: ... +def _parse_json_plan(plan: bytes) -> Buffer: ... + +class SubstraitSchema: + schema: Schema + expression: Expression + def __init__(self, schema: Schema, expression: Expression) -> None: ... + def to_pysubstrait(self) -> Any: ... + +def serialize_schema(schema: Schema) -> SubstraitSchema: ... +def deserialize_schema(buf: Buffer | bytes) -> Schema: ... +def serialize_expressions( + exprs: list[Expression], + names: list[str], + schema: Schema, + *, + allow_arrow_extensions: bool = False, +) -> Buffer: ... + +class BoundExpressions(_Weakrefable): + @property + def schema(self) -> Schema: ... + @property + def expressions(self) -> dict[str, Expression]: ... + @classmethod + def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ... + +def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ... +def get_supported_functions() -> list[str]: ... diff --git a/python/pyarrow-stubs/acero.pyi b/python/pyarrow-stubs/acero.pyi new file mode 100644 index 00000000000..8a520bdc24a --- /dev/null +++ b/python/pyarrow-stubs/acero.pyi @@ -0,0 +1,85 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +from typing import Literal + +from . import lib +from .compute import Expression, FunctionOptions + +_StrOrExpr: TypeAlias = str | Expression + +class Declaration(lib._Weakrefable): + def __init__( + self, + factory_name: str, + options: ExecNodeOptions, + inputs: list[Declaration] | None = None, + ) -> None: ... + @classmethod + def from_sequence(cls, decls: list[Declaration]) -> Self: ... 
+ def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... + def to_table(self, use_threads: bool = True) -> lib.Table: ... + +class ExecNodeOptions(lib._Weakrefable): ... + +class TableSourceNodeOptions(ExecNodeOptions): + def __init__(self, table: lib.Table) -> None: ... + +class FilterNodeOptions(ExecNodeOptions): + def __init__(self, filter_expression: Expression) -> None: ... + +class ProjectNodeOptions(ExecNodeOptions): + def __init__(self, expressions: list[Expression], names: list[str] | None = None) -> None: ... + +class AggregateNodeOptions(ExecNodeOptions): + def __init__( + self, + aggregates: list[tuple[list[str], str, FunctionOptions, str]], + keys: list[_StrOrExpr] | None = None, + ) -> None: ... + +class OrderByNodeOptions(ExecNodeOptions): + def __init__( + self, + sort_keys: tuple[tuple[str, Literal["ascending", "descending"]], ...] = (), + *, + null_placement: Literal["at_start", "at_end"] = "at_end", + ) -> None: ... + +class HashJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + join_type: Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", + ], + left_keys: _StrOrExpr | list[_StrOrExpr], + right_keys: _StrOrExpr | list[_StrOrExpr], + left_output: list[_StrOrExpr] | None = None, + right_output: list[_StrOrExpr] | None = None, + output_suffix_for_left: str = "", + output_suffix_for_right: str = "", + ) -> None: ... + +class AsofJoinNodeOptions(ExecNodeOptions): + def __init__( + self, + left_on: _StrOrExpr, + left_by: _StrOrExpr | list[_StrOrExpr], + right_on: _StrOrExpr, + right_by: _StrOrExpr | list[_StrOrExpr], + tolerance: int, + ) -> None: ... diff --git a/python/pyarrow-stubs/builder.pyi b/python/pyarrow-stubs/builder.pyi new file mode 100644 index 00000000000..4a0e9ca4708 --- /dev/null +++ b/python/pyarrow-stubs/builder.pyi @@ -0,0 +1,89 @@ +from typing import Iterable + +from pyarrow.lib import MemoryPool, _Weakrefable + +from .array import StringArray, StringViewArray + +class StringBuilder(_Weakrefable): + """ + Builder class for UTF8 strings. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. + + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + +class StringViewBuilder(_Weakrefable): + """ + Builder class for UTF8 string views. + + This class exposes facilities for incrementally adding string values and + building the null bitmap for a pyarrow.Array (type='string_view'). + """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def append(self, value: str | bytes | None): + """ + Append a single value to the builder. 
+ + The value can either be a string/bytes object or a null value + (np.nan or None). + + Parameters + ---------- + value : string/bytes or np.nan/None + The value to append to the string array builder. + """ + def append_values(self, values: Iterable[str | bytes | None]): + """ + Append all the values from an iterable. + + Parameters + ---------- + values : iterable of string/bytes or np.nan/None values + The values to append to the string array builder. + """ + def finish(self) -> StringViewArray: + """ + Return result of builder as an Array object; also resets the builder. + + Returns + ------- + array : pyarrow.Array + """ + @property + def null_count(self) -> int: ... + def __len__(self) -> int: ... + +__all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow-stubs/cffi.pyi b/python/pyarrow-stubs/cffi.pyi new file mode 100644 index 00000000000..2ae945c5974 --- /dev/null +++ b/python/pyarrow-stubs/cffi.pyi @@ -0,0 +1,4 @@ +import cffi + +c_source: str +ffi: cffi.FFI diff --git a/python/pyarrow-stubs/compat.pyi b/python/pyarrow-stubs/compat.pyi new file mode 100644 index 00000000000..2ea013555c0 --- /dev/null +++ b/python/pyarrow-stubs/compat.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def encode_file_path(path: str | bytes) -> bytes: ... +def tobytes(o: str | bytes) -> bytes: ... +def frombytes(o: bytes, *, safe: bool = False): ... 
+ +__all__ = ["encode_file_path", "tobytes", "frombytes"] diff --git a/python/pyarrow-stubs/csv.pyi b/python/pyarrow-stubs/csv.pyi new file mode 100644 index 00000000000..510229d7e72 --- /dev/null +++ b/python/pyarrow-stubs/csv.pyi @@ -0,0 +1,27 @@ +from pyarrow._csv import ( + ISO8601, + ConvertOptions, + CSVStreamingReader, + CSVWriter, + InvalidRow, + ParseOptions, + ReadOptions, + WriteOptions, + open_csv, + read_csv, + write_csv, +) + +__all__ = [ + "ISO8601", + "ConvertOptions", + "CSVStreamingReader", + "CSVWriter", + "InvalidRow", + "ParseOptions", + "ReadOptions", + "WriteOptions", + "open_csv", + "read_csv", + "write_csv", +] diff --git a/python/pyarrow-stubs/cuda.pyi b/python/pyarrow-stubs/cuda.pyi new file mode 100644 index 00000000000..e11baf7d4e7 --- /dev/null +++ b/python/pyarrow-stubs/cuda.pyi @@ -0,0 +1,25 @@ +from pyarrow._cuda import ( + BufferReader, + BufferWriter, + Context, + CudaBuffer, + HostBuffer, + IpcMemHandle, + new_host_buffer, + read_message, + read_record_batch, + serialize_record_batch, +) + +__all__ = [ + "BufferReader", + "BufferWriter", + "Context", + "CudaBuffer", + "HostBuffer", + "IpcMemHandle", + "new_host_buffer", + "read_message", + "read_record_batch", + "serialize_record_batch", +] diff --git a/python/pyarrow-stubs/dataset.pyi b/python/pyarrow-stubs/dataset.pyi index a145437bb52..98f1a38aa85 100644 --- a/python/pyarrow-stubs/dataset.pyi +++ b/python/pyarrow-stubs/dataset.pyi @@ -1,229 +1,229 @@ -# from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload -# -# from _typeshed import StrPath -# from pyarrow._dataset import ( -# CsvFileFormat, -# CsvFragmentScanOptions, -# Dataset, -# DatasetFactory, -# DirectoryPartitioning, -# FeatherFileFormat, -# FileFormat, -# FileFragment, -# FilenamePartitioning, -# FileSystemDataset, -# FileSystemDatasetFactory, -# FileSystemFactoryOptions, -# FileWriteOptions, -# Fragment, -# FragmentScanOptions, -# HivePartitioning, -# InMemoryDataset, -# IpcFileFormat, -# IpcFileWriteOptions, -# JsonFileFormat, -# JsonFragmentScanOptions, -# Partitioning, -# PartitioningFactory, -# Scanner, -# TaggedRecordBatch, -# UnionDataset, -# UnionDatasetFactory, -# WrittenFile, -# get_partition_keys, -# ) -# from pyarrow._dataset_orc import OrcFileFormat -# from pyarrow._dataset_parquet import ( -# ParquetDatasetFactory, -# ParquetFactoryOptions, -# ParquetFileFormat, -# ParquetFileFragment, -# ParquetFileWriteOptions, -# ParquetFragmentScanOptions, -# ParquetReadOptions, -# RowGroupInfo, -# ) -# from pyarrow._dataset_parquet_encryption import ( -# ParquetDecryptionConfig, -# ParquetEncryptionConfig, -# ) -# from pyarrow.compute import Expression, field, scalar -# from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table -# -# from ._fs import SupportedFileSystem -# -# _orc_available: bool -# _parquet_available: bool -# -# __all__ = [ -# "CsvFileFormat", -# "CsvFragmentScanOptions", -# "Dataset", -# "DatasetFactory", -# "DirectoryPartitioning", -# "FeatherFileFormat", -# "FileFormat", -# "FileFragment", -# "FilenamePartitioning", -# "FileSystemDataset", -# "FileSystemDatasetFactory", -# "FileSystemFactoryOptions", -# "FileWriteOptions", -# "Fragment", -# "FragmentScanOptions", -# "HivePartitioning", -# "InMemoryDataset", -# "IpcFileFormat", -# "IpcFileWriteOptions", -# "JsonFileFormat", -# "JsonFragmentScanOptions", -# "Partitioning", -# "PartitioningFactory", -# "Scanner", -# "TaggedRecordBatch", -# "UnionDataset", -# "UnionDatasetFactory", -# "WrittenFile", -# "get_partition_keys", -# # Orc 
-# "OrcFileFormat", -# # Parquet -# "ParquetDatasetFactory", -# "ParquetFactoryOptions", -# "ParquetFileFormat", -# "ParquetFileFragment", -# "ParquetFileWriteOptions", -# "ParquetFragmentScanOptions", -# "ParquetReadOptions", -# "RowGroupInfo", -# # Parquet Encryption -# "ParquetDecryptionConfig", -# "ParquetEncryptionConfig", -# # Compute -# "Expression", -# "field", -# "scalar", -# # Dataset -# "partitioning", -# "parquet_dataset", -# "write_dataset", -# ] -# -# _DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] -# -# @overload -# def partitioning( -# schema: Schema, -# ) -> Partitioning: ... -# @overload -# def partitioning( -# schema: Schema, -# *, -# flavor: Literal["filename"], -# dictionaries: dict[str, Array] | None = None, -# ) -> Partitioning: ... -# @overload -# def partitioning( -# schema: Schema, -# *, -# flavor: Literal["filename"], -# dictionaries: Literal["infer"], -# ) -> PartitioningFactory: ... -# @overload -# def partitioning( -# field_names: list[str], -# *, -# flavor: Literal["filename"], -# ) -> PartitioningFactory: ... -# @overload -# def partitioning( -# schema: Schema, -# *, -# flavor: Literal["hive"], -# dictionaries: Literal["infer"], -# ) -> PartitioningFactory: ... -# @overload -# def partitioning( -# *, -# flavor: Literal["hive"], -# ) -> PartitioningFactory: ... -# @overload -# def partitioning( -# schema: Schema, -# *, -# flavor: Literal["hive"], -# dictionaries: dict[str, Array] | None = None, -# ) -> Partitioning: ... -# def parquet_dataset( -# metadata_path: StrPath, -# schema: Schema | None = None, -# filesystem: SupportedFileSystem | None = None, -# format: ParquetFileFormat | None = None, -# partitioning: Partitioning | PartitioningFactory | None = None, -# partition_base_dir: str | None = None, -# ) -> FileSystemDataset: ... -# @overload -# def dataset( -# source: StrPath | Sequence[StrPath], -# schema: Schema | None = None, -# format: FileFormat | _DatasetFormat | None = None, -# filesystem: SupportedFileSystem | str | None = None, -# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, -# partition_base_dir: str | None = None, -# exclude_invalid_files: bool | None = None, -# ignore_prefixes: list[str] | None = None, -# ) -> FileSystemDataset: ... -# @overload -# def dataset( -# source: list[Dataset], -# schema: Schema | None = None, -# format: FileFormat | _DatasetFormat | None = None, -# filesystem: SupportedFileSystem | str | None = None, -# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, -# partition_base_dir: str | None = None, -# exclude_invalid_files: bool | None = None, -# ignore_prefixes: list[str] | None = None, -# ) -> UnionDataset: ... -# @overload -# def dataset( -# source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, -# schema: Schema | None = None, -# format: FileFormat | _DatasetFormat | None = None, -# filesystem: SupportedFileSystem | str | None = None, -# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, -# partition_base_dir: str | None = None, -# exclude_invalid_files: bool | None = None, -# ignore_prefixes: list[str] | None = None, -# ) -> InMemoryDataset: ... 
-# @overload -# def dataset( -# source: RecordBatch | Table, -# schema: Schema | None = None, -# format: FileFormat | _DatasetFormat | None = None, -# filesystem: SupportedFileSystem | str | None = None, -# partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, -# partition_base_dir: str | None = None, -# exclude_invalid_files: bool | None = None, -# ignore_prefixes: list[str] | None = None, -# ) -> InMemoryDataset: ... -# def write_dataset( -# data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], -# base_dir: StrPath, -# *, -# basename_template: str | None = None, -# format: FileFormat | _DatasetFormat | None = None, -# partitioning: Partitioning | list[str] | None = None, -# partitioning_flavor: str | None = None, -# schema: Schema | None = None, -# filesystem: SupportedFileSystem | None = None, -# file_options: FileWriteOptions | None = None, -# use_threads: bool = True, -# max_partitions: int = 1024, -# max_open_files: int = 1024, -# max_rows_per_file: int = 0, -# min_rows_per_group: int = 0, -# max_rows_per_group: int = 1024 * 1024, -# file_visitor: Callable[[str], None] | None = None, -# existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", -# create_dir: bool = True, -# ): ... +from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload + +from _typeshed import StrPath +from pyarrow._dataset import ( + CsvFileFormat, + CsvFragmentScanOptions, + Dataset, + DatasetFactory, + DirectoryPartitioning, + FeatherFileFormat, + FileFormat, + FileFragment, + FilenamePartitioning, + FileSystemDataset, + FileSystemDatasetFactory, + FileSystemFactoryOptions, + FileWriteOptions, + Fragment, + FragmentScanOptions, + HivePartitioning, + InMemoryDataset, + IpcFileFormat, + IpcFileWriteOptions, + JsonFileFormat, + JsonFragmentScanOptions, + Partitioning, + PartitioningFactory, + Scanner, + TaggedRecordBatch, + UnionDataset, + UnionDatasetFactory, + WrittenFile, + get_partition_keys, +) +from pyarrow._dataset_orc import OrcFileFormat +from pyarrow._dataset_parquet import ( + ParquetDatasetFactory, + ParquetFactoryOptions, + ParquetFileFormat, + ParquetFileFragment, + ParquetFileWriteOptions, + ParquetFragmentScanOptions, + ParquetReadOptions, + RowGroupInfo, +) +from pyarrow._dataset_parquet_encryption import ( + ParquetDecryptionConfig, + ParquetEncryptionConfig, +) +from pyarrow.compute import Expression, field, scalar +from pyarrow.lib import Array, RecordBatch, RecordBatchReader, Schema, Table + +from ._fs import SupportedFileSystem + +_orc_available: bool +_parquet_available: bool + +__all__ = [ + "CsvFileFormat", + "CsvFragmentScanOptions", + "Dataset", + "DatasetFactory", + "DirectoryPartitioning", + "FeatherFileFormat", + "FileFormat", + "FileFragment", + "FilenamePartitioning", + "FileSystemDataset", + "FileSystemDatasetFactory", + "FileSystemFactoryOptions", + "FileWriteOptions", + "Fragment", + "FragmentScanOptions", + "HivePartitioning", + "InMemoryDataset", + "IpcFileFormat", + "IpcFileWriteOptions", + "JsonFileFormat", + "JsonFragmentScanOptions", + "Partitioning", + "PartitioningFactory", + "Scanner", + "TaggedRecordBatch", + "UnionDataset", + "UnionDatasetFactory", + "WrittenFile", + "get_partition_keys", + # Orc + "OrcFileFormat", + # Parquet + "ParquetDatasetFactory", + "ParquetFactoryOptions", + "ParquetFileFormat", + "ParquetFileFragment", + "ParquetFileWriteOptions", + "ParquetFragmentScanOptions", + "ParquetReadOptions", + "RowGroupInfo", + # Parquet 
Encryption + "ParquetDecryptionConfig", + "ParquetEncryptionConfig", + # Compute + "Expression", + "field", + "scalar", + # Dataset + "partitioning", + "parquet_dataset", + "write_dataset", +] + +_DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] + +@overload +def partitioning( + schema: Schema, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["filename"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + field_names: list[str], + *, + flavor: Literal["filename"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: Literal["infer"], +) -> PartitioningFactory: ... +@overload +def partitioning( + *, + flavor: Literal["hive"], +) -> PartitioningFactory: ... +@overload +def partitioning( + schema: Schema, + *, + flavor: Literal["hive"], + dictionaries: dict[str, Array] | None = None, +) -> Partitioning: ... +def parquet_dataset( + metadata_path: StrPath, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + format: ParquetFileFormat | None = None, + partitioning: Partitioning | PartitioningFactory | None = None, + partition_base_dir: str | None = None, +) -> FileSystemDataset: ... +@overload +def dataset( + source: StrPath | Sequence[StrPath], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> FileSystemDataset: ... +@overload +def dataset( + source: list[Dataset], + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> UnionDataset: ... +@overload +def dataset( + source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... +@overload +def dataset( + source: RecordBatch | Table, + schema: Schema | None = None, + format: FileFormat | _DatasetFormat | None = None, + filesystem: SupportedFileSystem | str | None = None, + partitioning: Partitioning | PartitioningFactory | str | list[str] | None = None, + partition_base_dir: str | None = None, + exclude_invalid_files: bool | None = None, + ignore_prefixes: list[str] | None = None, +) -> InMemoryDataset: ... 
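The overloads above mirror the common entry points; a minimal sketch of the first, path-based form follows (directory path and column name are placeholders):

    import pyarrow.dataset as ds

    # FileSystemDataset discovered from a directory of Parquet files.
    dataset = ds.dataset("data/", format="parquet", partitioning="hive")

    # Projection and filtering go through pyarrow.compute expressions (ds.field).
    table = dataset.to_table(columns=["n_legs"], filter=ds.field("n_legs") > 2)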
+def write_dataset( + data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], + base_dir: StrPath, + *, + basename_template: str | None = None, + format: FileFormat | _DatasetFormat | None = None, + partitioning: Partitioning | list[str] | None = None, + partitioning_flavor: str | None = None, + schema: Schema | None = None, + filesystem: SupportedFileSystem | None = None, + file_options: FileWriteOptions | None = None, + use_threads: bool = True, + max_partitions: int = 1024, + max_open_files: int = 1024, + max_rows_per_file: int = 0, + min_rows_per_group: int = 0, + max_rows_per_group: int = 1024 * 1024, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", + create_dir: bool = True, +): ... diff --git a/python/pyarrow-stubs/feather.pyi b/python/pyarrow-stubs/feather.pyi new file mode 100644 index 00000000000..9451ee15763 --- /dev/null +++ b/python/pyarrow-stubs/feather.pyi @@ -0,0 +1,50 @@ +from typing import IO, Literal + +import pandas as pd + +from _typeshed import StrPath +from pyarrow._feather import FeatherError +from pyarrow.lib import Table + +__all__ = [ + "FeatherError", + "FeatherDataset", + "check_chunked_overflow", + "write_feather", + "read_feather", + "read_table", +] + +class FeatherDataset: + path_or_paths: str | list[str] + validate_schema: bool + + def __init__(self, path_or_paths: str | list[str], validate_schema: bool = True) -> None: ... + def read_table(self, columns: list[str] | None = None) -> Table: ... + def validate_schemas(self, piece, table: Table) -> None: ... + def read_pandas( + self, columns: list[str] | None = None, use_threads: bool = True + ) -> pd.DataFrame: ... + +def check_chunked_overflow(name: str, col) -> None: ... +def write_feather( + df: pd.DataFrame | Table, + dest: StrPath | IO, + compression: Literal["zstd", "lz4", "uncompressed"] | None = None, + compression_level: int | None = None, + chunksize: int | None = None, + version: Literal[1, 2] = 2, +) -> None: ... +def read_feather( + source: StrPath | IO, + columns: list[str] | None = None, + use_threads: bool = True, + memory_map: bool = False, + **kwargs, +) -> pd.DataFrame: ... +def read_table( + source: StrPath | IO, + columns: list[str] | None = None, + memory_map: bool = False, + use_threads: bool = True, +) -> Table: ... 
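A short round-trip sketch of the feather API typed above (the file path is a placeholder):

    import pyarrow as pa
    import pyarrow.feather as feather

    table = pa.table({"x": [1, 2, 3]})
    # write_feather accepts a pyarrow.Table or a pandas DataFrame.
    feather.write_feather(table, "data.feather", compression="zstd")

    # read_table returns a pyarrow.Table; read_feather would return a pandas DataFrame.
    restored = feather.read_table("data.feather", columns=["x"])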
diff --git a/python/pyarrow-stubs/flight.pyi b/python/pyarrow-stubs/flight.pyi new file mode 100644 index 00000000000..9b806ccf305 --- /dev/null +++ b/python/pyarrow-stubs/flight.pyi @@ -0,0 +1,95 @@ +from pyarrow._flight import ( + Action, + ActionType, + BasicAuth, + CallInfo, + CertKeyPair, + ClientAuthHandler, + ClientMiddleware, + ClientMiddlewareFactory, + DescriptorType, + FlightCallOptions, + FlightCancelledError, + FlightClient, + FlightDataStream, + FlightDescriptor, + FlightEndpoint, + FlightError, + FlightInfo, + FlightInternalError, + FlightMetadataReader, + FlightMetadataWriter, + FlightMethod, + FlightServerBase, + FlightServerError, + FlightStreamChunk, + FlightStreamReader, + FlightStreamWriter, + FlightTimedOutError, + FlightUnauthenticatedError, + FlightUnauthorizedError, + FlightUnavailableError, + FlightWriteSizeExceededError, + GeneratorStream, + Location, + MetadataRecordBatchReader, + MetadataRecordBatchWriter, + RecordBatchStream, + Result, + SchemaResult, + ServerAuthHandler, + ServerCallContext, + ServerMiddleware, + ServerMiddlewareFactory, + Ticket, + TracingServerMiddlewareFactory, + connect, +) + +__all__ = [ + "Action", + "ActionType", + "BasicAuth", + "CallInfo", + "CertKeyPair", + "ClientAuthHandler", + "ClientMiddleware", + "ClientMiddlewareFactory", + "DescriptorType", + "FlightCallOptions", + "FlightCancelledError", + "FlightClient", + "FlightDataStream", + "FlightDescriptor", + "FlightEndpoint", + "FlightError", + "FlightInfo", + "FlightInternalError", + "FlightMetadataReader", + "FlightMetadataWriter", + "FlightMethod", + "FlightServerBase", + "FlightServerError", + "FlightStreamChunk", + "FlightStreamReader", + "FlightStreamWriter", + "FlightTimedOutError", + "FlightUnauthenticatedError", + "FlightUnauthorizedError", + "FlightUnavailableError", + "FlightWriteSizeExceededError", + "GeneratorStream", + "Location", + "MetadataRecordBatchReader", + "MetadataRecordBatchWriter", + "RecordBatchStream", + "Result", + "SchemaResult", + "ServerAuthHandler", + "ServerCallContext", + "ServerMiddleware", + "ServerMiddlewareFactory", + "Ticket", + "TracingServerMiddlewareFactory", + "connect", +] diff --git a/python/pyarrow-stubs/fs.pyi b/python/pyarrow-stubs/fs.pyi new file mode 100644 index 00000000000..6bf75616c13 --- /dev/null +++ b/python/pyarrow-stubs/fs.pyi @@ -0,0 +1,77 @@ +from pyarrow._fs import ( # noqa + FileSelector, + FileType, + FileInfo, + FileSystem, + LocalFileSystem, + SubTreeFileSystem, + _MockFileSystem, + FileSystemHandler, + PyFileSystem, + SupportedFileSystem, +) +from pyarrow._azurefs import AzureFileSystem +from pyarrow._hdfs import HadoopFileSystem +from pyarrow._gcsfs import GcsFileSystem +from pyarrow._s3fs import ( # noqa + AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, + S3FileSystem, + S3LogLevel, + S3RetryStrategy, + ensure_s3_initialized, + finalize_s3, + ensure_s3_finalized, + initialize_s3, + resolve_s3_region, +) + +FileStats = FileInfo + +def copy_files( + source: str, + destination: str, + source_filesystem: SupportedFileSystem | None = None, + destination_filesystem: SupportedFileSystem | None = None, + *, + chunk_size: int = 1024 * 1024, + use_threads: bool = True, +) -> None: ... + +class FSSpecHandler(FileSystemHandler): # type: ignore[misc] + fs: SupportedFileSystem + def __init__(self, fs: SupportedFileSystem) -> None: ... 
+ +__all__ = [ + # _fs + "FileSelector", + "FileType", + "FileInfo", + "FileSystem", + "LocalFileSystem", + "SubTreeFileSystem", + "_MockFileSystem", + "FileSystemHandler", + "PyFileSystem", + # _azurefs + "AzureFileSystem", + # _hdfs + "HadoopFileSystem", + # _gcsfs + "GcsFileSystem", + # _s3fs + "AwsDefaultS3RetryStrategy", + "AwsStandardS3RetryStrategy", + "S3FileSystem", + "S3LogLevel", + "S3RetryStrategy", + "ensure_s3_initialized", + "finalize_s3", + "ensure_s3_finalized", + "initialize_s3", + "resolve_s3_region", + # fs + "FileStats", + "copy_files", + "FSSpecHandler", +] diff --git a/python/pyarrow-stubs/gandiva.pyi b/python/pyarrow-stubs/gandiva.pyi new file mode 100644 index 00000000000..a344f885b29 --- /dev/null +++ b/python/pyarrow-stubs/gandiva.pyi @@ -0,0 +1,65 @@ +from typing import Iterable, Literal + +from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable + +class Node(_Weakrefable): + def return_type(self) -> DataType: ... + +class Expression(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class Condition(_Weakrefable): + def root(self) -> Node: ... + def result(self) -> Field: ... + +class SelectionVector(_Weakrefable): + def to_array(self) -> Array: ... + +class Projector(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, selection: SelectionVector | None = None + ) -> list[Array]: ... + +class Filter(_Weakrefable): + @property + def llvm_ir(self): ... + def evaluate( + self, batch: RecordBatch, pool: MemoryPool, dtype: DataType | str = "int32" + ) -> SelectionVector: ... + +class TreeExprBuilder(_Weakrefable): + def make_literal(self, value: float | str | bytes | bool, dtype: DataType) -> Node: ... + def make_expression(self, root_node: Node, return_field: Field) -> Expression: ... + def make_function(self, name: str, children: list[Node], return_type: DataType) -> Node: ... + def make_field(self, field: Field) -> Node: ... + def make_if( + self, condition: Node, this_node: Node, else_node: Node, return_type: DataType + ) -> Node: ... + def make_and(self, children: list[Node]) -> Node: ... + def make_or(self, children: list[Node]) -> Node: ... + def make_in_expression(self, node: Node, values: Iterable, dtype: DataType) -> Node: ... + def make_condition(self, condition: Node) -> Condition: ... + +class Configuration(_Weakrefable): + def __init__(self, optimize: bool = True, dump_ir: bool = False) -> None: ... + +def make_projector( + schema: Schema, + children: list[Expression], + pool: MemoryPool, + selection_mode: Literal["NONE", "UINT16", "UINT32", "UINT64"] = "NONE", + configuration: Configuration | None = None, +) -> Projector: ... +def make_filter( + schema: Schema, condition: Condition, configuration: Configuration | None = None +) -> Filter: ... + +class FunctionSignature(_Weakrefable): + def return_type(self) -> DataType: ... + def param_types(self) -> list[DataType]: ... + def name(self) -> str: ... + +def get_registered_function_signatures() -> list[FunctionSignature]: ... 
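To illustrate the filesystem surface stubbed in fs.pyi above, a minimal sketch (paths are placeholders):

    from pyarrow import fs

    local = fs.LocalFileSystem()

    # get_file_info with a FileSelector returns a list of FileInfo objects.
    for info in local.get_file_info(fs.FileSelector("/tmp", recursive=False)):
        print(info.path, info.type, info.size)

    # copy_files works between any two supported filesystems.
    fs.copy_files("/tmp/src.txt", "/tmp/dst.txt",
                  source_filesystem=local, destination_filesystem=local)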
diff --git a/python/pyarrow-stubs/json.pyi b/python/pyarrow-stubs/json.pyi new file mode 100644 index 00000000000..db1d35e0b8b --- /dev/null +++ b/python/pyarrow-stubs/json.pyi @@ -0,0 +1,3 @@ +from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json + +__all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index c0a3cd08386..57e23c3eaea 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -22,17 +22,16 @@ from typing import NamedTuple, Literal from typing_extensions import TypeVar from .array import * -# TODO -# from .benchmark import * -# from .builder import * -# from .compat import * +from ._benchmark import * +from .builder import * +from .compat import * from .config import * from .device import * from .error import * from .io import * from ._ipc import * from .memory import * -# from .pandas_shim import * +from .pandas_shim import * from .scalar import * from .table import * from .tensor import * diff --git a/python/pyarrow-stubs/orc.pyi b/python/pyarrow-stubs/orc.pyi new file mode 100644 index 00000000000..2eba8d40a11 --- /dev/null +++ b/python/pyarrow-stubs/orc.pyi @@ -0,0 +1,279 @@ +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Literal + +from _typeshed import StrPath + +from . import _orc +from ._fs import SupportedFileSystem +from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table + +class ORCFile: + """ + Reader interface for a single ORC file + + Parameters + ---------- + source : str or pyarrow.NativeFile + Readable source. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader. + """ + + reader: _orc.ORCReader + def __init__(self, source: StrPath | NativeFile | IO) -> None: ... + @property + def metadata(self) -> KeyValueMetadata: + """The file metadata, as an arrow KeyValueMetadata""" + @property + def schema(self) -> Schema: + """The file schema, as an arrow schema""" + @property + def nrows(self) -> int: + """The number of rows in the file""" + @property + def nstripes(self) -> int: + """The number of stripes in the file""" + @property + def file_version(self) -> str: + """Format version of the ORC file, must be 0.11 or 0.12""" + @property + def software_version(self) -> str: + """Software instance and version that wrote this file""" + @property + def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: + """Compression codec of the file""" + @property + def compression_size(self) -> int: + """Number of bytes to buffer for the compression codec in the file""" + @property + def writer(self) -> str: + """Name of the writer that wrote this file. 
+ If the writer is unknown then its Writer ID + (a number) is returned""" + @property + def writer_version(self) -> str: + """Version of the writer""" + @property + def row_index_stride(self) -> int: + """Number of rows per an entry in the row index or 0 + if there is no row index""" + @property + def nstripe_statistics(self) -> int: + """Number of stripe statistics""" + @property + def content_length(self) -> int: + """Length of the data stripes in the file in bytes""" + @property + def stripe_statistics_length(self) -> int: + """The number of compressed bytes in the file stripe statistics""" + @property + def file_footer_length(self) -> int: + """The number of compressed bytes in the file footer""" + @property + def file_postscript_length(self) -> int: + """The number of bytes in the file postscript""" + @property + def file_length(self) -> int: + """The number of bytes in the file""" + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: + """Read a single stripe from the file. + + Parameters + ---------- + n : int + The stripe index + columns : list + If not None, only these columns will be read from the stripe. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e' + + Returns + ------- + pyarrow.RecordBatch + Content of the stripe as a RecordBatch. + """ + def read(self, columns: list[str] | None = None) -> Table: + """Read the whole file. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. Output always follows the + ordering of the file and not the `columns` list. + + Returns + ------- + pyarrow.Table + Content of the file as a Table. + """ + +class ORCWriter: + """ + Writer interface for a single ORC file + + Parameters + ---------- + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. + Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. 
+ """ + + writer: _orc.ORCWriter + is_open: bool + def __init__( + self, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + def write(self, table: Table) -> None: + """ + Write the table into an ORC file. The schema of the table must + be equal to the schema used when opening the ORC file. + + Parameters + ---------- + table : pyarrow.Table + The table to be written into the ORC file + """ + def close(self) -> None: + """ + Close the ORC file + """ + +def read_table( + source: StrPath | NativeFile | IO, + columns: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Table: + """ + Read a Table from an ORC file. + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name. For file-like objects, + only read a single file. Use pyarrow.BufferReader to read a file + contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. Output always follows the ordering of the file and + not the `columns` list. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + """ + +def write_table( + table: Table, + where: StrPath | NativeFile | IO, + *, + file_version: str = "0.12", + batch_size: int = 1024, + stripe_size: int = 64 * 1024 * 1024, + compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression_block_size: int = 65536, + compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", + row_index_stride: int = 10000, + padding_tolerance: float = 0.0, + dictionary_key_size_threshold: float = 0.0, + bloom_filter_columns: list[int] | None = None, + bloom_filter_fpp: float = 0.05, +) -> None: + """ + Write a table into an ORC file. + + Parameters + ---------- + table : pyarrow.lib.Table + The table to be written into the ORC file + where : str or pyarrow.io.NativeFile + Writable target. For passing Python file objects or byte buffers, + see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream + or pyarrow.io.FixedSizeBufferWriter. + file_version : {"0.11", "0.12"}, default "0.12" + Determine which ORC file version to use. + `Hive 0.11 / ORC v0 `_ + is the older version + while `Hive 0.12 / ORC v1 `_ + is the newer one. + batch_size : int, default 1024 + Number of rows the ORC writer writes at a time. + stripe_size : int, default 64 * 1024 * 1024 + Size of each ORC stripe in bytes. + compression : string, default 'uncompressed' + The compression codec. 
+ Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} + Note that LZ0 is currently not supported. + compression_block_size : int, default 64 * 1024 + Size of each compression block in bytes. + compression_strategy : string, default 'speed' + The compression strategy i.e. speed vs size reduction. + Valid values: {'SPEED', 'COMPRESSION'} + row_index_stride : int, default 10000 + The row index stride i.e. the number of rows per + an entry in the row index. + padding_tolerance : double, default 0.0 + The padding tolerance. + dictionary_key_size_threshold : double, default 0.0 + The dictionary key size threshold. 0 to disable dictionary encoding. + 1 to always enable dictionary encoding. + bloom_filter_columns : None, set-like or list-like, default None + Columns that use the bloom filter. + bloom_filter_fpp : double, default 0.05 + Upper limit of the false-positive rate of the bloom filter. + """ diff --git a/python/pyarrow-stubs/pandas_compat.pyi b/python/pyarrow-stubs/pandas_compat.pyi new file mode 100644 index 00000000000..efbd05ac2fe --- /dev/null +++ b/python/pyarrow-stubs/pandas_compat.pyi @@ -0,0 +1,54 @@ +from typing import Any, TypedDict, TypeVar + +import numpy as np +import pandas as pd + +from pandas import DatetimeTZDtype + +from .lib import Array, DataType, Schema, Table + +_T = TypeVar("_T") + +def get_logical_type_map() -> dict[int, str]: ... +def get_logical_type(arrow_type: DataType) -> str: ... +def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... +def get_logical_type_from_numpy(pandas_collection) -> str: ... +def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... + +class _ColumnMetadata(TypedDict): + name: str + field_name: str + pandas_type: int + numpy_type: str + metadata: dict | None + +def get_column_metadata( + column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str +) -> _ColumnMetadata: ... +def construct_metadata( + columns_to_convert: list[pd.Series], + df: pd.DataFrame, + column_names: list[str], + index_levels: list[pd.Index], + index_descriptors: list[dict], + preserve_index: bool, + types: list[DataType], + column_field_names: list[str] = ..., +) -> dict[bytes, bytes]: ... +def dataframe_to_types( + df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None +) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... +def dataframe_to_arrays( + df: pd.DataFrame, + schema: Schema, + preserve_index: bool | None, + nthreads: int = 1, + columns: list[str] | None = None, + safe: bool = True, +) -> tuple[Array, Schema, int]: ... +def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... +def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... +def table_to_dataframe( + options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None +) -> pd.DataFrame: ... +def make_tz_aware(series: pd.Series, tz: str) -> pd.Series: ... diff --git a/python/pyarrow-stubs/pandas_shim.pyi b/python/pyarrow-stubs/pandas_shim.pyi new file mode 100644 index 00000000000..0e80fae4ebf --- /dev/null +++ b/python/pyarrow-stubs/pandas_shim.pyi @@ -0,0 +1,51 @@ +from types import ModuleType +from typing import Any, Iterable, TypeGuard + +import pandas as pd + +from numpy import dtype +from pandas.core.dtypes.base import ExtensionDtype + +class _PandasAPIShim: + has_sparse: bool + + def series(self, *args, **kwargs) -> pd.Series: ... + def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + @property + def have_pandas(self) -> bool: ... 
+ @property + def compat(self) -> ModuleType: ... + @property + def pd(self) -> ModuleType: ... + def infer_dtype(self, obj: Iterable) -> str: ... + def pandas_dtype(self, dtype: str) -> dtype: ... + @property + def loose_version(self) -> Any: ... + @property + def version(self) -> str: ... + def is_v1(self) -> bool: ... + def is_ge_v21(self) -> bool: ... + def is_ge_v23(self) -> bool: ... + def is_ge_v3(self) -> bool: ... + @property + def categorical_type(self) -> type[pd.Categorical]: ... + @property + def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + @property + def extension_dtype(self) -> type[ExtensionDtype]: ... + def is_array_like( + self, obj: Any + ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... + def is_sparse(self, obj: Any) -> bool: ... + def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... + def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def get_values(self, obj: Any) -> bool: ... + def get_rangeindex_attribute(self, level, name): ... + +_pandas_api: _PandasAPIShim + +__all__ = ["_PandasAPIShim", "_pandas_api"] diff --git a/python/pyarrow-stubs/parquet/__init__.pyi b/python/pyarrow-stubs/parquet/__init__.pyi new file mode 100644 index 00000000000..151ee188f84 --- /dev/null +++ b/python/pyarrow-stubs/parquet/__init__.pyi @@ -0,0 +1 @@ +from .core import * # noqa diff --git a/python/pyarrow-stubs/parquet/core.pyi b/python/pyarrow-stubs/parquet/core.pyi new file mode 100644 index 00000000000..56b2c8447d9 --- /dev/null +++ b/python/pyarrow-stubs/parquet/core.pyi @@ -0,0 +1,2061 @@ +import sys + +from pathlib import Path + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self +from typing import IO, Callable, Iterator, Literal, Sequence + +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + +from pyarrow import _parquet +from pyarrow._compute import Expression +from pyarrow._fs import FileSystem, SupportedFileSystem +from pyarrow._parquet import ( + ColumnChunkMetaData, + ColumnSchema, + FileDecryptionProperties, + FileEncryptionProperties, + FileMetaData, + ParquetLogicalType, + ParquetReader, + ParquetSchema, + RowGroupMetaData, + SortingColumn, + Statistics, +) +from pyarrow._stubs_typing import FilterTuple, SingleOrList +from pyarrow.dataset import ParquetFileFragment, Partitioning +from pyarrow.lib import NativeFile, RecordBatch, Schema, Table +from typing_extensions import deprecated + +__all__ = ( + "ColumnChunkMetaData", + "ColumnSchema", + "FileDecryptionProperties", + "FileEncryptionProperties", + "FileMetaData", + "ParquetDataset", + "ParquetFile", + "ParquetLogicalType", + "ParquetReader", + "ParquetSchema", + "ParquetWriter", + "RowGroupMetaData", + "SortingColumn", + "Statistics", + "read_metadata", + "read_pandas", + "read_schema", + "read_table", + "write_metadata", + "write_table", + "write_to_dataset", + "_filters_to_expression", + "filters_to_expression", +) + +def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: + """ + Check if filters are well-formed and convert to an ``Expression``. 
+ + Parameters + ---------- + filters : List[Tuple] or List[List[Tuple]] + + Notes + ----- + See internal ``pyarrow._DNF_filter_doc`` attribute for more details. + + Examples + -------- + + >>> filters_to_expression([("foo", "==", "bar")]) + + + Returns + ------- + pyarrow.compute.Expression + An Expression representing the filters + """ + +@deprecated("use filters_to_expression") +def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + +_Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] + +class ParquetFile: + """ + Reader interface for a single Parquet file. + + Parameters + ---------- + source : str, pathlib.Path, pyarrow.NativeFile, or file-like object + Readable source. For passing bytes or buffer-like file containing a + Parquet file, use pyarrow.BufferReader. + metadata : FileMetaData, default None + Use existing metadata object, rather than reading from file. + common_metadata : FileMetaData, default None + Will be used in reads for pandas schema metadata if not found in the + main file's metadata, no other uses at the moment. + read_dictionary : list + List of column names to read directly as DictionaryArray. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + pre_buffer : bool, default False + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties, default None + File decryption properties for Parquet Modular Encryption. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Examples + -------- + + Generate an example PyArrow Table and write it to Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Create a ``ParquetFile`` object from the Parquet file: + + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the data: + + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + + Create a ParquetFile object with "animal" column as DictionaryArray: + + >>> parquet_file = pq.ParquetFile("example.parquet", read_dictionary=["animal"]) + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [ -- dictionary: + ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: + [0,1,2,3,4,5]] + """ + + reader: ParquetReader + common_metadata: FileMetaData + + def __init__( + self, + source: str | Path | NativeFile | IO, + *, + metadata: FileMetaData | None = None, + common_metadata: FileMetaData | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + pre_buffer: bool = False, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + filesystem: SupportedFileSystem | None = None, + page_checksum_verification: bool = False, + ): ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> None: ... + @property + def metadata(self) -> FileMetaData: + """ + Return the Parquet metadata. + """ + @property + def schema(self) -> ParquetSchema: + """ + Return the Parquet schema, unconverted to Arrow types + """ + @property + def schema_arrow(self) -> Schema: + """ + Return the inferred Arrow schema, converted from the whole Parquet + file's schema + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read the Arrow schema: + + >>> parquet_file.schema_arrow + n_legs: int64 + animal: string + """ + @property + def num_row_groups(self) -> int: + """ + Return the number of row groups of the Parquet file. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.num_row_groups + 1 + """ + def close(self, force: bool = False) -> None: ... + @property + def closed(self) -> bool: ... + def read_row_group( + self, + i: int, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a single row group from a Parquet file. + + Parameters + ---------- + i : int + Index of the individual row group that we want to read. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. 
+ use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row group as a table (of columns) + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_group(0) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def read_row_groups( + self, + row_groups: list, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read multiple row groups from a Parquet file. + + Parameters + ---------- + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row groups as a table (of columns). + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.read_row_groups([0, 0]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] + """ + def iter_batches( + self, + batch_size: int = 65536, + row_groups: list | None = None, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Iterator[RecordBatch]: + """ + Read streaming batches from a Parquet file. + + Parameters + ---------- + batch_size : int, default 64K + Maximum number of records to yield per batch. Batches may be + smaller if there aren't enough rows in the file. + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : boolean, default True + Perform multi-threaded column reads. + use_pandas_metadata : boolean, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Yields + ------ + pyarrow.RecordBatch + Contents of each batch as a record batch + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ...
) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + >>> for i in parquet_file.iter_batches(): + ... print("RecordBatch") + ... print(i.to_pandas()) + RecordBatch + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + def read( + self, + columns: list | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read a Table from Parquet format. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + Read a Table: + + >>> parquet_file.read(columns=["animal"]) + pyarrow.Table + animal: string + ---- + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: + """ + Read contents of file for the given columns and batch size. + + Notes + ----- + This function's primary purpose is benchmarking. + The scan is executed on a single thread. + + Parameters + ---------- + columns : list of integers, default None + Select columns to read, if None scan all columns. + batch_size : int, default 64K + Number of rows to read at a time internally. + + Returns + ------- + num_rows : int + Number of rows in file + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + >>> parquet_file = pq.ParquetFile("example.parquet") + + >>> parquet_file.scan_contents() + 6 + """ + +class ParquetWriter: + """ + Class for incrementally building a Parquet file for Arrow tables. + + Parameters + ---------- + where : path or file-like object + schema : pyarrow.Schema + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). 
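As a hedged illustration of how ``ParquetFile.iter_batches`` and the ``ParquetWriter`` class documented here fit together (assuming the runtime API matches these stubs), a file can be re-chunked without materializing the whole table:

    import pyarrow.parquet as pq

    source = pq.ParquetFile("example.parquet")
    # Stream record batches out of one file and append them to another;
    # memory stays bounded by roughly batch_size rows at a time.
    with pq.ParquetWriter("copy.parquet", source.schema_arrow) as writer:
        for batch in source.iter_batches(batch_size=1024):
            writer.write_batch(batch)
    source.close()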
+ use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fall back to ``PLAIN`` encoding. In particular, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This takes priority over the coerce_timestamps option. + coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. For ``version='1.0'`` and ``version='2.4'``, + nanoseconds are cast to microseconds ('us'), while for + ``version='2.6'`` (the default), they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. + If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} + allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamps=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. + data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. + flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. + use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_split are + enabled, then dictionary is preferred. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. + column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Can only be used when ``use_dictionary`` is set to False, and + cannot be used in combination with ``use_byte_stream_split``.
+ Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', + 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files <https://arrow.apache.org/docs/cpp/parquet.html#encodings>`_. + data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. + use_compliant_nested_type : bool, default True + Whether to write compliant Parquet nested type (lists) as defined + `here <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types>`_, defaults to ``True``. + For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + <list-repetition> group <name> (LIST) { + repeated group list { + <element-repetition> <element-type> element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + <list-repetition> group <name> (LIST) { + repeated group list { + <element-repetition> <element-type> item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read side by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers.
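A small sketch of the ``sorting_columns`` option described above (illustrative only: the writer merely records the order in the row-group metadata, so the table is assumed to be pre-sorted):

    import pyarrow as pa
    import pyarrow.parquet as pq

    table = pa.table({"n_legs": [2, 2, 4, 100], "animal": ["Flamingo", "Parrot", "Dog", "Centipede"]})
    # Declare that rows are sorted by the first leaf column ("n_legs").
    sorting = [pq.SortingColumn(column_index=0)]
    with pq.ParquetWriter("sorted.parquet", table.schema, sorting_columns=sorting) as writer:
        writer.write_table(table)
    # Readers can recover the declared order from the row-group metadata.
    print(pq.ParquetFile("sorted.parquet").metadata.row_group(0).sorting_columns)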
+ store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: precision is limited by the array size. + Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. + - binary: precision is unlimited. The minimum number of bytes to store the + unscaled value is used. + + By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. + When enabled, the writer will use the following physical types to store decimals: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18. + - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + writer_engine_version : unused + **options : dict + If options contains a key `metadata_collector` then the + corresponding value is assumed to be a list (or any object with + `.append` method) that will be filled with the file metadata instance + of the written file. + + Examples + -------- + Generate an example PyArrow Table and RecordBatch: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> batch = pa.record_batch( + ... [ + ... [2, 2, 4, 4, 5, 100], + ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... ], + ... names=["n_legs", "animal"], + ... ) + + create a ParquetWriter object: + + >>> import pyarrow.parquet as pq + >>> writer = pq.ParquetWriter("example.parquet", table.schema) + + and write the Table into the Parquet file: + + >>> writer.write_table(table) + >>> writer.close() + + >>> pq.read_table("example.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + + create a ParquetWriter object for the RecordBatch: + + >>> writer2 = pq.ParquetWriter("example2.parquet", batch.schema) + + and write the RecordBatch into the Parquet file: + + >>> writer2.write_batch(batch) + >>> writer2.close() + + >>> pq.read_table("example2.parquet").to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + + flavor: str + schema_changed: bool + schema: ParquetSchema + where: str | Path | IO + file_handler: NativeFile | None + writer: _parquet.ParquetWriter + is_open: bool + + def __init__( + self, + where: str | Path | IO | NativeFile, + schema: Schema, + filesystem: SupportedFileSystem | None = None, + flavor: str | None = None, + version: Literal["1.0", "2.4", "2.6"] = ..., + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool | list = False, + column_encoding: str | dict | None = None, + writer_engine_version=None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = 
None, + store_decimal_as_integer: bool = False, + **options, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *args, **kwargs) -> Literal[False]: ... + def write( + self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None + ) -> None: + """ + Write RecordBatch or Table to the Parquet file. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the input + table or batch length and 1024 * 1024. + """ + def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: + """ + Write RecordBatch to the Parquet file. + + Parameters + ---------- + batch : RecordBatch + row_group_size : int, default None + Maximum number of rows in written row group. If None, the + row group size will be the minimum of the RecordBatch + size and 1024 * 1024. If set larger than 64Mi then 64Mi + will be used instead. + """ + def write_table(self, table: Table, row_group_size: int | None = None) -> None: + """ + Write Table to the Parquet file. + + Parameters + ---------- + table : Table + row_group_size : int, default None + Maximum number of rows in each written row group. If None, + the row group size will be the minimum of the Table size + and 1024 * 1024. If set larger than 64Mi then 64Mi will + be used instead. + + """ + def close(self) -> None: + """ + Close the connection to the Parquet file. + """ + def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: + """ + Add key-value metadata to the file. + This will overwrite any existing metadata with the same key. + + Parameters + ---------- + key_value_metadata : dict + Keys and values must be string-like / coercible to bytes. + """ + +class ParquetDataset: + """ + Encapsulates details of reading a complete Parquet dataset possibly + consisting of multiple files and partitions in subdirectories. + + Parameters + ---------- + path_or_paths : str or List[str] + A directory name, single file name, or list of file names. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : pyarrow.parquet.Schema + Optionally provide the Schema for the Dataset, in which case it will + not be inferred from the source. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. 
+ + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') == 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. Set to False if you want to prioritize minimal memory usage + over maximum speed. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular resolution + (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 + timestamps will be inferred as timestamps in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the page checksum for each page read from the file.
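To make the filter forms described above concrete, a brief sketch that reuses the partitioned ``dataset_v2`` layout from the examples below; the two spellings are intended to select the same rows (illustration only, not part of the upstream docstring):

    import pyarrow.compute as pc
    import pyarrow.parquet as pq

    # DNF form: (year == 2021 AND n_legs >= 4) OR animal == "Parrot"
    dnf = [[("year", "=", 2021), ("n_legs", ">=", 4)], [("animal", "=", "Parrot")]]
    expr = ((pc.field("year") == 2021) & (pc.field("n_legs") >= 4)) | (pc.field("animal") == "Parrot")

    pq.ParquetDataset("dataset_v2/", filters=dnf).read()
    pq.ParquetDataset("dataset_v2/", filters=expr).read()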
+ + Examples + -------- + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2", partition_cols=["year"]) + + create a ParquetDataset object from the dataset source: + + >>> dataset = pq.ParquetDataset("dataset_v2/") + + and read the data: + + >>> dataset.read().to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + create a ParquetDataset object with filter: + + >>> dataset = pq.ParquetDataset("dataset_v2/", filters=[("n_legs", "=", 4)]) + >>> dataset.read().to_pandas() + n_legs animal year + 0 4 Dog 2021 + 1 4 Horse 2022 + """ + def __init__( + self, + path_or_paths: SingleOrList[str] + | SingleOrList[Path] + | SingleOrList[NativeFile] + | SingleOrList[IO], + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + *, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, + ): ... + def equals(self, other: ParquetDataset) -> bool: ... + @property + def schema(self) -> Schema: + """ + Schema of the Dataset. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_schema", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_schema/") + + Read the schema: + + >>> dataset.schema + n_legs: int64 + animal: string + year: dictionary + """ + def read( + self, + columns: list[str] | None = None, + use_threads: bool = True, + use_pandas_metadata: bool = False, + ) -> Table: + """ + Read (multiple) Parquet files as a single pyarrow.Table. + + Parameters + ---------- + columns : List[str] + Names of columns to read from the dataset. The partition fields + are not automatically included. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... 
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_read", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_read/") + + Read the dataset: + + >>> dataset.read(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[5],[2],[4,100],[2,4]] + """ + def read_pandas(self, **kwargs) -> Table: + """ + Read dataset including pandas metadata, if any. Other arguments passed + through to :func:`read`, see docstring for further details. + + Parameters + ---------- + **kwargs : optional + Additional options for :func:`read` + + Examples + -------- + Generate an example parquet file: + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> table = pa.Table.from_pandas(df) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "table_V2.parquet") + >>> dataset = pq.ParquetDataset("table_V2.parquet") + + Read the dataset with pandas metadata: + + >>> dataset.read_pandas(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,2,4,4,5,100]] + + >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} + """ + @property + def fragments(self) -> list[ParquetFileFragment]: + """ + A list of the Dataset source fragments or pieces with absolute + file paths. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_fragments", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_fragments/") + + List the fragments: + + >>> dataset.fragments + [<pyarrow.dataset.ParquetFileFragment path=dataset_v2_fragments/... + """ + @property + def files(self) -> list[str]: + """ + A list of absolute Parquet file paths in the Dataset source. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_v2_files", partition_cols=["year"]) + >>> dataset = pq.ParquetDataset("dataset_v2_files/") + + List the files: + + >>> dataset.files + ['dataset_v2_files/year=2019/...-0.parquet', ... + """ + @property + def filesystem(self) -> FileSystem: + """ + The filesystem type of the Dataset source. + """ + @property + def partitioning(self) -> Partitioning: + """ + The partitioning of the Dataset source, if discovered.
+ """ + +def read_table( + source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], + *, + columns: list | None = None, + use_threads: bool = True, + schema: Schema | None = None, + use_pandas_metadata: bool = False, + read_dictionary: list[str] | None = None, + memory_map: bool = False, + buffer_size: int = 0, + partitioning: str | list[str] | Partitioning | None = "hive", + filesystem: SupportedFileSystem | None = None, + filters: Expression | FilterTuple | list[FilterTuple] | None = None, + ignore_prefixes: list[str] | None = None, + pre_buffer: bool = True, + coerce_int96_timestamp_unit: str | None = None, + decryption_properties: FileDecryptionProperties | None = None, + thrift_string_size_limit: int | None = None, + thrift_container_size_limit: int | None = None, + page_checksum_verification: bool = False, +) -> Table: + """ + Read a Table from Parquet format + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. 
+ Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') == 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns) + + + Examples + -------- + + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "year": [2020, 2022, 2021, 2022, 2019, 2021], + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ...
) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path="dataset_name_2", partition_cols=["year"]) + + Read the data: + + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + + Read only a subset of columns: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] + + Read a subset of columns and read one column as DictionaryArray: + + >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"], read_dictionary=["animal"]) + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[5],[2],[4,100],[2,4]] + animal: [ -- dictionary: + ["Brittle stars"] -- indices: + [0], -- dictionary: + ["Flamingo"] -- indices: + [0], -- dictionary: + ["Dog","Centipede"] -- indices: + [0,1], -- dictionary: + ["Parrot","Horse"] -- indices: + [0,1]] + + Read the table with filter: + + >>> pq.read_table( + ... "dataset_name_2", columns=["n_legs", "animal"], filters=[("n_legs", "<", 4)] + ... ).to_pandas() + n_legs animal + 0 2 Flamingo + 1 2 Parrot + + Read data from a single Parquet file: + + >>> pq.write_table(table, "example.parquet") + >>> pq.read_table("dataset_name_2").to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + """ + +def read_pandas( + source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs +) -> Table: + """ + + Read a Table from Parquet format, also reading DataFrame + index values if known in the file metadata + + Parameters + ---------- + source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. + columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. + use_threads : bool, default True + Perform multi-threaded column reads. + schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. + read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. Refer to the Parquet + file's schema to obtain the paths. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" + The partitioning scheme for a partitioned dataset. 
The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details. + **kwargs + additional options for :func:`read_table` + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + Within-file level filtering and different partitioning schemes are supported. + + Predicates are expressed using an ``Expression`` or using + the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. + DNF allows arbitrary boolean logical combinations of single column predicates. + The innermost tuples each describe a single column predicate. The list of inner + predicates is interpreted as a conjunction (AND), forming a more selective and + multiple column predicate. Finally, the most outer list combines these filters + as a disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + Using the ``Expression`` API: + + .. code-block:: python + + import pyarrow.compute as pc + pc.field('x') = 0 + pc.field('y').isin(['a', 'b', 'c']) + ~pc.field('y').isin({'a', 'b'}) + + Using the DNF format: + + .. code-block:: python + + ("x", "=", 0) + ("y", "in", ["a", "b", "c"]) + ("z", "not in", {"a", "b"}) + + + ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. + pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. + coerce_int96_timestamp_unit : str, default None + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. 
The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + page_checksum_verification : bool, default False + If True, verify the checksum for each page read from the file. + + Returns + ------- + pyarrow.Table + Content of the file as a Table of Columns, including DataFrame + indexes as columns + """ + +def write_table( + table: Table, + where: str | Path | NativeFile | IO, + row_group_size: int | None = None, + version: Literal["1.0", "2.4", "2.6"] = "2.6", + use_dictionary: bool = True, + compression: _Compression | dict[str, _Compression] = "snappy", + write_statistics: bool | list = True, + use_deprecated_int96_timestamps: bool | None = None, + coerce_timestamps: str | None = None, + allow_truncated_timestamps: bool = False, + data_page_size: int | None = None, + flavor: str | None = None, + filesystem: SupportedFileSystem | None = None, + compression_level: int | dict | None = None, + use_byte_stream_split: bool = False, + column_encoding: str | dict | None = None, + data_page_version: Literal["1.0", "2.0"] = ..., + use_compliant_nested_type: bool = True, + encryption_properties: FileEncryptionProperties | None = None, + write_batch_size: int | None = None, + dictionary_pagesize_limit: int | None = None, + store_schema: bool = True, + write_page_index: bool = False, + write_page_checksum: bool = False, + sorting_columns: Sequence[SortingColumn] | None = None, + store_decimal_as_integer: bool = False, + **kwargs, +) -> None: + """ + + Write a Table to Parquet format. + + Parameters + ---------- + table : pyarrow.Table + where : string or pyarrow.NativeFile + row_group_size : int + Maximum number of rows in each written row group. If None, the + row group size will be the minimum of the Table size and + 1024 * 1024. + version : {"1.0", "2.4", "2.6"}, default "2.6" + Determine which Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). + use_dictionary : bool or list, default True + Specify if we should use dictionary encoding in general or only for + some columns. + When encoding the column, if the dictionary size is too large, the + column will fall back to ``PLAIN`` encoding. In particular, ``BOOLEAN`` type + doesn't support dictionary encoding. + compression : str or dict, default 'snappy' + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. + write_statistics : bool or list, default True + Specify if we should write statistics in general (default is True) or only + for some columns. + use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This takes priority over the coerce_timestamps option.
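A brief sketch of the ``version``/timestamp interplay noted above and in the ``coerce_timestamps`` entry that follows (illustrative only; file names are arbitrary):

    import datetime
    import pyarrow as pa
    import pyarrow.parquet as pq

    t = pa.table({"ts": pa.array([datetime.datetime(2023, 1, 1, 12, 30)], type=pa.timestamp("ns"))})
    # The default version="2.6" keeps nanosecond timestamps as-is.
    pq.write_table(t, "ts_26.parquet")
    # version="2.4" has no nanosecond type, so coerce to microseconds explicitly.
    pq.write_table(t, "ts_24.parquet", version="2.4", coerce_timestamps="us", allow_truncated_timestamps=True)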
+ coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. For ``version='1.0'`` and ``version='2.4'``, + nanoseconds are cast to microseconds ('us'), while for + ``version='2.6'`` (the default), they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. + If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} + allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamps=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. + data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. + flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. + filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. + compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. + use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_split are + enabled, then dictionary is preferred. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. + column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Can only be used when ``use_dictionary`` is set to False, and + cannot be used in combination with ``use_byte_stream_split``. + Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', + 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files <https://arrow.apache.org/docs/cpp/parquet.html#encodings>`_. + data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. + use_compliant_nested_type : bool, default True + Whether to write compliant Parquet nested type (lists) as defined + `here <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types>`_, defaults to ``True``.
+ For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + <list-repetition> group <name> (LIST) { + repeated group list { + <element-repetition> <element-type> element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + <list-repetition> group <name> (LIST) { + repeated group list { + <element-repetition> <element-type> item; + } + } + encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. + write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. + dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. + store_schema : bool, default True + By default, the Arrow schema is serialized and stored in the Parquet + file metadata (in the "ARROW:schema" key). When reading the file, + if this key is available, it will be used to more faithfully recreate + the original Arrow data. For example, for tz-aware timestamp columns + it will restore the timezone (Parquet only stores the UTC values without + timezone), or columns with duration type will be restored from the int64 + Parquet column. + write_page_index : bool, default False + Whether to write a page index in general for all columns. + Writing statistics to the page index disables the old method of writing + statistics to each data page header. The page index makes statistics-based + filtering more efficient than the page header, as it gathers all the + statistics for a Parquet file in a single place, avoiding scattered I/O. + Note that the page index is not yet used on the read side by PyArrow. + write_page_checksum : bool, default False + Whether to write page checksums in general for all columns. + Page checksums enable detection of data corruption, which might occur during + transmission or in the storage. + sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. + store_decimal_as_integer : bool, default False + Allow decimals with 1 <= precision <= 18 to be stored as integers. + In Parquet, DECIMAL can be stored in any of the following physical types: + - int32: for 1 <= precision <= 9. + - int64: for 10 <= precision <= 18.
+ - fixed_len_byte_array: for precision > 18. + + As a consequence, decimal columns stored in integer types are more compact. + + **kwargs : optional + Additional options for ParquetWriter + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table( + ... { + ... "n_legs": [2, 2, 4, 4, 5, 100], + ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + and write the Table into a Parquet file: + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, "example.parquet") + + Defining row group size for the Parquet file: + + >>> pq.write_table(table, "example.parquet", row_group_size=3) + + Defining row group compression (default is Snappy): + + >>> pq.write_table(table, "example.parquet", compression="none") + + Defining row group compression and encoding per-column: + + >>> pq.write_table( + ... table, + ... "example.parquet", + ... compression={"n_legs": "snappy", "animal": "gzip"}, + ... use_dictionary=["n_legs", "animal"], + ... ) + + Defining column encoding per-column: + + >>> pq.write_table( + ... table, "example.parquet", column_encoding={"animal": "PLAIN"}, use_dictionary=False + ... ) + """ + +def write_to_dataset( + table: Table, + root_path: str | Path, + partition_cols: list[str] | None = None, + filesystem: SupportedFileSystem | None = None, + schema: Schema | None = None, + partitioning: Partitioning | list[str] | None = None, + basename_template: str | None = None, + use_threads: bool | None = None, + file_visitor: Callable[[str], None] | None = None, + existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] + | None = None, + **kwargs, +) -> None: + """ + Wrapper around dataset.write_dataset for writing a Table to + Parquet format by partitions. + For each combination of partition columns and values, + subdirectories are created in the following + manner: + + root_dir/ + group1=value1 + group2=value1 + <uuid>.parquet + group2=value2 + <uuid>.parquet + group1=valueN + group2=value1 + <uuid>.parquet + group2=valueN + <uuid>.parquet + + Parameters + ---------- + table : pyarrow.Table + root_path : str, pathlib.Path + The root directory of the dataset. + partition_cols : list, + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + schema : Schema, optional + The Schema of the dataset. + partitioning : Partitioning or list[str], optional + The partitioning scheme specified with the + ``pyarrow.dataset.partitioning()`` function or a list of field names. + When providing a list of field names, you can use + ``partitioning_flavor`` to drive which partitioning type should be + used. + basename_template : str, optional + A template string used to generate basenames of written data files. + The token '{i}' will be replaced with an automatically incremented + integer. If not specified, it defaults to "guid-{i}.parquet". + use_threads : bool, default True + Write files in parallel. If enabled, then maximum parallelism will be + used, determined by the number of available CPU cores. + file_visitor : function + If set, this function will be called with a WrittenFile instance + for each file created during the call. This object will have both + a path attribute and a metadata attribute.
+
+        The path attribute will be a string containing the path to
+        the created file.
+
+        The metadata attribute will be the parquet metadata of the file.
+        This metadata will have the file path attribute set and can be used
+        to build a _metadata file. The metadata attribute will be None if
+        the format is not parquet.
+
+        Example visitor which simply collects the filenames created::
+
+            visited_paths = []
+
+            def file_visitor(written_file):
+                visited_paths.append(written_file.path)
+
+    existing_data_behavior : 'overwrite_or_ignore' | 'error' | 'delete_matching'
+        Controls how the dataset will handle data that already exists in
+        the destination. The default behaviour is 'overwrite_or_ignore'.
+
+        'overwrite_or_ignore' will ignore any existing data and will
+        overwrite files with the same name as an output file. Other
+        existing files will be ignored. This behavior, in combination
+        with a unique basename_template for each write, will allow for
+        an append workflow.
+
+        'error' will raise an error if any data exists in the destination.
+
+        'delete_matching' is useful when you are writing a partitioned
+        dataset. The first time each partition directory is encountered
+        the entire directory will be deleted. This allows you to overwrite
+        old partitions completely.
+    **kwargs : dict,
+        Used as additional kwargs for :func:`pyarrow.dataset.write_dataset`
+        function for matching kwargs, and remainder to
+        :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`.
+        See the docstring of :func:`write_table` and
+        :func:`pyarrow.dataset.write_dataset` for the available options.
+        Using `metadata_collector` in kwargs allows one to collect the
+        file metadata instances of dataset pieces. The file paths in the
+        ColumnChunkMetaData will be set relative to `root_path`.
+
+    Examples
+    --------
+    Generate an example PyArrow Table:
+
+    >>> import pyarrow as pa
+    >>> table = pa.table(
+    ...     {
+    ...         "year": [2020, 2022, 2021, 2022, 2019, 2021],
+    ...         "n_legs": [2, 2, 4, 4, 5, 100],
+    ...         "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"],
+    ...     }
+    ... )
+
+    and write it to a partitioned dataset:
+
+    >>> import pyarrow.parquet as pq
+    >>> pq.write_to_dataset(table, root_path="dataset_name_3", partition_cols=["year"])
+    >>> pq.ParquetDataset("dataset_name_3").files
+    ['dataset_name_3/year=2019/...-0.parquet', ...
+
+    Write a single Parquet file into the root folder:
+
+    >>> pq.write_to_dataset(table, root_path="dataset_name_4")
+    >>> pq.ParquetDataset("dataset_name_4/").files
+    ['dataset_name_4/...-0.parquet']
+    """
+
+def write_metadata(
+    schema: Schema,
+    where: str | NativeFile,
+    metadata_collector: list[FileMetaData] | None = None,
+    filesystem: SupportedFileSystem | None = None,
+    **kwargs,
+) -> None:
+    """
+    Write metadata-only Parquet file from schema. This can be used with
+    `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar
+    files.
+
+    Parameters
+    ----------
+    schema : pyarrow.Schema
+    where : string or pyarrow.NativeFile
+    metadata_collector : list
+        Where to collect metadata information.
+    filesystem : FileSystem, default None
+        If nothing passed, will be inferred from `where` if path-like, else
+        `where` is already a file-like object so no filesystem is needed.
+    **kwargs : dict,
+        Additional kwargs for ParquetWriter class. See docstring for
+        `ParquetWriter` for more information.
+
+    Examples
+    --------
+    Generate example data:
+
+    >>> import pyarrow as pa
+    >>> table = pa.table(
+    ...     {
+    ...         "n_legs": [2, 2, 4, 4, 5, 100],
+    ...
"animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], + ... } + ... ) + + Write a dataset and collect metadata information. + + >>> metadata_collector = [] + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, "dataset_metadata", metadata_collector=metadata_collector) + + Write the `_common_metadata` parquet file without row groups statistics. + + >>> pq.write_metadata(table.schema, "dataset_metadata/_common_metadata") + + Write the `_metadata` parquet file with row groups statistics. + + >>> pq.write_metadata( + ... table.schema, "dataset_metadata/_metadata", metadata_collector=metadata_collector + ... ) + """ + +def read_metadata( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> FileMetaData: + """ + Read FileMetaData from footer of a single Parquet file. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + + Returns + ------- + metadata : FileMetaData + The metadata of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_metadata("example.parquet") + + created_by: parquet-cpp-arrow version ... + num_columns: 2 + num_rows: 3 + num_row_groups: 1 + format_version: 2.6 + serialized_size: ... + """ + +def read_schema( + where: str | Path | IO | NativeFile, + memory_map: bool = False, + decryption_properties: FileDecryptionProperties | None = None, + filesystem: SupportedFileSystem | None = None, +) -> Schema: + """ + Read effective Arrow schema from Parquet file metadata. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. 
+ + Returns + ------- + schema : pyarrow.Schema + The schema of the Parquet file + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, "example.parquet") + + >>> pq.read_schema("example.parquet") + n_legs: int64 + animal: string + """ diff --git a/python/pyarrow-stubs/parquet/encryption.pyi b/python/pyarrow-stubs/parquet/encryption.pyi new file mode 100644 index 00000000000..5a77dae7ef7 --- /dev/null +++ b/python/pyarrow-stubs/parquet/encryption.pyi @@ -0,0 +1,15 @@ +from pyarrow._parquet_encryption import ( + CryptoFactory, + DecryptionConfiguration, + EncryptionConfiguration, + KmsClient, + KmsConnectionConfig, +) + +__all__ = [ + "CryptoFactory", + "DecryptionConfiguration", + "EncryptionConfiguration", + "KmsClient", + "KmsConnectionConfig", +] diff --git a/python/pyarrow-stubs/substrait.pyi b/python/pyarrow-stubs/substrait.pyi new file mode 100644 index 00000000000..a56a8a5b40f --- /dev/null +++ b/python/pyarrow-stubs/substrait.pyi @@ -0,0 +1,21 @@ +from pyarrow._substrait import ( + BoundExpressions, + SubstraitSchema, + deserialize_expressions, + deserialize_schema, + get_supported_functions, + run_query, + serialize_expressions, + serialize_schema, +) + +__all__ = [ + "BoundExpressions", + "get_supported_functions", + "run_query", + "deserialize_expressions", + "serialize_expressions", + "deserialize_schema", + "serialize_schema", + "SubstraitSchema", +] From 45f301c533346d1183504f6aa60973dfeaddbf31 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 03:00:02 +0200 Subject: [PATCH 15/26] workflow --- .github/workflows/python.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 8630dab7e93..443f336a975 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -142,11 +142,11 @@ jobs: run: |- python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-requests griffe libcst pushd python; - pip install -e . - python -m mypy pyarrow/*.pyi pyarrow/__lib_pxi/*.pyi pyarrow/tests/test_array.py pyarrow/tests/test_io.py - python -m pyright pyarrow/*.pyi pyarrow/__lib_pxi/*.pyi - python ../dev/update_stub_docstrings.py -f ./pyarrow - git status --porcelain=1 + # pip install -e . 
+ python -m mypy pyarrow-stubs/ pyarrow/tests/test_array.py pyarrow/tests/test_io.py + python -m pyright pyarrow-stubs/ + # python ../dev/update_stub_docstrings.py -f ./pyarrow + # git status --porcelain=1 macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 From 3e516e2c560b95ca619689b9ccb5ad257eb432ac Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 17:56:10 +0200 Subject: [PATCH 16/26] work --- .github/workflows/python.yml | 6 +- python/pyarrow-stubs/__init__.pyi | 52 +- python/pyarrow-stubs/_benchmark.pyi | 3 - python/pyarrow-stubs/_csv.pyi | 1101 ++++++++++++++------------ python/pyarrow-stubs/_cuda.pyi | 53 +- python/pyarrow-stubs/_fs.pyi | 4 +- python/pyarrow-stubs/compute.pyi | 2 +- python/pyarrow-stubs/lib.pyi | 52 +- python/pyarrow-stubs/pandas_shim.pyi | 22 +- 9 files changed, 649 insertions(+), 646 deletions(-) delete mode 100644 python/pyarrow-stubs/_benchmark.pyi diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 443f336a975..700218024a5 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -140,11 +140,11 @@ jobs: - name: Type check with mypy and pyright run: |- - python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-requests griffe libcst + python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests griffe libcst pushd python; # pip install -e . - python -m mypy pyarrow-stubs/ pyarrow/tests/test_array.py pyarrow/tests/test_io.py - python -m pyright pyarrow-stubs/ + mypy pyarrow-stubs pyarrow/tests/test_array.py pyarrow/tests/test_io.py + pyright pyarrow-stubs # python ../dev/update_stub_docstrings.py -f ./pyarrow # git status --porcelain=1 diff --git a/python/pyarrow-stubs/__init__.pyi b/python/pyarrow-stubs/__init__.pyi index d74b486fd55..6df38801de1 100644 --- a/python/pyarrow-stubs/__init__.pyi +++ b/python/pyarrow-stubs/__init__.pyi @@ -39,14 +39,8 @@ from pyarrow.lib import ( set_io_thread_count, ) -def show_versions() -> None: - """ - Print various version information, to help with error reporting. - """ -def show_info() -> None: - """ - Print detailed version and platform information, for error reporting - """ +def show_versions() -> None: ... +def show_info() -> None: ... def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... @@ -335,14 +329,15 @@ from pyarrow.lib import ( ArrowSerializationError, ) -from .ipc import serialize_pandas, deserialize_pandas +from pyarrow.ipc import serialize_pandas, deserialize_pandas +import pyarrow.ipc as ipc -import types as types +import pyarrow.types as types # ---------------------------------------------------------------------- # Deprecations -from .util import _deprecate_api, _deprecate_class +from pyarrow.util import _deprecate_api, _deprecate_class from pyarrow.ipc import ( Message, @@ -357,39 +352,13 @@ from pyarrow.ipc import ( # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. in # wheels) -def get_include() -> str: - """ - Return absolute path to directory containing Arrow C++ include - headers. Similar to numpy.get_include - """ +def get_include() -> str: ... def _get_pkg_config_executable() -> str: ... def _has_pkg_config(pkgname: str) -> bool: ... def _read_pkg_config_variable(pkgname: str, cli_args: list[str]) -> str: ... 
-def get_libraries() -> list[str]: - """ - Return list of library names to include in the `libraries` argument for C - or Cython extensions using pyarrow - """ -def create_library_symlinks() -> None: - """ - With Linux and macOS wheels, the bundled shared libraries have an embedded - ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them - with -larrow won't work unless we create symlinks at locations like - site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses - prior problems we had with shipping two copies of the shared libraries to - permit third party projects like turbodbc to build their C++ extensions - against the pyarrow wheels. - - This function must only be invoked once and only when the shared libraries - are bundled with the Python package, which should only apply to wheel-based - installs. It requires write access to the site-packages/pyarrow directory - and so depending on your system may need to be run with root. - """ -def get_library_dirs() -> list[str]: - """ - Return lists of directories likely to contain Arrow C++ libraries for - linking C or Cython extensions using pyarrow - """ +def get_libraries() -> list[str]: ... +def create_library_symlinks() -> None: ... +def get_library_dirs() -> list[str]: ... __all__ = [ "__version__", @@ -681,6 +650,7 @@ __all__ = [ "ArrowSerializationError", "serialize_pandas", "deserialize_pandas", + "ipc", "types", "_deprecate_api", "_deprecate_class", diff --git a/python/pyarrow-stubs/_benchmark.pyi b/python/pyarrow-stubs/_benchmark.pyi deleted file mode 100644 index 048973301dc..00000000000 --- a/python/pyarrow-stubs/_benchmark.pyi +++ /dev/null @@ -1,3 +0,0 @@ -from pyarrow.lib import benchmark_PandasObjectIsNull - -__all__ = ["benchmark_PandasObjectIsNull"] diff --git a/python/pyarrow-stubs/_csv.pyi b/python/pyarrow-stubs/_csv.pyi index ad52b2f380f..2f49f8c9a6c 100644 --- a/python/pyarrow-stubs/_csv.pyi +++ b/python/pyarrow-stubs/_csv.pyi @@ -1,556 +1,641 @@ -from typing import Any +from dataclasses import dataclass, field +from typing import IO, Any, Callable, Literal -import cuda # type: ignore[import-not-found] - -from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] +from _typeshed import StrPath from . import lib -from ._stubs_typing import ArrayLike -class Context(lib._Weakrefable): +@dataclass(kw_only=True) +class ReadOptions(lib._Weakrefable): """ - CUDA driver context. + Options for reading CSV files. + + Parameters + ---------- + use_threads : bool, optional (default True) + Whether to use multiple threads to accelerate reading + block_size : int, optional + How much bytes to process at a time from the input stream. + This will determine multi-threading granularity as well as + the size of individual record batches or table chunks. + Minimum valid value for block size is 1 + skip_rows : int, optional (default 0) + The number of rows to skip before the column names (if any) + and the CSV data. + skip_rows_after_names : int, optional (default 0) + The number of rows to skip after the column names. + This number can be larger than the number of rows in one + block, and empty rows are counted. + The order of application is as follows: + - `skip_rows` is applied (if non-zero); + - column names are read (unless `column_names` is set); + - `skip_rows_after_names` is applied (if non-zero). + column_names : list, optional + The column names of the target table. If empty, fall back on + `autogenerate_column_names`. 
+ autogenerate_column_names : bool, optional (default False) + Whether to autogenerate column names if `column_names` is empty. + If true, column names will be of the form "f0", "f1"... + If false, column names will be read from the first CSV row + after `skip_rows`. + encoding : str, optional (default 'utf8') + The character encoding of the CSV data. Columns that cannot + decode using this encoding can still be read as Binary. + + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = "1,2,3\\nFlamingo,2,2022-03-01\\nHorse,4,2022-03-02\\nBrittle stars,5,2022-03-03\\nCentipede,100,2022-03-04" + >>> print(s) + 1,2,3 + Flamingo,2,2022-03-01 + Horse,4,2022-03-02 + Brittle stars,5,2022-03-03 + Centipede,100,2022-03-04 + + Ignore the first numbered row and substitute it with defined + or autogenerated column names: + + >>> from pyarrow import csv + >>> read_options = csv.ReadOptions(column_names=["animals", "n_legs", "entry"], skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + >>> read_options = csv.ReadOptions(autogenerate_column_names=True, skip_rows=1) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + f0: string + f1: int64 + f2: date32[day] + ---- + f0: [["Flamingo","Horse","Brittle stars","Centipede"]] + f1: [[2,4,5,100]] + f2: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] + + Remove the first 2 rows of the data: + + >>> read_options = csv.ReadOptions(skip_rows_after_names=2) + >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) + pyarrow.Table + 1: string + 2: int64 + 3: date32[day] + ---- + 1: [["Brittle stars","Centipede"]] + 2: [[5,100]] + 3: [[2022-03-03,2022-03-04]] """ - def __init__(self, device_number: int = 0, handle: int | None = None) -> None: - """ - Create a CUDA driver context for a particular device. - - If a CUDA context handle is passed, it is wrapped, otherwise - a default CUDA context for the given device is requested. - - Parameters - ---------- - device_number : int (default 0) - Specify the GPU device for which the CUDA driver context is - requested. - handle : int, optional - Specify CUDA handle for a shared context that has been created - by another library. - """ - @staticmethod - def from_numba(context: _numba_driver.Context | None = None) -> Context: - """ - Create a Context instance from a Numba CUDA context. - - Parameters - ---------- - context : {numba.cuda.cudadrv.driver.Context, None} - A Numba CUDA context instance. - If None, the current Numba context is used. - - Returns - ------- - shared_context : pyarrow.cuda.Context - Context instance. - """ - def to_numba(self) -> _numba_driver.Context: - """ - Convert Context to a Numba CUDA context. - - Returns - ------- - context : numba.cuda.cudadrv.driver.Context - Numba CUDA context instance. - """ - @staticmethod - def get_num_devices() -> int: - """Return the number of GPU devices.""" - @property - def device_number(self) -> int: - """Return context device number.""" - @property - def handle(self) -> int: - """Return pointer to context handle.""" - def synchronize(self) -> None: - """Blocks until the device has completed all preceding requested - tasks. 
- """ - @property - def bytes_allocated(self) -> int: - """Return the number of allocated bytes.""" - def get_device_address(self, address: int) -> int: - """Return the device address that is reachable from kernels running in - the context - - Parameters - ---------- - address : int - Specify memory address value - - Returns - ------- - device_address : int - Device address accessible from device context - - Notes - ----- - The device address is defined as a memory address accessible - by device. While it is often a device memory address but it - can be also a host memory address, for instance, when the - memory is allocated as host memory (using cudaMallocHost or - cudaHostAlloc) or as managed memory (using cudaMallocManaged) - or the host memory is page-locked (using cudaHostRegister). - """ - def new_buffer(self, nbytes: int) -> CudaBuffer: - """Return new device buffer. - - Parameters - ---------- - nbytes : int - Specify the number of bytes to be allocated. - - Returns - ------- - buf : CudaBuffer - Allocated buffer. - """ - @property - def memory_manager(self) -> lib.MemoryManager: - """ - The default memory manager tied to this context's device. - - Returns - ------- - MemoryManager - """ - @property - def device(self) -> lib.Device: - """ - The device instance associated with this context. - - Returns - ------- - Device - """ - def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: - """ - Create device buffer from address and size as a view. - - The caller is responsible for allocating and freeing the - memory. When `address==size==0` then a new zero-sized buffer - is returned. - - Parameters - ---------- - address : int - Specify the starting address of the buffer. The address can - refer to both device or host memory but it must be - accessible from device after mapping it with - `get_device_address` method. - size : int - Specify the size of device buffer in bytes. - base : {None, object} - Specify object that owns the referenced memory. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device reachable memory. - - """ - def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: - """Open existing CUDA IPC memory handle - - Parameters - ---------- - ipc_handle : IpcMemHandle - Specify opaque pointer to CUipcMemHandle (driver API). - - Returns - ------- - buf : CudaBuffer - referencing device buffer - """ - def buffer_from_data( - self, - data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, - offset: int = 0, - size: int = -1, - ) -> CudaBuffer: - """Create device buffer and initialize with data. - - Parameters - ---------- - data : {CudaBuffer, HostBuffer, Buffer, array-like} - Specify data to be copied to device buffer. - offset : int - Specify the offset of input buffer for device data - buffering. Default: 0. - size : int - Specify the size of device buffer in bytes. Default: all - (starting from input offset) - - Returns - ------- - cbuf : CudaBuffer - Device buffer with copied data. - """ - def buffer_from_object(self, obj: Any) -> CudaBuffer: - """Create device buffer view of arbitrary object that references - device accessible memory. - - When the object contains a non-contiguous view of device - accessible memory then the returned device buffer will contain - contiguous view of the memory, that is, including the - intermediate data that is otherwise invisible to the input - object. 
- - Parameters - ---------- - obj : {object, Buffer, HostBuffer, CudaBuffer, ...} - Specify an object that holds (device or host) address that - can be accessed from device. This includes objects with - types defined in pyarrow.cuda as well as arbitrary objects - that implement the CUDA array interface as defined by numba. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device accessible memory. - - """ - -class IpcMemHandle(lib._Weakrefable): - """A serializable container for a CUDA IPC handle.""" - @staticmethod - def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: - """Create IpcMemHandle from opaque buffer (e.g. from another - process) - - Parameters - ---------- - opaque_handle : - a CUipcMemHandle as a const void* - - Returns - ------- - ipc_handle : IpcMemHandle - """ - def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: - """Write IpcMemHandle to a Buffer - - Parameters - ---------- - pool : {MemoryPool, None} - Specify a pool to allocate memory from - - Returns - ------- - buf : Buffer - The serialized buffer. - """ - -class CudaBuffer(lib.Buffer): - """An Arrow buffer with data located in a GPU device. - - To create a CudaBuffer instance, use Context.device_buffer(). - - The memory allocated in a CudaBuffer is freed when the buffer object - is deleted. + use_threads: bool = field(default=True, kw_only=False) + block_size: int | None = None + skip_rows: int = 0 + skip_rows_after_names: int = 0 + column_names: list[str] | None = None + autogenerate_column_names: bool = False + encoding: str = "utf8" + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ParseOptions(lib._Weakrefable): """ + Options for parsing CSV files. - @staticmethod - def from_buffer(buf: lib.Buffer) -> CudaBuffer: - """Convert back generic buffer into CudaBuffer - - Parameters - ---------- - buf : Buffer - Specify buffer containing CudaBuffer - - Returns - ------- - dbuf : CudaBuffer - Resulting device buffer. - """ - @staticmethod - def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: - """Create a CudaBuffer view from numba MemoryPointer instance. - - Parameters - ---------- - mem : numba.cuda.cudadrv.driver.MemoryPointer - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of numba MemoryPointer. - """ - def to_numba(self) -> _numba_driver.MemoryPointer: - """Return numba memory pointer of CudaBuffer instance.""" - def copy_to_host( - self, - position: int = 0, - nbytes: int = -1, - buf: lib.Buffer | None = None, - memory_pool: lib.MemoryPool | None = None, - resizable: bool = False, - ) -> lib.Buffer: - """Copy memory from GPU device to CPU host - - Caller is responsible for ensuring that all tasks affecting - the memory are finished. Use - - `.context.synchronize()` - - when needed. - - Parameters - ---------- - position : int - Specify the starting position of the source data in GPU - device buffer. Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - the position until host buffer is full). - buf : Buffer - Specify a pre-allocated output buffer in host. Default: None - (allocate new output buffer). - memory_pool : MemoryPool - resizable : bool - Specify extra arguments to allocate_buffer. Used only when - buf is None. - - Returns - ------- - buf : Buffer - Output buffer in host. - - """ - def copy_from_host( - self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 - ) -> int: - """Copy data from host to device. - - The device buffer must be pre-allocated. 
- - Parameters - ---------- - data : {Buffer, array-like} - Specify data in host. It can be array-like that is valid - argument to py_buffer - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - """ - def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: - """Copy data from device to device. - - Parameters - ---------- - buf : CudaBuffer - Specify source device buffer. - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - - """ - def export_for_ipc(self) -> IpcMemHandle: - """ - Expose this device buffer as IPC memory which can be used in other - processes. - - After calling this function, this device memory will not be - freed when the CudaBuffer is destructed. - - Returns - ------- - ipc_handle : IpcMemHandle - The exported IPC handle - - """ - @property - def context(self) -> Context: - """Returns the CUDA driver context of this buffer.""" - def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: - """Return slice of device buffer - - Parameters - ---------- - offset : int, default 0 - Specify offset from the start of device buffer to slice - length : int, default None - Specify the length of slice (default is until end of device - buffer starting from offset). If the length is larger than - the data available, the returned slice will have a size of - the available data starting from the offset. - - Returns - ------- - sliced : CudaBuffer - Zero-copy slice of device buffer. - - """ - def to_pybytes(self) -> bytes: - """Return device buffer content as Python bytes.""" - -class HostBuffer(lib.Buffer): - """Device-accessible CPU memory created using cudaHostAlloc. - - To create a HostBuffer instance, use - - cuda.new_host_buffer() + Parameters + ---------- + delimiter : 1-character string, optional (default ',') + The character delimiting individual cells in the CSV data. + quote_char : 1-character string or False, optional (default '"') + The character used optionally for quoting CSV values + (False if quoting is not allowed). + double_quote : bool, optional (default True) + Whether two quotes in a quoted CSV value denote a single quote + in the data. + escape_char : 1-character string or False, optional (default False) + The character used optionally for escaping special characters + (False if escaping is not allowed). + newlines_in_values : bool, optional (default False) + Whether newline characters are allowed in CSV values. + Setting this to True reduces the performance of multi-threaded + CSV reading. + ignore_empty_lines : bool, optional (default True) + Whether empty lines are ignored in CSV input. + If False, an empty line is interpreted as containing a single empty + value (assuming a one-column CSV file). + invalid_row_handler : callable, optional (default None) + If not None, this object is called for each CSV row that fails + parsing (because of a mismatching number of columns). + It should accept a single InvalidRow argument and return either + "skip" or "error" depending on the desired outcome. 
+ + Examples + -------- + + Defining an example file from bytes object: + + >>> import io + >>> s = ( + ... "animals;n_legs;entry\\n" + ... "Flamingo;2;2022-03-01\\n" + ... "# Comment here:\\n" + ... "Horse;4;2022-03-02\\n" + ... "Brittle stars;5;2022-03-03\\n" + ... "Centipede;100;2022-03-04" + ... ) + >>> print(s) + animals;n_legs;entry + Flamingo;2;2022-03-01 + # Comment here: + Horse;4;2022-03-02 + Brittle stars;5;2022-03-03 + Centipede;100;2022-03-04 + >>> source = io.BytesIO(s.encode()) + + Read the data from a file skipping rows with comments + and defining the delimiter: + + >>> from pyarrow import csv + >>> def skip_comment(row): + ... if row.text.startswith("# "): + ... return "skip" + ... else: + ... return "error" + >>> parse_options = csv.ParseOptions(delimiter=";", invalid_row_handler=skip_comment) + >>> csv.read_csv(source, parse_options=parse_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: date32[day] + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede"]] + n_legs: [[2,4,5,100]] + entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] """ - @property - def size(self) -> int: ... -class BufferReader(lib.NativeFile): - """File interface for zero-copy read from CUDA buffers. + delimiter: str = field(default=",", kw_only=False) + quote_char: str | Literal[False] = '"' + double_quote: bool = True + escape_char: str | Literal[False] = False + newlines_in_values: bool = False + ignore_empty_lines: bool = True + invalid_row_handler: Callable[[InvalidRow], Literal["skip", "error"]] | None = None - Note: Read methods return pointers to device memory. This means - you must be careful using this interface with any Arrow code which - may expect to be able to do anything other than pointer arithmetic - on the returned buffers. + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class ConvertOptions(lib._Weakrefable): """ - def __init__(self, obj: CudaBuffer) -> None: ... - def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: - """Return a slice view of the underlying device buffer. + Options for converting CSV data. - The slice will start at the current reader position and will - have specified size in bytes. + Parameters + ---------- + check_utf8 : bool, optional (default True) + Whether to check UTF8 validity of string columns. + column_types : pyarrow.Schema or dict, optional + Explicitly map column names to column types. Passing this argument + disables type inference on the defined columns. + null_values : list, optional + A sequence of strings that denote nulls in the data + (defaults are appropriate in most cases). Note that by default, + string columns are not checked for null values. To enable + null checking for those, specify ``strings_can_be_null=True``. + true_values : list, optional + A sequence of strings that denote true booleans in the data + (defaults are appropriate in most cases). + false_values : list, optional + A sequence of strings that denote false booleans in the data + (defaults are appropriate in most cases). + decimal_point : 1-character string, optional (default '.') + The character used as decimal point in floating-point and decimal + data. + strings_can_be_null : bool, optional (default False) + Whether string / binary columns can have null values. + If true, then strings in null_values are considered null for + string columns. + If false, then all strings are valid string values. + quoted_strings_can_be_null : bool, optional (default True) + Whether quoted values can be null. 
+ If true, then strings in "null_values" are also considered null + when they appear quoted in the CSV file. Otherwise, quoted values + are never considered null. + include_columns : list, optional + The names of columns to include in the Table. + If empty, the Table will include all columns from the CSV file. + If not empty, only these columns will be included, in this order. + include_missing_columns : bool, optional (default False) + If false, columns in `include_columns` but not in the CSV file will + error out. + If true, columns in `include_columns` but not in the CSV file will + produce a column of nulls (whose type is selected using + `column_types`, or null by default). + This option is ignored if `include_columns` is empty. + auto_dict_encode : bool, optional (default False) + Whether to try to automatically dict-encode string / binary data. + If true, then when type inference detects a string or binary column, + it it dict-encoded up to `auto_dict_max_cardinality` distinct values + (per chunk), after which it switches to regular encoding. + This setting is ignored for non-inferred columns (those in + `column_types`). + auto_dict_max_cardinality : int, optional + The maximum dictionary cardinality for `auto_dict_encode`. + This value is per chunk. + timestamp_parsers : list, optional + A sequence of strptime()-compatible format strings, tried in order + when attempting to infer or convert timestamp values (the special + value ISO8601() can also be given). By default, a fast built-in + ISO-8601 parser is used. + + Examples + -------- + + Defining an example data: + + >>> import io + >>> s = ( + ... "animals,n_legs,entry,fast\\n" + ... "Flamingo,2,01/03/2022,Yes\\n" + ... "Horse,4,02/03/2022,Yes\\n" + ... "Brittle stars,5,03/03/2022,No\\n" + ... "Centipede,100,04/03/2022,No\\n" + ... ",6,05/03/2022," + ... 
) + >>> print(s) + animals,n_legs,entry,fast + Flamingo,2,01/03/2022,Yes + Horse,4,02/03/2022,Yes + Brittle stars,5,03/03/2022,No + Centipede,100,04/03/2022,No + ,6,05/03/2022, + + Change the type of a column: + + >>> import pyarrow as pa + >>> from pyarrow import csv + >>> convert_options = csv.ConvertOptions(column_types={"n_legs": pa.float64()}) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: double + entry: string + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]] + fast: [["Yes","Yes","No","No",""]] + + Define a date parsing format to get a timestamp type column + (in case dates are not in ISO format and not converted by default): + + >>> convert_options = csv.ConvertOptions(timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + entry: timestamp[s] + fast: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [["Yes","Yes","No","No",""]] + + Specify a subset of columns to be read: + + >>> convert_options = csv.ConvertOptions(include_columns=["animals", "n_legs"]) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + + List additional column to be included as a null typed column: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs", "location"], include_missing_columns=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + location: null + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + n_legs: [[2,4,5,100,6]] + location: [5 nulls] + + Define columns as dictionary type (by default only the + string/binary columns are dictionary encoded): + + >>> convert_options = csv.ConvertOptions( + ... timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"], auto_dict_encode=True + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: dictionary + n_legs: int64 + entry: timestamp[s] + fast: dictionary + ---- + animals: [ -- dictionary: + ["Flamingo","Horse","Brittle stars","Centipede",""] -- indices: + [0,1,2,3,4]] + n_legs: [[2,4,5,100,6]] + entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] + fast: [ -- dictionary: + ["Yes","No",""] -- indices: + [0,0,1,1,2]] + + Set upper limit for the number of categories. If the categories + is more than the limit, the conversion to dictionary will not + happen: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals"], auto_dict_encode=True, auto_dict_max_cardinality=2 + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] + + Set empty strings to missing values: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["animals", "n_legs"], strings_can_be_null=True + ... 
) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + animals: string + n_legs: int64 + ---- + animals: [["Flamingo","Horse","Brittle stars","Centipede",null]] + n_legs: [[2,4,5,100,6]] + + Define values to be True and False when converting a column + into a bool type: + + >>> convert_options = csv.ConvertOptions( + ... include_columns=["fast"], false_values=["No"], true_values=["Yes"] + ... ) + >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) + pyarrow.Table + fast: bool + ---- + fast: [[true,true,false,false,null]] + """ - Parameters - ---------- - nbytes : int, default None - Specify the number of bytes to read. Default: None (read all - remaining bytes). + check_utf8: bool = field(default=True, kw_only=False) + column_types: lib.Schema | dict | None = None + null_values: list[str] | None = None + true_values: list[str] | None = None + false_values: list[str] | None = None + decimal_point: str = "." + strings_can_be_null: bool = False + quoted_strings_can_be_null: bool = True + include_columns: list[str] | None = None + include_missing_columns: bool = False + auto_dict_encode: bool = False + auto_dict_max_cardinality: int | None = None + timestamp_parsers: list[str] | None = None + + def validate(self) -> None: ... + +@dataclass(kw_only=True) +class WriteOptions(lib._Weakrefable): + """ + Options for writing CSV files. - Returns - ------- - cbuf : CudaBuffer - New device buffer. + Parameters + ---------- + include_header : bool, optional (default True) + Whether to write an initial header line with column names + batch_size : int, optional (default 1024) + How many rows to process together when converting and writing + CSV data + delimiter : 1-character string, optional (default ",") + The character delimiting individual cells in the CSV data. + quoting_style : str, optional (default "needed") + Whether to quote values, and if so, which quoting style to use. + The following values are accepted: + + - "needed" (default): only enclose values in quotes when needed. + - "all_valid": enclose all valid values in quotes; nulls are not quoted. + - "none": do not enclose any values in quotes; values containing + special characters (such as quotes, cell delimiters or line endings) + will raise an error. + """ - """ + include_header: bool = field(default=True, kw_only=False) + batch_size: int = 1024 + delimiter: str = "," + quoting_style: Literal["needed", "all_valid", "none"] = "needed" -class BufferWriter(lib.NativeFile): - """File interface for writing to CUDA buffers. + def validate(self) -> None: ... - By default writes are unbuffered. Use set_buffer_size to enable - buffering. +@dataclass +class InvalidRow(lib._Weakrefable): """ - def __init__(self, obj: CudaBuffer) -> None: ... - def writeat(self, position: int, data: ArrayLike) -> None: - """Write data to buffer starting from position. - - Parameters - ---------- - position : int - Specify device buffer position where the data will be - written. - data : array-like - Specify data, the data instance must implement buffer - protocol. - """ - @property - def buffer_size(self) -> int: - """Returns size of host (CPU) buffer, 0 for unbuffered""" - @buffer_size.setter - def buffer_size(self, buffer_size: int): - """Set CPU buffer size to limit calls to cudaMemcpy - - Parameters - ---------- - buffer_size : int - Specify the size of CPU buffer to allocate in bytes. 
- """ - @property - def num_bytes_buffered(self) -> int: - """Returns number of bytes buffered on host""" - -def new_host_buffer(size: int, device: int = 0) -> HostBuffer: - """Return buffer with CUDA-accessible memory on CPU host + Description of an invalid row in a CSV file. Parameters ---------- - size : int - Specify the number of bytes to be allocated. - device : int - Specify GPU device number. + expected_columns : int + The expected number of columns in the row. + actual_columns : int + The actual number of columns in the row. + number : int or None + The physical row number if known, otherwise None. + text : str + The contents of the row. + """ - Returns - ------- - dbuf : HostBuffer - Allocated host buffer + expected_columns: int + actual_columns: int + number: int | None + text: str + +class CSVWriter(lib._CRecordBatchWriter): + """ + Writer to create a CSV file. + + Parameters + ---------- + sink : str, path, pyarrow.OutputStream or file-like object + The location where to write the CSV data. + schema : pyarrow.Schema + The schema of the data to be written. + write_options : pyarrow.csv.WriteOptions + Options to configure writing the CSV data. + memory_pool : MemoryPool, optional + Pool for temporary allocations. """ -def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: - """Write record batch message to GPU device memory + def __init__( + self, + # TODO: OutputStream + sink: StrPath | IO[Any], + schema: lib.Schema, + write_options: WriteOptions | None = None, + *, + memory_pool: lib.MemoryPool | None = None, + ) -> None: ... + +class CSVStreamingReader(lib.RecordBatchReader): ... + +ISO8601: lib._Weakrefable + +def open_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> CSVStreamingReader: + """ + Open a streaming reader of CSV data. + + Reading using this function is always single-threaded. Parameters ---------- - batch : RecordBatch - Record batch to write - ctx : Context - CUDA Context to allocate device memory from + input_file : string, path or file-like object + The location of CSV data. If a string or path, and if it ends + with a recognized compressed file extension (e.g. ".gz" or ".bz2"), + the data is automatically decompressed when reading. + read_options : pyarrow.csv.ReadOptions, optional + Options for the CSV reader (see pyarrow.csv.ReadOptions constructor + for defaults) + parse_options : pyarrow.csv.ParseOptions, optional + Options for the CSV parser + (see pyarrow.csv.ParseOptions constructor for defaults) + convert_options : pyarrow.csv.ConvertOptions, optional + Options for converting CSV data + (see pyarrow.csv.ConvertOptions constructor for defaults) + memory_pool : MemoryPool, optional + Pool to allocate RecordBatch memory from Returns ------- - dbuf : CudaBuffer - device buffer which contains the record batch message + :class:`pyarrow.csv.CSVStreamingReader` """ -def read_message( - source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None -) -> lib.Message: - """Read Arrow IPC message located on GPU device +def read_csv( + input_file: StrPath | IO[Any], + read_options: ReadOptions | None = None, + parse_options: ParseOptions | None = None, + convert_options: ConvertOptions | None = None, + memory_pool: lib.MemoryPool | None = None, +) -> lib.Table: + """ + Read a Table from a stream of CSV data. 
Parameters
+    ----------
+    input_file : string, path or file-like object
+        The location of CSV data. If a string or path, and if it ends
+        with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
+        the data is automatically decompressed when reading.
+    read_options : pyarrow.csv.ReadOptions, optional
+        Options for the CSV reader (see pyarrow.csv.ReadOptions constructor
+        for defaults)
+    parse_options : pyarrow.csv.ParseOptions, optional
+        Options for the CSV parser
+        (see pyarrow.csv.ParseOptions constructor for defaults)
+    convert_options : pyarrow.csv.ConvertOptions, optional
+        Options for converting CSV data
+        (see pyarrow.csv.ConvertOptions constructor for defaults)
+    memory_pool : MemoryPool, optional
+        Pool to allocate Table memory from
+
+    Returns
+    -------
+    :class:`pyarrow.Table`
+        Contents of the CSV file as an in-memory table.
+
+    Examples
+    --------
+
+    Defining an example file from bytes object:
+
+    >>> import io
+    >>> s = (
+    ...     "animals,n_legs,entry\\n"
+    ...     "Flamingo,2,2022-03-01\\n"
+    ...     "Horse,4,2022-03-02\\n"
+    ...     "Brittle stars,5,2022-03-03\\n"
+    ...     "Centipede,100,2022-03-04"
+    ... )
+    >>> print(s)
+    animals,n_legs,entry
+    Flamingo,2,2022-03-01
+    Horse,4,2022-03-02
+    Brittle stars,5,2022-03-03
+    Centipede,100,2022-03-04
+    >>> source = io.BytesIO(s.encode())
+
+    Reading from the file:
+
+    >>> from pyarrow import csv
+    >>> csv.read_csv(source)
+    pyarrow.Table
+    animals: string
+    n_legs: int64
+    entry: date32[day]
+    ----
+    animals: [["Flamingo","Horse","Brittle stars","Centipede"]]
+    n_legs: [[2,4,5,100]]
+    entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]]
+    """
+
+def write_csv(
+    data: lib.RecordBatch | lib.Table,
+    output_file: StrPath | lib.NativeFile | IO[Any],
+    write_options: WriteOptions | None = None,
+    memory_pool: lib.MemoryPool | None = None,
+) -> None:
+    """
+    Write record batch or table to a CSV file.
+
+    Parameters
+    ----------
+    data : pyarrow.RecordBatch or pyarrow.Table
+        The data to write.
+    output_file : string, path, pyarrow.NativeFile, or file-like object
+        The location where to write the CSV data.
+    write_options : pyarrow.csv.WriteOptions
+        Options to configure writing the CSV data.
+    memory_pool : MemoryPool, optional
+        Pool for temporary allocations.
- Returns - ------- - batch : RecordBatch - Reconstructed record batch, with device pointers + Examples + -------- + + >>> import pyarrow as pa + >>> from pyarrow import csv + + >>> legs = pa.array([2, 4, 5, 100]) + >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) + >>> entry_date = pa.array(["01/03/2022", "02/03/2022", "03/03/2022", "04/03/2022"]) + >>> table = pa.table([animals, legs, entry_date], names=["animals", "n_legs", "entry"]) + + >>> csv.write_csv(table, "animals.csv") + + >>> write_options = csv.WriteOptions(include_header=False) + >>> csv.write_csv(table, "animals.csv", write_options=write_options) + >>> write_options = csv.WriteOptions(delimiter=";") + >>> csv.write_csv(table, "animals.csv", write_options=write_options) """ diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi index 94f1b33e2e0..da769f1713f 100644 --- a/python/pyarrow-stubs/_cuda.pyi +++ b/python/pyarrow-stubs/_cuda.pyi @@ -2,13 +2,12 @@ from typing import Any import cuda # type: ignore[import-not-found] -from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-untyped] -# from . import lib -from .lib import _Weakrefable, Buffer, MemoryPool, NativeFile, RecordBatch, Schema, DictionaryMemo, Message, MemoryManager, Device +from . import lib from ._stubs_typing import ArrayLike -class Context(_Weakrefable): +class Context(lib._Weakrefable): """ CUDA driver context. """ @@ -107,7 +106,7 @@ class Context(_Weakrefable): Allocated buffer. """ @property - def memory_manager(self) -> MemoryManager: + def memory_manager(self) -> lib.MemoryManager: """ The default memory manager tied to this context's device. @@ -116,7 +115,7 @@ class Context(_Weakrefable): MemoryManager """ @property - def device(self) -> Device: + def device(self) -> lib.Device: """ The device instance associated with this context. @@ -165,7 +164,7 @@ class Context(_Weakrefable): """ def buffer_from_data( self, - data: CudaBuffer | HostBuffer | Buffer | ArrayLike, + data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, offset: int = 0, size: int = -1, ) -> CudaBuffer: @@ -212,10 +211,10 @@ class Context(_Weakrefable): """ -class IpcMemHandle(_Weakrefable): +class IpcMemHandle(lib._Weakrefable): """A serializable container for a CUDA IPC handle.""" @staticmethod - def from_buffer(opaque_handle: Buffer) -> IpcMemHandle: + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: """Create IpcMemHandle from opaque buffer (e.g. from another process) @@ -228,7 +227,7 @@ class IpcMemHandle(_Weakrefable): ------- ipc_handle : IpcMemHandle """ - def serialize(self, pool: MemoryPool | None = None) -> Buffer: + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: """Write IpcMemHandle to a Buffer Parameters @@ -242,7 +241,7 @@ class IpcMemHandle(_Weakrefable): The serialized buffer. """ -class CudaBuffer(Buffer): +class CudaBuffer(lib.Buffer): """An Arrow buffer with data located in a GPU device. To create a CudaBuffer instance, use Context.device_buffer(). 
@@ -252,7 +251,7 @@ class CudaBuffer(Buffer): """ @staticmethod - def from_buffer(buf: Buffer) -> CudaBuffer: + def from_buffer(buf: lib.Buffer) -> CudaBuffer: """Convert back generic buffer into CudaBuffer Parameters @@ -284,10 +283,10 @@ class CudaBuffer(Buffer): self, position: int = 0, nbytes: int = -1, - buf: Buffer | None = None, - memory_pool: MemoryPool | None = None, + buf: lib.Buffer | None = None, + memory_pool: lib.MemoryPool | None = None, resizable: bool = False, - ) -> Buffer: + ) -> lib.Buffer: """Copy memory from GPU device to CPU host Caller is responsible for ensuring that all tasks affecting @@ -320,7 +319,7 @@ class CudaBuffer(Buffer): """ def copy_from_host( - self, data: Buffer | ArrayLike, position: int = 0, nbytes: int = -1 + self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 ) -> int: """Copy data from host to device. @@ -402,7 +401,7 @@ class CudaBuffer(Buffer): def to_pybytes(self) -> bytes: """Return device buffer content as Python bytes.""" -class HostBuffer(Buffer): +class HostBuffer(lib.Buffer): """Device-accessible CPU memory created using cudaHostAlloc. To create a HostBuffer instance, use @@ -412,7 +411,7 @@ class HostBuffer(Buffer): @property def size(self) -> int: ... -class BufferReader(NativeFile): +class BufferReader(lib.NativeFile): """File interface for zero-copy read from CUDA buffers. Note: Read methods return pointers to device memory. This means @@ -440,7 +439,7 @@ class BufferReader(NativeFile): """ -class BufferWriter(NativeFile): +class BufferWriter(lib.NativeFile): """File interface for writing to CUDA buffers. By default writes are unbuffered. Use set_buffer_size to enable @@ -491,7 +490,7 @@ def new_host_buffer(size: int, device: int = 0) -> HostBuffer: Allocated host buffer """ -def serialize_record_batch(batch: RecordBatch, ctx: Context) -> CudaBuffer: +def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: """Write record batch message to GPU device memory Parameters @@ -508,8 +507,8 @@ def serialize_record_batch(batch: RecordBatch, ctx: Context) -> CudaBuffer: """ def read_message( - source: CudaBuffer | cuda.BufferReader, pool: MemoryManager | None = None -) -> Message: + source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None +) -> lib.Message: """Read Arrow IPC message located on GPU device Parameters @@ -526,12 +525,12 @@ def read_message( """ def read_record_batch( - buffer: Buffer, - object: Schema, + buffer: lib.Buffer, + object: lib.Schema, *, - dictionary_memo: DictionaryMemo | None = None, - pool: MemoryPool | None = None, -) -> RecordBatch: + dictionary_memo: lib.DictionaryMemo | None = None, + pool: lib.MemoryPool | None = None, +) -> lib.RecordBatch: """Construct RecordBatch referencing IPC message located on CUDA device. While the metadata is copied to host memory for deserialization, diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi index 9b0f0ceaa20..df6f30ab509 100644 --- a/python/pyarrow-stubs/_fs.pyi +++ b/python/pyarrow-stubs/_fs.pyi @@ -19,8 +19,6 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable -SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] - class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() @@ -999,3 +997,5 @@ class FileSystemHandler(ABC): path : str path of what should be normalized. 
""" + +SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] diff --git a/python/pyarrow-stubs/compute.pyi b/python/pyarrow-stubs/compute.pyi index 5c816773c62..8ee0a929ffd 100644 --- a/python/pyarrow-stubs/compute.pyi +++ b/python/pyarrow-stubs/compute.pyi @@ -216,9 +216,9 @@ NumericOrDurationScalar: TypeAlias = NumericScalar | lib.DurationScalar NumericOrTemporalScalar: TypeAlias = NumericScalar | TemporalScalar _NumericOrTemporalScalarT = TypeVar("_NumericOrTemporalScalarT", bound=NumericOrTemporalScalar) +_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) NumericArray: TypeAlias = ArrayOrChunkedArray[_NumericScalarT] _NumericArrayT = TypeVar("_NumericArrayT", bound=NumericArray) -_NumericScalarT = TypeVar("_NumericScalarT", bound=NumericScalar) _NumericOrDurationT = TypeVar("_NumericOrDurationT", bound=NumericOrDurationScalar) NumericOrDurationArray: TypeAlias = ArrayOrChunkedArray[NumericOrDurationScalar] _NumericOrDurationArrayT = TypeVar("_NumericOrDurationArrayT", bound=NumericOrDurationArray) diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index 57e23c3eaea..565feb4b3db 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -16,13 +16,10 @@ # under the License. # ruff: noqa: F403 -from collections.abc import Mapping -import datetime as dt -from typing import NamedTuple, Literal -from typing_extensions import TypeVar +from typing import NamedTuple from .array import * -from ._benchmark import * +# from .benchmark import * from .builder import * from .compat import * from .config import * @@ -83,51 +80,6 @@ def is_threading_enabled() -> bool: threading doesn't work (e.g. Emscripten). """ -def ensure_metadata( - meta: Mapping[bytes | str, bytes | str] | KeyValueMetadata | None, allow_none: bool = False -) -> KeyValueMetadata | None: ... - -def tzinfo_to_string(tz: dt.tzinfo) -> str: - """ - Converts a time zone object into a string indicating the name of a time - zone, one of: - * As used in the Olson time zone database (the "tz database" or - "tzdata"), such as "America/New_York" - * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - - Parameters - ---------- - tz : datetime.tzinfo - Time zone object - - Returns - ------- - name : str - Time zone name - """ - -def string_to_tzinfo(name: str) -> dt.tzinfo: - """ - Convert a time zone name into a time zone object. - - Supported input strings are: - * As used in the Olson time zone database (the "tz database" or - "tzdata"), such as "America/New_York" - * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30 - - Parameters - ---------- - name: str - Time zone name. - - Returns - ------- - tz : datetime.tzinfo - Time zone object - """ - -def ensure_type(ty: _DataTypeT | None, allow_none: Literal[True] | Literal[False] | None = None) -> _DataTypeT | None: ... 
- Type_NA: int Type_BOOL: int Type_UINT8: int diff --git a/python/pyarrow-stubs/pandas_shim.pyi b/python/pyarrow-stubs/pandas_shim.pyi index 0e80fae4ebf..2e5f1502fb6 100644 --- a/python/pyarrow-stubs/pandas_shim.pyi +++ b/python/pyarrow-stubs/pandas_shim.pyi @@ -1,7 +1,7 @@ from types import ModuleType from typing import Any, Iterable, TypeGuard -import pandas as pd +from pandas import Categorical, DatetimeTZDtype, Index, Series, DataFrame from numpy import dtype from pandas.core.dtypes.base import ExtensionDtype @@ -9,8 +9,8 @@ from pandas.core.dtypes.base import ExtensionDtype class _PandasAPIShim: has_sparse: bool - def series(self, *args, **kwargs) -> pd.Series: ... - def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + def series(self, *args, **kwargs) -> Series: ... + def data_frame(self, *args, **kwargs) -> DataFrame: ... @property def have_pandas(self) -> bool: ... @property @@ -28,21 +28,21 @@ class _PandasAPIShim: def is_ge_v23(self) -> bool: ... def is_ge_v3(self) -> bool: ... @property - def categorical_type(self) -> type[pd.Categorical]: ... + def categorical_type(self) -> type[Categorical]: ... @property - def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + def datetimetz_type(self) -> type[DatetimeTZDtype]: ... @property def extension_dtype(self) -> type[ExtensionDtype]: ... def is_array_like( self, obj: Any - ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... - def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... - def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + ) -> TypeGuard[Series | Index | Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[DatetimeTZDtype]: ... def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... def is_sparse(self, obj: Any) -> bool: ... - def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... - def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... - def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def is_data_frame(self, obj: Any) -> TypeGuard[DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[Series]: ... + def is_index(self, obj: Any) -> TypeGuard[Index]: ... def get_values(self, obj: Any) -> bool: ... def get_rangeindex_attribute(self, level, name): ... 
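Note on the `_cuda.pyi` hunks above: they only retarget the stub annotations at `lib.Buffer`, `lib.RecordBatch`, `lib.MemoryPool` and friends; the runtime API is unchanged. As a quick sanity check of the annotated signatures, a minimal round-trip sketch (an illustration only, assuming a CUDA-capable device and a pyarrow build with CUDA support) could look like:

    import pyarrow as pa
    from pyarrow import cuda

    ctx = cuda.Context(0)                                  # first GPU device
    batch = pa.RecordBatch.from_pydict({"x": [1, 2, 3]})

    # serialize_record_batch(batch, ctx) -> CudaBuffer, per the stub above
    dev_buf = cuda.serialize_record_batch(batch, ctx)

    # copy_to_host() -> lib.Buffer: device memory copied back to the CPU
    host_buf = dev_buf.copy_to_host()

    # read_record_batch(buffer, schema) -> lib.RecordBatch; per the docstring,
    # the batch data stays on the device, only metadata is copied to host
    restored = cuda.read_record_batch(dev_buf, batch.schema)
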
From 95769edf85945ce86ac13500a21900804c2c243c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 18:06:02 +0200 Subject: [PATCH 17/26] add license --- python/pyarrow-stubs/_compute.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_csv.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_cuda.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_dataset.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_dataset_orc.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_dataset_parquet.pyi | 17 +++++++++++++++++ .../_dataset_parquet_encryption.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_feather.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_flight.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_fs.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_gcsfs.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_hdfs.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_ipc.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_json.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_orc.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_parquet.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_parquet_encryption.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_s3fs.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/_substrait.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/acero.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/builder.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/cffi.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/compute.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/config.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/csv.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/cuda.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/dataset.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/device.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/error.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/feather.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/flight.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/fs.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/gandiva.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/interchange/__init__.pyi | 16 ++++++++++++++++ python/pyarrow-stubs/interchange/buffer.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/interchange/column.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/interchange/dataframe.pyi | 17 +++++++++++++++++ .../interchange/from_dataframe.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/ipc.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/json.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/orc.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/pandas_compat.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/pandas_shim.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/parquet/__init__.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/parquet/core.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/parquet/encryption.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/substrait.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/table.pyi | 17 +++++++++++++++++ python/pyarrow-stubs/util.pyi | 17 +++++++++++++++++ 49 files changed, 832 insertions(+) diff --git a/python/pyarrow-stubs/_compute.pyi b/python/pyarrow-stubs/_compute.pyi index 3d61ae42787..e8360b48edc 100644 --- a/python/pyarrow-stubs/_compute.pyi +++ b/python/pyarrow-stubs/_compute.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import ( Any, Callable, diff --git a/python/pyarrow-stubs/_csv.pyi b/python/pyarrow-stubs/_csv.pyi index 2f49f8c9a6c..c490d6be93a 100644 --- a/python/pyarrow-stubs/_csv.pyi +++ b/python/pyarrow-stubs/_csv.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from dataclasses import dataclass, field from typing import IO, Any, Callable, Literal diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi index da769f1713f..c96951b863c 100644 --- a/python/pyarrow-stubs/_cuda.pyi +++ b/python/pyarrow-stubs/_cuda.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any import cuda # type: ignore[import-not-found] diff --git a/python/pyarrow-stubs/_dataset.pyi b/python/pyarrow-stubs/_dataset.pyi index e0f38d54eff..3665bdba00b 100644 --- a/python/pyarrow-stubs/_dataset.pyi +++ b/python/pyarrow-stubs/_dataset.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow-stubs/_dataset_orc.pyi b/python/pyarrow-stubs/_dataset_orc.pyi index 9c4ac04198f..d4e5784750f 100644 --- a/python/pyarrow-stubs/_dataset_orc.pyi +++ b/python/pyarrow-stubs/_dataset_orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ._dataset import FileFormat class OrcFileFormat(FileFormat): diff --git a/python/pyarrow-stubs/_dataset_parquet.pyi b/python/pyarrow-stubs/_dataset_parquet.pyi index cbcc17235f1..007d3404a18 100644 --- a/python/pyarrow-stubs/_dataset_parquet.pyi +++ b/python/pyarrow-stubs/_dataset_parquet.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from dataclasses import dataclass from typing import IO, Any, Iterable, TypedDict diff --git a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi index 7623275b865..be40c0b39b3 100644 --- a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi +++ b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from ._dataset_parquet import ParquetFileWriteOptions, ParquetFragmentScanOptions from ._parquet import FileDecryptionProperties from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig diff --git a/python/pyarrow-stubs/_feather.pyi b/python/pyarrow-stubs/_feather.pyi index 8bb914ba45d..373fe38cdce 100644 --- a/python/pyarrow-stubs/_feather.pyi +++ b/python/pyarrow-stubs/_feather.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO from _typeshed import StrPath diff --git a/python/pyarrow-stubs/_flight.pyi b/python/pyarrow-stubs/_flight.pyi index 4450c42df49..a79475a8796 100644 --- a/python/pyarrow-stubs/_flight.pyi +++ b/python/pyarrow-stubs/_flight.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import asyncio import enum import sys diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi index df6f30ab509..1f3667ef413 100644 --- a/python/pyarrow-stubs/_fs.pyi +++ b/python/pyarrow-stubs/_fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt import enum import sys diff --git a/python/pyarrow-stubs/_gcsfs.pyi b/python/pyarrow-stubs/_gcsfs.pyi index 4fc7ea68e48..0ced106615a 100644 --- a/python/pyarrow-stubs/_gcsfs.pyi +++ b/python/pyarrow-stubs/_gcsfs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt from ._fs import FileSystem diff --git a/python/pyarrow-stubs/_hdfs.pyi b/python/pyarrow-stubs/_hdfs.pyi index 200f669379b..ed367379171 100644 --- a/python/pyarrow-stubs/_hdfs.pyi +++ b/python/pyarrow-stubs/_hdfs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from _typeshed import StrPath from ._fs import FileSystem diff --git a/python/pyarrow-stubs/_ipc.pyi b/python/pyarrow-stubs/_ipc.pyi index fc48cae3c04..1676e49e962 100644 --- a/python/pyarrow-stubs/_ipc.pyi +++ b/python/pyarrow-stubs/_ipc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + import enum import sys diff --git a/python/pyarrow-stubs/_json.pyi b/python/pyarrow-stubs/_json.pyi index 43d2ae83cd8..f416b4b29c6 100644 --- a/python/pyarrow-stubs/_json.pyi +++ b/python/pyarrow-stubs/_json.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Any, Literal from _typeshed import StrPath diff --git a/python/pyarrow-stubs/_orc.pyi b/python/pyarrow-stubs/_orc.pyi index 71bf0dde9ba..7587cc121c3 100644 --- a/python/pyarrow-stubs/_orc.pyi +++ b/python/pyarrow-stubs/_orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Literal from .lib import ( diff --git a/python/pyarrow-stubs/_parquet.pyi b/python/pyarrow-stubs/_parquet.pyi index a9187df0428..c75337cbf3b 100644 --- a/python/pyarrow-stubs/_parquet.pyi +++ b/python/pyarrow-stubs/_parquet.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import IO, Any, Iterable, Iterator, Literal, Sequence, TypeAlias, TypedDict from _typeshed import StrPath diff --git a/python/pyarrow-stubs/_parquet_encryption.pyi b/python/pyarrow-stubs/_parquet_encryption.pyi index c707edb844a..e1228cbdb5a 100644 --- a/python/pyarrow-stubs/_parquet_encryption.pyi +++ b/python/pyarrow-stubs/_parquet_encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt from typing import Callable diff --git a/python/pyarrow-stubs/_s3fs.pyi b/python/pyarrow-stubs/_s3fs.pyi index 50f63cd7e32..f1399bc4b1e 100644 --- a/python/pyarrow-stubs/_s3fs.pyi +++ b/python/pyarrow-stubs/_s3fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from typing import Literal, TypedDict diff --git a/python/pyarrow-stubs/_substrait.pyi b/python/pyarrow-stubs/_substrait.pyi index ff226e9521b..ee78e9720fe 100644 --- a/python/pyarrow-stubs/_substrait.pyi +++ b/python/pyarrow-stubs/_substrait.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import Any, Callable from ._compute import Expression diff --git a/python/pyarrow-stubs/acero.pyi b/python/pyarrow-stubs/acero.pyi index 8a520bdc24a..2abb608b32c 100644 --- a/python/pyarrow-stubs/acero.pyi +++ b/python/pyarrow-stubs/acero.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow-stubs/builder.pyi b/python/pyarrow-stubs/builder.pyi index 4a0e9ca4708..39372f8e512 100644 --- a/python/pyarrow-stubs/builder.pyi +++ b/python/pyarrow-stubs/builder.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Iterable from pyarrow.lib import MemoryPool, _Weakrefable diff --git a/python/pyarrow-stubs/cffi.pyi b/python/pyarrow-stubs/cffi.pyi index 2ae945c5974..e4f077d7155 100644 --- a/python/pyarrow-stubs/cffi.pyi +++ b/python/pyarrow-stubs/cffi.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import cffi c_source: str diff --git a/python/pyarrow-stubs/compute.pyi b/python/pyarrow-stubs/compute.pyi index 8ee0a929ffd..dcedb34b14a 100644 --- a/python/pyarrow-stubs/compute.pyi +++ b/python/pyarrow-stubs/compute.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + # ruff: noqa: I001 from typing import Literal, TypeAlias, TypeVar, overload, Any, Iterable, ParamSpec, Sequence from collections.abc import Callable diff --git a/python/pyarrow-stubs/config.pyi b/python/pyarrow-stubs/config.pyi index 166e10c9734..7c2eb8a9c98 100644 --- a/python/pyarrow-stubs/config.pyi +++ b/python/pyarrow-stubs/config.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import NamedTuple class VersionInfo(NamedTuple): diff --git a/python/pyarrow-stubs/csv.pyi b/python/pyarrow-stubs/csv.pyi index 510229d7e72..a7abd413aab 100644 --- a/python/pyarrow-stubs/csv.pyi +++ b/python/pyarrow-stubs/csv.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._csv import ( ISO8601, ConvertOptions, diff --git a/python/pyarrow-stubs/cuda.pyi b/python/pyarrow-stubs/cuda.pyi index e11baf7d4e7..0394965bb73 100644 --- a/python/pyarrow-stubs/cuda.pyi +++ b/python/pyarrow-stubs/cuda.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._cuda import ( BufferReader, BufferWriter, diff --git a/python/pyarrow-stubs/dataset.pyi b/python/pyarrow-stubs/dataset.pyi index 98f1a38aa85..6cb7fed43e6 100644 --- a/python/pyarrow-stubs/dataset.pyi +++ b/python/pyarrow-stubs/dataset.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Callable, Iterable, Literal, Sequence, TypeAlias, overload from _typeshed import StrPath diff --git a/python/pyarrow-stubs/device.pyi b/python/pyarrow-stubs/device.pyi index d1b9f39eedd..6c4f1fdeeea 100644 --- a/python/pyarrow-stubs/device.pyi +++ b/python/pyarrow-stubs/device.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from pyarrow.lib import _Weakrefable diff --git a/python/pyarrow-stubs/error.pyi b/python/pyarrow-stubs/error.pyi index 981ed51e680..c1e1a04ee40 100644 --- a/python/pyarrow-stubs/error.pyi +++ b/python/pyarrow-stubs/error.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow-stubs/feather.pyi b/python/pyarrow-stubs/feather.pyi index 9451ee15763..ce8d83dbcd9 100644 --- a/python/pyarrow-stubs/feather.pyi +++ b/python/pyarrow-stubs/feather.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import IO, Literal import pandas as pd diff --git a/python/pyarrow-stubs/flight.pyi b/python/pyarrow-stubs/flight.pyi index 9b806ccf305..dcc6ee2244b 100644 --- a/python/pyarrow-stubs/flight.pyi +++ b/python/pyarrow-stubs/flight.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._flight import ( Action, ActionType, diff --git a/python/pyarrow-stubs/fs.pyi b/python/pyarrow-stubs/fs.pyi index 6bf75616c13..6c5a0af8d19 100644 --- a/python/pyarrow-stubs/fs.pyi +++ b/python/pyarrow-stubs/fs.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from pyarrow._fs import ( # noqa FileSelector, FileType, diff --git a/python/pyarrow-stubs/gandiva.pyi b/python/pyarrow-stubs/gandiva.pyi index a344f885b29..bc07e15c4a6 100644 --- a/python/pyarrow-stubs/gandiva.pyi +++ b/python/pyarrow-stubs/gandiva.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Iterable, Literal from .lib import Array, DataType, Field, MemoryPool, RecordBatch, Schema, _Weakrefable diff --git a/python/pyarrow-stubs/interchange/__init__.pyi b/python/pyarrow-stubs/interchange/__init__.pyi index e69de29bb2d..13a83393a91 100644 --- a/python/pyarrow-stubs/interchange/__init__.pyi +++ b/python/pyarrow-stubs/interchange/__init__.pyi @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyarrow-stubs/interchange/buffer.pyi b/python/pyarrow-stubs/interchange/buffer.pyi index 46673961a75..78d1dabb8b7 100644 --- a/python/pyarrow-stubs/interchange/buffer.pyi +++ b/python/pyarrow-stubs/interchange/buffer.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ import enum from pyarrow.lib import Buffer diff --git a/python/pyarrow-stubs/interchange/column.pyi b/python/pyarrow-stubs/interchange/column.pyi index e6662867b6b..ce7e169bfb5 100644 --- a/python/pyarrow-stubs/interchange/column.pyi +++ b/python/pyarrow-stubs/interchange/column.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import enum from typing import Any, Iterable, TypeAlias, TypedDict diff --git a/python/pyarrow-stubs/interchange/dataframe.pyi b/python/pyarrow-stubs/interchange/dataframe.pyi index 526a58926a9..a7ea6aeac74 100644 --- a/python/pyarrow-stubs/interchange/dataframe.pyi +++ b/python/pyarrow-stubs/interchange/dataframe.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow-stubs/interchange/from_dataframe.pyi b/python/pyarrow-stubs/interchange/from_dataframe.pyi index b04b6268975..aa6217b6181 100644 --- a/python/pyarrow-stubs/interchange/from_dataframe.pyi +++ b/python/pyarrow-stubs/interchange/from_dataframe.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from typing import Any, Protocol, TypeAlias from pyarrow.lib import Array, Buffer, DataType, DictionaryArray, RecordBatch, Table diff --git a/python/pyarrow-stubs/ipc.pyi b/python/pyarrow-stubs/ipc.pyi index c7f2af004d4..985cf0678f9 100644 --- a/python/pyarrow-stubs/ipc.pyi +++ b/python/pyarrow-stubs/ipc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from io import IOBase import pandas as pd diff --git a/python/pyarrow-stubs/json.pyi b/python/pyarrow-stubs/json.pyi index db1d35e0b8b..67768db42e4 100644 --- a/python/pyarrow-stubs/json.pyi +++ b/python/pyarrow-stubs/json.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._json import ParseOptions, ReadOptions, open_json, read_json __all__ = ["ParseOptions", "ReadOptions", "read_json", "open_json"] diff --git a/python/pyarrow-stubs/orc.pyi b/python/pyarrow-stubs/orc.pyi index 2eba8d40a11..557f38a2b9e 100644 --- a/python/pyarrow-stubs/orc.pyi +++ b/python/pyarrow-stubs/orc.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ import sys if sys.version_info >= (3, 11): diff --git a/python/pyarrow-stubs/pandas_compat.pyi b/python/pyarrow-stubs/pandas_compat.pyi index efbd05ac2fe..82fcb19ad97 100644 --- a/python/pyarrow-stubs/pandas_compat.pyi +++ b/python/pyarrow-stubs/pandas_compat.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from typing import Any, TypedDict, TypeVar import numpy as np diff --git a/python/pyarrow-stubs/pandas_shim.pyi b/python/pyarrow-stubs/pandas_shim.pyi index 2e5f1502fb6..e62767b1591 100644 --- a/python/pyarrow-stubs/pandas_shim.pyi +++ b/python/pyarrow-stubs/pandas_shim.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from types import ModuleType from typing import Any, Iterable, TypeGuard diff --git a/python/pyarrow-stubs/parquet/__init__.pyi b/python/pyarrow-stubs/parquet/__init__.pyi index 151ee188f84..8d0b5374ea0 100644 --- a/python/pyarrow-stubs/parquet/__init__.pyi +++ b/python/pyarrow-stubs/parquet/__init__.pyi @@ -1 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from .core import * # noqa diff --git a/python/pyarrow-stubs/parquet/core.pyi b/python/pyarrow-stubs/parquet/core.pyi index 56b2c8447d9..f5ac0510ffc 100644 --- a/python/pyarrow-stubs/parquet/core.pyi +++ b/python/pyarrow-stubs/parquet/core.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import sys from pathlib import Path diff --git a/python/pyarrow-stubs/parquet/encryption.pyi b/python/pyarrow-stubs/parquet/encryption.pyi index 5a77dae7ef7..fe9a454e593 100644 --- a/python/pyarrow-stubs/parquet/encryption.pyi +++ b/python/pyarrow-stubs/parquet/encryption.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from pyarrow._parquet_encryption import ( CryptoFactory, DecryptionConfiguration, diff --git a/python/pyarrow-stubs/substrait.pyi b/python/pyarrow-stubs/substrait.pyi index a56a8a5b40f..b78bbd8aebd 100644 --- a/python/pyarrow-stubs/substrait.pyi +++ b/python/pyarrow-stubs/substrait.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ from pyarrow._substrait import ( BoundExpressions, SubstraitSchema, diff --git a/python/pyarrow-stubs/table.pyi b/python/pyarrow-stubs/table.pyi index 685ae725d4b..a9b861e2b78 100644 --- a/python/pyarrow-stubs/table.pyi +++ b/python/pyarrow-stubs/table.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + import datetime as dt import sys diff --git a/python/pyarrow-stubs/util.pyi b/python/pyarrow-stubs/util.pyi index c2ecf7d6b61..5c9687bb83f 100644 --- a/python/pyarrow-stubs/util.pyi +++ b/python/pyarrow-stubs/util.pyi @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + from collections.abc import Callable from os import PathLike from typing import Any, Protocol, Sequence, TypeVar From 9924db05e61a87c6a4ab43f9e3bd5012f7bbdfb4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 18:16:53 +0200 Subject: [PATCH 18/26] remove docs --- python/pyarrow-stubs/_azurefs.pyi | 59 - python/pyarrow-stubs/interchange/buffer.pyi | 44 +- python/pyarrow-stubs/interchange/column.pyi | 212 +- .../pyarrow-stubs/interchange/dataframe.pyi | 96 +- .../interchange/from_dataframe.pyi | 206 +- python/pyarrow-stubs/parquet/core.pyi | 1806 +---------------- 6 files changed, 66 insertions(+), 2357 deletions(-) diff --git a/python/pyarrow-stubs/_azurefs.pyi b/python/pyarrow-stubs/_azurefs.pyi index b9a83f01c56..37fcec2c9bd 100644 --- a/python/pyarrow-stubs/_azurefs.pyi +++ b/python/pyarrow-stubs/_azurefs.pyi @@ -20,65 +20,6 @@ from typing import Literal from ._fs import FileSystem class AzureFileSystem(FileSystem): - """ - Azure Blob Storage backed FileSystem implementation - - This implementation supports flat namespace and hierarchical namespace (HNS) a.k.a. - Data Lake Gen2 storage accounts. HNS will be automatically detected and HNS specific - features will be used when they provide a performance advantage. Azurite emulator is - also supported. Note: `/` is the only supported delimiter. - - The storage account is considered the root of the filesystem. 
When enabled, containers - will be created or deleted during relevant directory operations. Obviously, this also - requires authentication with the additional permissions. - - By default `DefaultAzureCredential `__ - is used for authentication. This means it will try several types of authentication - and go with the first one that works. If any authentication parameters are provided when - initialising the FileSystem, they will be used instead of the default credential. - - Parameters - ---------- - account_name : str - Azure Blob Storage account name. This is the globally unique identifier for the - storage account. - account_key : str, default None - Account key of the storage account. If sas_token and account_key are None the - default credential will be used. The parameters account_key and sas_token are - mutually exclusive. - blob_storage_authority : str, default None - hostname[:port] of the Blob Service. Defaults to `.blob.core.windows.net`. Useful - for connecting to a local emulator, like Azurite. - dfs_storage_authority : str, default None - hostname[:port] of the Data Lake Gen 2 Service. Defaults to - `.dfs.core.windows.net`. Useful for connecting to a local emulator, like Azurite. - blob_storage_scheme : str, default None - Either `http` or `https`. Defaults to `https`. Useful for connecting to a local - emulator, like Azurite. - dfs_storage_scheme : str, default None - Either `http` or `https`. Defaults to `https`. Useful for connecting to a local - emulator, like Azurite. - sas_token : str, default None - SAS token for the storage account, used as an alternative to account_key. If sas_token - and account_key are None the default credential will be used. The parameters - account_key and sas_token are mutually exclusive. - - Examples - -------- - >>> from pyarrow import fs - >>> azure_fs = fs.AzureFileSystem(account_name="myaccount") - >>> azurite_fs = fs.AzureFileSystem( - ... account_name="devstoreaccount1", - ... account_key="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==", - ... blob_storage_authority="127.0.0.1:10000", - ... dfs_storage_authority="127.0.0.1:10000", - ... blob_storage_scheme="http", - ... dfs_storage_scheme="http", - ... ) - - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. - """ - def __init__( self, account_name: str, diff --git a/python/pyarrow-stubs/interchange/buffer.pyi b/python/pyarrow-stubs/interchange/buffer.pyi index 78d1dabb8b7..6890a24030c 100644 --- a/python/pyarrow-stubs/interchange/buffer.pyi +++ b/python/pyarrow-stubs/interchange/buffer.pyi @@ -20,8 +20,6 @@ import enum from pyarrow.lib import Buffer class DlpackDeviceType(enum.IntEnum): - """Integer enum for device type codes matching DLPack.""" - CPU = 1 CUDA = 2 CPU_PINNED = 3 @@ -32,44 +30,10 @@ class DlpackDeviceType(enum.IntEnum): ROCM = 10 class _PyArrowBuffer: - """ - Data in the buffer is guaranteed to be contiguous in memory. - - Note that there is no dtype attribute present, a buffer can be thought of - as simply a block of memory. However, if the column that the buffer is - attached to has a dtype that's supported by DLPack and ``__dlpack__`` is - implemented, then that dtype information will be contained in the return - value from ``__dlpack__``. - - This distinction is useful to support both data exchange via DLPack on a - buffer and (b) dtypes like variable-length strings which do not have a - fixed number of bytes per element. - """ def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... 
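As a quick orientation for the buffer adapter stubbed here, the following is a minimal sketch, not taken from the patch, of exercising _PyArrowBuffer directly; it assumes the private class and DlpackDeviceType are importable from pyarrow.interchange.buffer, matching the stub path above.

import pyarrow as pa
from pyarrow.interchange.buffer import DlpackDeviceType, _PyArrowBuffer

# Wrap a plain in-memory pyarrow Buffer in the interchange adapter.
raw = pa.py_buffer(b"hello world")
buf = _PyArrowBuffer(raw)

print(buf.bufsize)                      # buffer size in bytes (11 here)
print(hex(buf.ptr))                     # pointer to the start of the buffer
device, device_id = buf.__dlpack_device__()
assert device == DlpackDeviceType.CPU   # CPU-resident data in this sketch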
@property - def bufsize(self) -> int: - """ - Buffer size in bytes. - """ + def bufsize(self) -> int: ... @property - def ptr(self) -> int: - """ - Pointer to start of the buffer as an integer. - """ - def __dlpack__(self): - """ - Produce DLPack capsule (see array API standard). - - Raises: - - TypeError : if the buffer contains unsupported dtypes. - - NotImplementedError : if DLPack support is not implemented - - Useful to have to connect to array libraries. Support optional because - it's not completely trivial to implement for a Python-only library. - """ - def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: - """ - Device type and device ID for where the data in the buffer resides. - Uses device type codes matching DLPack. - Note: must be implemented even if ``__dlpack__`` is not. - """ + def ptr(self) -> int: ... + def __dlpack__(self): ... + def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: ... diff --git a/python/pyarrow-stubs/interchange/column.pyi b/python/pyarrow-stubs/interchange/column.pyi index ce7e169bfb5..970ad3e07be 100644 --- a/python/pyarrow-stubs/interchange/column.pyi +++ b/python/pyarrow-stubs/interchange/column.pyi @@ -24,27 +24,6 @@ from pyarrow.lib import Array, ChunkedArray from .buffer import _PyArrowBuffer class DtypeKind(enum.IntEnum): - """ - Integer enum for data types. - - Attributes - ---------- - INT : int - Matches to signed integer data type. - UINT : int - Matches to unsigned integer data type. - FLOAT : int - Matches to floating point data type. - BOOL : int - Matches to boolean data type. - STRING : int - Matches to string data type (UTF-8 encoded). - DATETIME : int - Matches to datetime data type. - CATEGORICAL : int - Matches to categorical data type. - """ - INT = 0 UINT = 1 FLOAT = 2 @@ -56,23 +35,6 @@ class DtypeKind(enum.IntEnum): Dtype: TypeAlias = tuple[DtypeKind, int, str, str] class ColumnNullType(enum.IntEnum): - """ - Integer enum for null type representation. - - Attributes - ---------- - NON_NULLABLE : int - Non-nullable column. - USE_NAN : int - Use explicit float NaN value. - USE_SENTINEL : int - Sentinel value besides NaN. - USE_BITMASK : int - The bit is set/unset representing a null on a certain position. - USE_BYTEMASK : int - The byte is set/unset representing a null on a certain position. - """ - NON_NULLABLE = 0 USE_NAN = 1 USE_SENTINEL = 2 @@ -95,175 +57,23 @@ class Endianness(enum.Enum): NATIVE = "=" NA = "|" -class NoBufferPresent(Exception): - """Exception to signal that there is no requested buffer.""" +class NoBufferPresent(Exception): ... class _PyArrowColumn: - """ - A column object, with only the methods and properties required by the - interchange protocol defined. - - A column can contain one or more chunks. Each chunk can contain up to three - buffers - a data buffer, a mask buffer (depending on null representation), - and an offsets buffer (if variable-size binary; e.g., variable-length - strings). - - TBD: Arrow has a separate "null" dtype, and has no separate mask concept. - Instead, it seems to use "children" for both columns with a bit mask, - and for nested dtypes. Unclear whether this is elegant or confusing. - This design requires checking the null representation explicitly. - - The Arrow design requires checking: - 1. the ARROW_FLAG_NULLABLE (for sentinel values) - 2. if a column has two children, combined with one of those children - having a null dtype. - - Making the mask concept explicit seems useful. 
One null dtype would - not be enough to cover both bit and byte masks, so that would mean - even more checking if we did it the Arrow way. - - TBD: there's also the "chunk" concept here, which is implicit in Arrow as - multiple buffers per array (= column here). Semantically it may make - sense to have both: chunks were meant for example for lazy evaluation - of data which doesn't fit in memory, while multiple buffers per column - could also come from doing a selection operation on a single - contiguous buffer. - - Given these concepts, one would expect chunks to be all of the same - size (say a 10,000 row dataframe could have 10 chunks of 1,000 rows), - while multiple buffers could have data-dependent lengths. Not an issue - in pandas if one column is backed by a single NumPy array, but in - Arrow it seems possible. - Are multiple chunks *and* multiple buffers per column necessary for - the purposes of this interchange protocol, or must producers either - reuse the chunk concept for this or copy the data? - - Note: this Column object can only be produced by ``__dataframe__``, so - doesn't need its own version or ``__column__`` protocol. - """ def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... - def size(self) -> int: - """ - Size of the column, in elements. - - Corresponds to DataFrame.num_rows() if column is a single chunk; - equal to size of this current chunk otherwise. - - Is a method rather than a property because it may cause a (potentially - expensive) computation for some dataframe implementations. - """ + def size(self) -> int: ... @property - def offset(self) -> int: - """ - Offset of first element. - - May be > 0 if using chunks; for example for a column with N chunks of - equal size M (only the last chunk may be shorter), - ``offset = n * M``, ``n = 0 .. N-1``. - """ + def offset(self) -> int: ... @property - def dtype(self) -> tuple[DtypeKind, int, str, str]: - """ - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - - Bit-width : the number of bits as an integer - Format string : data type description format string in Apache Arrow C - Data Interface format. - Endianness : current only native endianness (``=``) is supported - - Notes: - - Kind specifiers are aligned with DLPack where possible (hence the - jump to 20, leave enough room for future extension) - - Masks must be specified as boolean with either bit width 1 (for - bit masks) or 8 (for byte masks). - - Dtype width in bits was preferred over bytes - - Endianness isn't too useful, but included now in case in the - future we need to support non-native endianness - - Went with Apache Arrow format strings over NumPy format strings - because they're more complete from a dataframe perspective - - Format strings are mostly useful for datetime specification, and - for categoricals. - - For categoricals, the format string describes the type of the - categorical in the data buffer. In case of a separate encoding of - the categorical (e.g. an integer to string mapping), this can - be derived from ``self.describe_categorical``. - - Data types not included: complex, Arrow-style null, binary, - decimal, and nested (list, struct, map, union) dtypes. - """ + def dtype(self) -> tuple[DtypeKind, int, str, str]: ... @property - def describe_categorical(self) -> CategoricalDescription: - """ - If the dtype is categorical, there are two options: - - There are only values in the data buffer. - - There is a separate non-categorical Column encoding categorical - values. 
- - Raises TypeError if the dtype is not categorical - - Returns the dictionary with description on how to interpret the - data buffer: - - "is_ordered" : bool, whether the ordering of dictionary indices - is semantically meaningful. - - "is_dictionary" : bool, whether a mapping of - categorical values to other objects exists - - "categories" : Column representing the (implicit) mapping of - indices to category values (e.g. an array of - cat1, cat2, ...). None if not a dictionary-style - categorical. - - TBD: are there any other in-memory representations that are needed? - """ + def describe_categorical(self) -> CategoricalDescription: ... @property - def describe_null(self) -> tuple[ColumnNullType, Any]: - """ - Return the missing value (or "null") representation the column dtype - uses, as a tuple ``(kind, value)``. - - Value : if kind is "sentinel value", the actual value. If kind is a bit - mask or a byte mask, the value (0 or 1) indicating a missing value. - None otherwise. - """ + def describe_null(self) -> tuple[ColumnNullType, Any]: ... @property - def null_count(self) -> int: - """ - Number of null elements, if known. - - Note: Arrow uses -1 to indicate "unknown", but None seems cleaner. - """ + def null_count(self) -> int: ... @property - def metadata(self) -> dict[str, Any]: - """ - The metadata for the column. See `DataFrame.metadata` for more details. - """ - def num_chunks(self) -> int: - """ - Return the number of chunks the column consists of. - """ - def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: - """ - Return an iterator yielding the chunks. - - See `DataFrame.get_chunks` for details on ``n_chunks``. - """ - def get_buffers(self) -> ColumnBuffers: - """ - Return a dictionary containing the underlying buffers. - - The returned dictionary has the following contents: - - - "data": a two-element tuple whose first element is a buffer - containing the data and whose second element is the data - buffer's associated dtype. - - "validity": a two-element tuple whose first element is a buffer - containing mask values indicating missing data and - whose second element is the mask value buffer's - associated dtype. None if the null representation is - not a bit or byte mask. - - "offsets": a two-element tuple whose first element is a buffer - containing the offset values for variable-size binary - data (e.g., variable-length strings) and whose second - element is the offsets buffer's associated dtype. None - if the data buffer does not have an associated offsets - buffer. - """ + def metadata(self) -> dict[str, Any]: ... + def num_chunks(self) -> int: ... + def get_chunks(self, n_chunks: int | None = None) -> Iterable[_PyArrowColumn]: ... + def get_buffers(self) -> ColumnBuffers: ... diff --git a/python/pyarrow-stubs/interchange/dataframe.pyi b/python/pyarrow-stubs/interchange/dataframe.pyi index a7ea6aeac74..fb97e9a414f 100644 --- a/python/pyarrow-stubs/interchange/dataframe.pyi +++ b/python/pyarrow-stubs/interchange/dataframe.pyi @@ -27,93 +27,21 @@ from pyarrow.interchange.column import _PyArrowColumn from pyarrow.lib import RecordBatch, Table class _PyArrowDataFrame: - """ - A data frame class, with only the methods required by the interchange - protocol defined. - - A "data frame" represents an ordered collection of named columns. - A column's "name" must be a unique string. - Columns may be accessed by name or by position. 
- - This could be a public data frame class, or an object with the methods and - attributes defined on this DataFrame class could be returned from the - ``__dataframe__`` method of a public data frame class in a library adhering - to the dataframe interchange protocol specification. - """ - def __init__( self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True ) -> None: ... def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True - ) -> _PyArrowDataFrame: - """ - Construct a new exchange object, potentially changing the parameters. - ``nan_as_null`` is a keyword intended for the consumer to tell the - producer to overwrite null values in the data with ``NaN``. - It is intended for cases where the consumer does not support the bit - mask or byte mask that is the producer's native representation. - ``allow_copy`` is a keyword that defines whether or not the library is - allowed to make a copy of the data. For example, copying data would be - necessary if a library supports strided buffers, given that this - protocol specifies contiguous buffers. - """ + ) -> _PyArrowDataFrame: ... @property - def metadata(self) -> dict[str, Any]: - """ - The metadata for the data frame, as a dictionary with string keys. The - contents of `metadata` may be anything, they are meant for a library - to store information that it needs to, e.g., roundtrip losslessly or - for two implementations to share data that is not (yet) part of the - interchange protocol specification. For avoiding collisions with other - entries, please add name the keys with the name of the library - followed by a period and the desired name, e.g, ``pandas.indexcol``. - """ - def num_columns(self) -> int: - """ - Return the number of columns in the DataFrame. - """ - def num_rows(self) -> int: - """ - Return the number of rows in the DataFrame, if available. - """ - def num_chunks(self) -> int: - """ - Return the number of chunks the DataFrame consists of. - """ - def column_names(self) -> Iterable[str]: - """ - Return an iterator yielding the column names. - """ - def get_column(self, i: int) -> _PyArrowColumn: - """ - Return the column at the indicated position. - """ - def get_column_by_name(self, name: str) -> _PyArrowColumn: - """ - Return the column whose name is the indicated name. - """ - def get_columns(self) -> Iterable[_PyArrowColumn]: - """ - Return an iterator yielding the columns. - """ - def select_columns(self, indices: Sequence[int]) -> Self: - """ - Create a new DataFrame by selecting a subset of columns by index. - """ - def select_columns_by_name(self, names: Sequence[str]) -> Self: - """ - Create a new DataFrame by selecting a subset of columns by name. - """ - def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: - """ - Return an iterator yielding the chunks. - - By default (None), yields the chunks that the data is stored as by the - producer. If given, ``n_chunks`` must be a multiple of - ``self.num_chunks()``, meaning the producer must subdivide each chunk - before yielding it. - - Note that the producer must ensure that all columns are chunked the - same way. - """ + def metadata(self) -> dict[str, Any]: ... + def num_columns(self) -> int: ... + def num_rows(self) -> int: ... + def num_chunks(self) -> int: ... + def column_names(self) -> Iterable[str]: ... + def get_column(self, i: int) -> _PyArrowColumn: ... + def get_column_by_name(self, name: str) -> _PyArrowColumn: ... + def get_columns(self) -> Iterable[_PyArrowColumn]: ... 
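To illustrate the column and dataframe adapters whose stubs appear above, here is a minimal sketch, not part of the patch, that wraps a Table directly in _PyArrowDataFrame; it assumes the private classes are importable from pyarrow.interchange.dataframe as the stub layout suggests.

import pyarrow as pa
from pyarrow.interchange.dataframe import _PyArrowDataFrame

tbl = pa.table({"n_legs": [2, 4, None], "animal": ["Flamingo", "Dog", "Centipede"]})
xdf = _PyArrowDataFrame(tbl)                 # wrap a Table in the interchange adapter

print(xdf.num_columns(), xdf.num_rows())     # 2 3
print(list(xdf.column_names()))              # ['n_legs', 'animal']

col = xdf.get_column_by_name("n_legs")
print(col.dtype)        # (kind, bit-width, format string, endianness) tuple
print(col.null_count)   # 1, one null value in this column
buffers = col.get_buffers()
print(buffers["data"])  # (buffer, dtype) pair; "validity" and "offsets" may be None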
+ def select_columns(self, indices: Sequence[int]) -> Self: ... + def select_columns_by_name(self, names: Sequence[str]) -> Self: ... + def get_chunks(self, n_chunks: int | None = None) -> Iterable[Self]: ... diff --git a/python/pyarrow-stubs/interchange/from_dataframe.pyi b/python/pyarrow-stubs/interchange/from_dataframe.pyi index aa6217b6181..b13d5976337 100644 --- a/python/pyarrow-stubs/interchange/from_dataframe.pyi +++ b/python/pyarrow-stubs/interchange/from_dataframe.pyi @@ -31,125 +31,21 @@ class DataFrameObject(Protocol): ColumnObject: TypeAlias = Any -def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: - """ - Build a ``pa.Table`` from any DataFrame supporting the interchange protocol. +def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: ... - Parameters - ---------- - df : DataFrameObject - Object supporting the interchange protocol, i.e. `__dataframe__` - method. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). +def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: ... - Returns - ------- - pa.Table +def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... - Examples - -------- - >>> import pyarrow - >>> from pyarrow.interchange import from_dataframe - - Convert a pandas dataframe to a pyarrow table: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_attendees": [100, 10, 1], - ... "country": ["Italy", "Spain", "Slovenia"], - ... } - ... ) - >>> df - n_attendees country - 0 100 Italy - 1 10 Spain - 2 1 Slovenia - >>> from_dataframe(df) - pyarrow.Table - n_attendees: int64 - country: large_string - ---- - n_attendees: [[100,10,1]] - country: [["Italy","Spain","Slovenia"]] - """ - -def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: - """ - Convert interchange protocol chunk to ``pa.RecordBatch``. - - Parameters - ---------- - df : DataFrameObject - Object supporting the interchange protocol, i.e. `__dataframe__` - method. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.RecordBatch - """ - -def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: - """ - Convert a column holding one of the primitive dtypes to a PyArrow array. - A primitive type is one of: int, uint, float, bool (1 bit). - - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - """ - -def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: - """ - Convert a column holding boolean dtype to a PyArrow array. - - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - """ +def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... def categorical_column_to_dictionary( col: ColumnObject, allow_copy: bool = True -) -> DictionaryArray: - """ - Convert a column holding categorical data to a pa.DictionaryArray. 
- - Parameters - ---------- - col : ColumnObject - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.DictionaryArray - """ +) -> DictionaryArray: ... -def parse_datetime_format_str(format_str: str) -> tuple[str, str]: - """Parse datetime `format_str` to interpret the `data`.""" +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: ... -def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: - """Map column date type to pyarrow date type.""" +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: ... def buffers_to_array( buffers: ColumnBuffers, @@ -158,39 +54,7 @@ def buffers_to_array( describe_null: ColumnNullType, offset: int = 0, allow_copy: bool = True, -) -> Array: - """ - Build a PyArrow array from the passed buffer. - - Parameters - ---------- - buffer : ColumnBuffers - Dictionary containing tuples of underlying buffers and - their associated dtype. - data_type : Tuple[DtypeKind, int, str, str], - Dtype description of the column as a tuple ``(kind, bit-width, format string, - endianness)``. - length : int - The number of values in the array. - describe_null: ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - offset : int, default: 0 - Number of elements to offset from the start of the buffer. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Array - - Notes - ----- - The returned array doesn't own the memory. The caller of this function - is responsible for keeping the memory owner object alive as long as - the returned PyArrow array is being used. - """ +) -> Array: ... def validity_buffer_from_mask( validity_buff: Buffer, @@ -199,32 +63,7 @@ def validity_buffer_from_mask( length: int, offset: int = 0, allow_copy: bool = True, -) -> Buffer: - """ - Build a PyArrow buffer from the passed mask buffer. - - Parameters - ---------- - validity_buff : BufferObject - Tuple of underlying validity buffer and associated dtype. - validity_dtype : Dtype - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - describe_null : ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - length : int - The number of values in the array. - offset : int, default: 0 - Number of elements to offset from the start of the buffer. - allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Buffer - """ +) -> Buffer: ... def validity_buffer_nan_sentinel( data_pa_buffer: Buffer, @@ -233,29 +72,4 @@ def validity_buffer_nan_sentinel( length: int, offset: int = 0, allow_copy: bool = True, -) -> Buffer: - """ - Build a PyArrow buffer from NaN or sentinel values. - - Parameters - ---------- - data_pa_buffer : pa.Buffer - PyArrow buffer for the column data. - data_type : Dtype - Dtype description as a tuple ``(kind, bit-width, format string, - endianness)``. - describe_null : ColumnNullType - Null representation the column dtype uses, - as a tuple ``(kind, value)`` - length : int - The number of values in the array. - offset : int, default: 0 - Number of elements to offset from the start of the buffer. 
- allow_copy : bool, default: True - Whether to allow copying the memory to perform the conversion - (if false then zero-copy approach is requested). - - Returns - ------- - pa.Buffer - """ +) -> Buffer: ... diff --git a/python/pyarrow-stubs/parquet/core.pyi b/python/pyarrow-stubs/parquet/core.pyi index f5ac0510ffc..67882f3a747 100644 --- a/python/pyarrow-stubs/parquet/core.pyi +++ b/python/pyarrow-stubs/parquet/core.pyi @@ -77,29 +77,7 @@ __all__ = ( "filters_to_expression", ) -def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: - """ - Check if filters are well-formed and convert to an ``Expression``. - - Parameters - ---------- - filters : List[Tuple] or List[List[Tuple]] - - Notes - ----- - See internal ``pyarrow._DNF_filter_doc`` attribute for more details. - - Examples - -------- - - >>> filters_to_expression([("foo", "==", "bar")]) - - - Returns - ------- - pyarrow.compute.Expression - An Expression representing the filters - """ +def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... @deprecated("use filters_to_expression") def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... @@ -107,97 +85,6 @@ def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Ex _Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] class ParquetFile: - """ - Reader interface for a single Parquet file. - - Parameters - ---------- - source : str, pathlib.Path, pyarrow.NativeFile, or file-like object - Readable source. For passing bytes or buffer-like file containing a - Parquet file, use pyarrow.BufferReader. - metadata : FileMetaData, default None - Use existing metadata object, rather than reading from file. - common_metadata : FileMetaData, default None - Will be used in reads for pandas schema metadata if not found in the - main file's metadata, no other uses at the moment. - read_dictionary : list - List of column names to read directly as DictionaryArray. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - pre_buffer : bool, default False - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties, default None - File decryption properties for Parquet Modular Encryption. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. 
- Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Examples - -------- - - Generate an example PyArrow Table and write it to Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - - Create a ``ParquetFile`` object from the Parquet file: - - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read the data: - - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] - - Create a ParquetFile object with "animal" column as DictionaryArray: - - >>> parquet_file = pq.ParquetFile("example.parquet", read_dictionary=["animal"]) - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: dictionary - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [ -- dictionary: - ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: - [0,1,2,3,4,5]] - """ - reader: ParquetReader common_metadata: FileMetaData @@ -221,63 +108,13 @@ class ParquetFile: def __enter__(self) -> Self: ... def __exit__(self, *args, **kwargs) -> None: ... @property - def metadata(self) -> FileMetaData: - """ - Return the Parquet metadata. - """ + def metadata(self) -> FileMetaData: ... @property - def schema(self) -> ParquetSchema: - """ - Return the Parquet schema, unconverted to Arrow types - """ + def schema(self) -> ParquetSchema: ... @property - def schema_arrow(self) -> Schema: - """ - Return the inferred Arrow schema, converted from the whole Parquet - file's schema - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read the Arrow schema: - - >>> parquet_file.schema_arrow - n_legs: int64 - animal: string - """ + def schema_arrow(self) -> Schema: ... @property - def num_row_groups(self) -> int: - """ - Return the number of row groups of the Parquet file. - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.num_row_groups - 1 - """ + def num_row_groups(self) -> int: ... def close(self, force: bool = False) -> None: ... @property def closed(self) -> bool: ... @@ -287,100 +124,14 @@ class ParquetFile: columns: list | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a single row group from a Parquet file. - - Parameters - ---------- - i : int - Index of the individual row group that we want to read. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 
'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row group as a table (of columns) - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.read_row_group(0) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ + ) -> Table: ... def read_row_groups( self, row_groups: list, columns: list | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a multiple row groups from a Parquet file. - - Parameters - ---------- - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row groups as a table (of columns). - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.read_row_groups([0, 0]) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] - """ + ) -> Table: ... def iter_batches( self, batch_size: int = 65536, @@ -388,375 +139,16 @@ class ParquetFile: columns: list | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, - ) -> Iterator[RecordBatch]: - """ - Read streaming batches from a Parquet file. - - Parameters - ---------- - batch_size : int, default 64K - Maximum number of records to yield per batch. Batches may be - smaller if there aren't enough rows in the file. - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : boolean, default True - Perform multi-threaded column reads. - use_pandas_metadata : boolean, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Yields - ------ - pyarrow.RecordBatch - Contents of each batch as a record batch - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... 
"animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - >>> for i in parquet_file.iter_batches(): - ... print("RecordBatch") - ... print(i.to_pandas()) - RecordBatch - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - """ + ) -> Iterator[RecordBatch]: ... def read( self, columns: list | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, - ) -> Table: - """ - Read a Table from Parquet format. - - Parameters - ---------- - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - Read a Table: - - >>> parquet_file.read(columns=["animal"]) - pyarrow.Table - animal: string - ---- - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ - def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: - """ - Read contents of file for the given columns and batch size. - - Notes - ----- - This function's primary purpose is benchmarking. - The scan is executed on a single thread. - - Parameters - ---------- - columns : list of integers, default None - Select columns to read, if None scan all columns. - batch_size : int, default 64K - Number of rows to read at a time internally. - - Returns - ------- - num_rows : int - Number of rows in file - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - >>> parquet_file = pq.ParquetFile("example.parquet") - - >>> parquet_file.scan_contents() - 6 - """ + ) -> Table: ... + def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: ... class ParquetWriter: - """ - Class for incrementally building a Parquet file for Arrow tables. - - Parameters - ---------- - where : path or file-like object - schema : pyarrow.Schema - version : {"1.0", "2.4", "2.6"}, default "2.6" - Determine which Parquet logical types are available for use, whether the - reduced set from the Parquet 1.x.x format or the expanded logical types - added in later format versions. - Files written with version='2.4' or '2.6' may not be readable in all - Parquet implementations, so version='1.0' is likely the choice that - maximizes file compatibility. - UINT32 and some logical types are only available with version '2.4'. - Nanosecond timestamps are only available with version '2.6'. 
- Other features such as compression algorithms or the new serialized - data page format must be enabled separately (see 'compression' and - 'data_page_version'). - use_dictionary : bool or list, default True - Specify if we should use dictionary encoding in general or only for - some columns. - When encoding the column, if the dictionary size is too large, the - column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type - doesn't support dictionary encoding. - compression : str or dict, default 'snappy' - Specify the compression codec, either on a general basis or per-column. - Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. - write_statistics : bool or list, default True - Specify if we should write statistics in general (default is True) or only - for some columns. - use_deprecated_int96_timestamps : bool, default None - Write timestamps to INT96 Parquet format. Defaults to False unless enabled - by flavor argument. This take priority over the coerce_timestamps option. - coerce_timestamps : str, default None - Cast timestamps to a particular resolution. If omitted, defaults are chosen - depending on `version`. For ``version='1.0'`` and ``version='2.4'``, - nanoseconds are cast to microseconds ('us'), while for - ``version='2.6'`` (the default), they are written natively without loss - of resolution. Seconds are always cast to milliseconds ('ms') by default, - as Parquet does not have any temporal type with seconds resolution. - If the casting results in loss of data, it will raise an exception - unless ``allow_truncated_timestamps=True`` is given. - Valid values: {None, 'ms', 'us'} - allow_truncated_timestamps : bool, default False - Allow loss of data when coercing timestamps to a particular - resolution. E.g. if microsecond or nanosecond data is lost when coercing to - 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` - will NOT result in the truncation exception being ignored unless - ``coerce_timestamps`` is not None. - data_page_size : int, default None - Set a target threshold for the approximate encoded size of data - pages within a column chunk (in bytes). If None, use the default data page - size of 1MByte. - flavor : {'spark'}, default None - Sanitize schema or set other compatibility options to work with - various target systems. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - compression_level : int or dict, default None - Specify the compression level for a codec, either on a general basis or - per-column. If None is passed, arrow selects the compression level for - the compression codec in use. The compression level has a different - meaning for each codec, so you have to read the documentation of the - codec you are using. - An exception is thrown if the compression codec does not allow specifying - a compression level. - use_byte_stream_split : bool or list, default False - Specify if the byte_stream_split encoding should be used in general or - only for some columns. If both dictionary and byte_stream_stream are - enabled, then dictionary is preferred. - The byte_stream_split encoding is valid for integer, floating-point - and fixed-size binary data types (including decimals); it should be - combined with a compression codec so as to achieve size reduction. - column_encoding : string or dict, default None - Specify the encoding scheme on a per column basis. 
- Can only be used when ``use_dictionary`` is set to False, and - cannot be used in combination with ``use_byte_stream_split``. - Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', - 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. - Certain encodings are only compatible with certain data types. - Please refer to the encodings section of `Reading and writing Parquet - files `_. - data_page_version : {"1.0", "2.0"}, default "1.0" - The serialized Parquet data page format version to write, defaults to - 1.0. This does not impact the file schema logical types and Arrow to - Parquet type casting behavior; for that use the "version" option. - use_compliant_nested_type : bool, default True - Whether to write compliant Parquet nested type (lists) as defined - `here `_, defaults to ``True``. - For ``use_compliant_nested_type=True``, this will write into a list - with 3-level structure where the middle level, named ``list``, - is a repeated group with a single field named ``element``:: - - group (LIST) { - repeated group list { - element; - } - } - - For ``use_compliant_nested_type=False``, this will also write into a list - with 3-level structure, where the name of the single field of the middle - level ``list`` is taken from the element name for nested columns in Arrow, - which defaults to ``item``:: - - group (LIST) { - repeated group list { - item; - } - } - encryption_properties : FileEncryptionProperties, default None - File encryption properties for Parquet Modular Encryption. - If None, no encryption will be done. - The encryption properties can be created using: - ``CryptoFactory.file_encryption_properties()``. - write_batch_size : int, default None - Number of values to write to a page at a time. If None, use the default of - 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages - are exceeding the ``data_page_size`` due to large column values, lowering - the batch size can help keep page sizes closer to the intended size. - dictionary_pagesize_limit : int, default None - Specify the dictionary page size limit per row group. If None, use the - default 1MB. - store_schema : bool, default True - By default, the Arrow schema is serialized and stored in the Parquet - file metadata (in the "ARROW:schema" key). When reading the file, - if this key is available, it will be used to more faithfully recreate - the original Arrow data. For example, for tz-aware timestamp columns - it will restore the timezone (Parquet only stores the UTC values without - timezone), or columns with duration type will be restored from the int64 - Parquet column. - write_page_index : bool, default False - Whether to write a page index in general for all columns. - Writing statistics to the page index disables the old method of writing - statistics to each data page header. The page index makes statistics-based - filtering more efficient than the page header, as it gathers all the - statistics for a Parquet file in a single place, avoiding scattered I/O. - Note that the page index is not yet used on the read size by PyArrow. - write_page_checksum : bool, default False - Whether to write page checksums in general for all columns. - Page checksums enable detection of data corruption, which might occur during - transmission or in the storage. - sorting_columns : Sequence of SortingColumn, default None - Specify the sort order of the data being written. The writer does not sort - the data nor does it verify that the data is sorted. 
The sort order is - written to the row group metadata, which can then be used by readers. - store_decimal_as_integer : bool, default False - Allow decimals with 1 <= precision <= 18 to be stored as integers. - In Parquet, DECIMAL can be stored in any of the following physical types: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: precision is limited by the array size. - Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. - - binary: precision is unlimited. The minimum number of bytes to store the - unscaled value is used. - - By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. - When enabled, the writer will use the following physical types to store decimals: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: for precision > 18. - - As a consequence, decimal columns stored in integer types are more compact. - writer_engine_version : unused - **options : dict - If options contains a key `metadata_collector` then the - corresponding value is assumed to be a list (or any object with - `.append` method) that will be filled with the file metadata instance - of the written file. - - Examples - -------- - Generate an example PyArrow Table and RecordBatch: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.record_batch( - ... [ - ... [2, 2, 4, 4, 5, 100], - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... ], - ... names=["n_legs", "animal"], - ... ) - - create a ParquetWriter object: - - >>> import pyarrow.parquet as pq - >>> writer = pq.ParquetWriter("example.parquet", table.schema) - - and write the Table into the Parquet file: - - >>> writer.write_table(table) - >>> writer.close() - - >>> pq.read_table("example.parquet").to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - create a ParquetWriter object for the RecordBatch: - - >>> writer2 = pq.ParquetWriter("example2.parquet", batch.schema) - - and write the RecordBatch into the Parquet file: - - >>> writer2.write_batch(batch) - >>> writer2.close() - - >>> pq.read_table("example2.parquet").to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - """ - flavor: str schema_changed: bool schema: ParquetSchema @@ -796,210 +188,13 @@ class ParquetWriter: def __exit__(self, *args, **kwargs) -> Literal[False]: ... def write( self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None - ) -> None: - """ - Write RecordBatch or Table to the Parquet file. - - Parameters - ---------- - table_or_batch : {RecordBatch, Table} - row_group_size : int, default None - Maximum number of rows in each written row group. If None, - the row group size will be the minimum of the input - table or batch length and 1024 * 1024. - """ - def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: - """ - Write RecordBatch to the Parquet file. - - Parameters - ---------- - batch : RecordBatch - row_group_size : int, default None - Maximum number of rows in written row group. If None, the - row group size will be the minimum of the RecordBatch - size and 1024 * 1024. If set larger than 64Mi then 64Mi - will be used instead. 
- """ - def write_table(self, table: Table, row_group_size: int | None = None) -> None: - """ - Write Table to the Parquet file. - - Parameters - ---------- - table : Table - row_group_size : int, default None - Maximum number of rows in each written row group. If None, - the row group size will be the minimum of the Table size - and 1024 * 1024. If set larger than 64Mi then 64Mi will - be used instead. - - """ - def close(self) -> None: - """ - Close the connection to the Parquet file. - """ - def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: - """ - Add key-value metadata to the file. - This will overwrite any existing metadata with the same key. - - Parameters - ---------- - key_value_metadata : dict - Keys and values must be string-like / coercible to bytes. - """ + ) -> None: ... + def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: ... + def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... + def close(self) -> None: ... + def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: ... class ParquetDataset: - """ - Encapsulates details of reading a complete Parquet dataset possibly - consisting of multiple files and partitions in subdirectories. - - Parameters - ---------- - path_or_paths : str or List[str] - A directory name, single file name, or list of file names. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - schema : pyarrow.parquet.Schema - Optionally provide the Schema for the Dataset, in which case it will - not be inferred from the source. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. 
code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. - pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. Set to False if you want to prioritize minimal memory usage - over maximum speed. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular resolution - (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 - timestamps will be inferred as timestamps in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the page checksum for each page read from the file. - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2", partition_cols=["year"]) - - create a ParquetDataset object from the dataset source: - - >>> dataset = pq.ParquetDataset("dataset_v2/") - - and read the data: - - >>> dataset.read().to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - create a ParquetDataset object with filter: - - >>> dataset = pq.ParquetDataset("dataset_v2/", filters=[("n_legs", "=", 4)]) - >>> dataset.read().to_pandas() - n_legs animal year - 0 4 Dog 2021 - 1 4 Horse 2022 - """ def __init__( self, path_or_paths: SingleOrList[str] @@ -1024,184 +219,22 @@ class ParquetDataset: ): ... def equals(self, other: ParquetDataset) -> bool: ... @property - def schema(self) -> Schema: - """ - Schema of the Dataset. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_schema", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_schema/") - - Read the schema: - - >>> dataset.schema - n_legs: int64 - animal: string - year: dictionary - """ + def schema(self) -> Schema: ... def read( self, columns: list[str] | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, - ) -> Table: - """ - Read (multiple) Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the dataset. The partition fields - are not automatically included. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_read", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_read/") - - Read the dataset: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - def read_pandas(self, **kwargs) -> Table: - """ - Read dataset including pandas metadata, if any. Other arguments passed - through to :func:`read`, see docstring for further details. - - Parameters - ---------- - **kwargs : optional - Additional options for :func:`read` - - Examples - -------- - Generate an example parquet file: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "table_V2.parquet") - >>> dataset = pq.ParquetDataset("table_V2.parquet") - - Read the dataset with pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ + ) -> Table: ... + def read_pandas(self, **kwargs) -> Table: ... @property - def fragments(self) -> list[ParquetFileFragment]: - """ - A list of the Dataset source fragments or pieces with absolute - file paths. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_fragments", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_fragments/") - - List the fragments: - - >>> dataset.fragments - [ list[ParquetFileFragment]: ... @property - def files(self) -> list[str]: - """ - A list of absolute Parquet file paths in the Dataset source. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_v2_files", partition_cols=["year"]) - >>> dataset = pq.ParquetDataset("dataset_v2_files/") - - List the files: - - >>> dataset.files - ['dataset_v2_files/year=2019/...-0.parquet', ... - """ + def files(self) -> list[str]: ... @property - def filesystem(self) -> FileSystem: - """ - The filesystem type of the Dataset source. - """ + def filesystem(self) -> FileSystem: ... @property - def partitioning(self) -> Partitioning: - """ - The partitioning of the Dataset source, if discovered. - """ + def partitioning(self) -> Partitioning: ... def read_table( source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], @@ -1223,347 +256,11 @@ def read_table( thrift_string_size_limit: int | None = None, thrift_container_size_limit: int | None = None, page_checksum_verification: bool = False, -) -> Table: - """ - Read a Table from Parquet format - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name or directory name. For - file-like objects, only read a single file. Use pyarrow.BufferReader to - read a file contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - use_threads : bool, default True - Perform multi-threaded column reads. - schema : Schema, optional - Optionally provide the Schema for the parquet dataset, in which case it - will not be inferred from the source. 
- use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. 
- pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns) - - - Examples - -------- - - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_name_2", partition_cols=["year"]) - - Read the data: - - >>> pq.read_table("dataset_name_2").to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - - Read only a subset of columns: - - >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"]) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[5],[2],[4,100],[2,4]] - animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] - - Read a subset of columns and read one column as DictionaryArray: - - >>> pq.read_table("dataset_name_2", columns=["n_legs", "animal"], read_dictionary=["animal"]) - pyarrow.Table - n_legs: int64 - animal: dictionary - ---- - n_legs: [[5],[2],[4,100],[2,4]] - animal: [ -- dictionary: - ["Brittle stars"] -- indices: - [0], -- dictionary: - ["Flamingo"] -- indices: - [0], -- dictionary: - ["Dog","Centipede"] -- indices: - [0,1], -- dictionary: - ["Parrot","Horse"] -- indices: - [0,1]] - - Read the table with filter: - - >>> pq.read_table( - ... "dataset_name_2", columns=["n_legs", "animal"], filters=[("n_legs", "<", 4)] - ... ).to_pandas() - n_legs animal - 0 2 Flamingo - 1 2 Parrot - - Read data from a single Parquet file: - - >>> pq.write_table(table, "example.parquet") - >>> pq.read_table("dataset_name_2").to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - """ +) -> Table: ... 
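For quick reference alongside the ``read_table`` parameters documented above, here is a minimal usage sketch of the two ``filters`` forms the docstring describes, an ``Expression`` and the DNF list-of-tuples notation. It assumes the ``dataset_name_2`` partitioned dataset and column names from the docstring's own example; the variable names are illustrative only:

.. code-block:: python

    import pyarrow.compute as pc
    import pyarrow.parquet as pq

    # Expression form: build the predicate with pyarrow.compute
    expr = (pc.field("n_legs") >= 4) & pc.field("animal").isin(["Dog", "Horse"])
    filtered = pq.read_table("dataset_name_2", columns=["n_legs", "animal"], filters=expr)

    # DNF form: the outer list is an OR of inner AND-ed (key, op, value) tuples
    dnf = [[("n_legs", ">=", 4), ("animal", "in", ["Dog", "Horse"])]]
    filtered_dnf = pq.read_table("dataset_name_2", columns=["n_legs", "animal"], filters=dnf)

Both calls are intended to select the same rows; the Expression form composes naturally with the ``pyarrow.dataset`` API, while the DNF form avoids importing ``pyarrow.compute``.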
def read_pandas( source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs -) -> Table: - """ - - Read a Table from Parquet format, also reading DataFrame - index values if known in the file metadata - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name or directory name. For - file-like objects, only read a single file. Use pyarrow.BufferReader to - read a file contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - use_threads : bool, default True - Perform multi-threaded column reads. - schema : Schema, optional - Optionally provide the Schema for the parquet dataset, in which case it - will not be inferred from the source. - read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. Refer to the Parquet - file's schema to obtain the paths. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - partitioning : pyarrow.dataset.Partitioning or str or list of str, default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details. - **kwargs - additional options for :func:`read_table` - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - Within-file level filtering and different partitioning schemes are supported. - - Predicates are expressed using an ``Expression`` or using - the disjunctive normal form (DNF), like ``[[('x', '=', 0), ...], ...]``. - DNF allows arbitrary boolean logical combinations of single column predicates. - The innermost tuples each describe a single column predicate. The list of inner - predicates is interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these filters - as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. 
- - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - Using the ``Expression`` API: - - .. code-block:: python - - import pyarrow.compute as pc - pc.field('x') = 0 - pc.field('y').isin(['a', 'b', 'c']) - ~pc.field('y').isin({'a', 'b'}) - - Using the DNF format: - - .. code-block:: python - - ("x", "=", 0) - ("y", "in", ["a", "b", "c"]) - ("z", "not in", {"a", "b"}) - - - ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. - pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - page_checksum_verification : bool, default False - If True, verify the checksum for each page read from the file. - - Returns - ------- - pyarrow.Table - Content of the file as a Table of Columns, including DataFrame - indexes as columns - """ +) -> Table: ... def write_table( table: Table, @@ -1593,221 +290,7 @@ def write_table( sorting_columns: Sequence[SortingColumn] | None = None, store_decimal_as_integer: bool = False, **kwargs, -) -> None: - """ - - Write a Table to Parquet format. - - Parameters - ---------- - table : pyarrow.Table - where : string or pyarrow.NativeFile - row_group_size : int - Maximum number of rows in each written row group. If None, the - row group size will be the minimum of the Table size and - 1024 * 1024. - version : {"1.0", "2.4", "2.6"}, default "2.6" - Determine which Parquet logical types are available for use, whether the - reduced set from the Parquet 1.x.x format or the expanded logical types - added in later format versions. - Files written with version='2.4' or '2.6' may not be readable in all - Parquet implementations, so version='1.0' is likely the choice that - maximizes file compatibility. - UINT32 and some logical types are only available with version '2.4'. - Nanosecond timestamps are only available with version '2.6'. 
- Other features such as compression algorithms or the new serialized - data page format must be enabled separately (see 'compression' and - 'data_page_version'). - use_dictionary : bool or list, default True - Specify if we should use dictionary encoding in general or only for - some columns. - When encoding the column, if the dictionary size is too large, the - column will fallback to ``PLAIN`` encoding. Specially, ``BOOLEAN`` type - doesn't support dictionary encoding. - compression : str or dict, default 'snappy' - Specify the compression codec, either on a general basis or per-column. - Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. - write_statistics : bool or list, default True - Specify if we should write statistics in general (default is True) or only - for some columns. - use_deprecated_int96_timestamps : bool, default None - Write timestamps to INT96 Parquet format. Defaults to False unless enabled - by flavor argument. This take priority over the coerce_timestamps option. - coerce_timestamps : str, default None - Cast timestamps to a particular resolution. If omitted, defaults are chosen - depending on `version`. For ``version='1.0'`` and ``version='2.4'``, - nanoseconds are cast to microseconds ('us'), while for - ``version='2.6'`` (the default), they are written natively without loss - of resolution. Seconds are always cast to milliseconds ('ms') by default, - as Parquet does not have any temporal type with seconds resolution. - If the casting results in loss of data, it will raise an exception - unless ``allow_truncated_timestamps=True`` is given. - Valid values: {None, 'ms', 'us'} - allow_truncated_timestamps : bool, default False - Allow loss of data when coercing timestamps to a particular - resolution. E.g. if microsecond or nanosecond data is lost when coercing to - 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` - will NOT result in the truncation exception being ignored unless - ``coerce_timestamps`` is not None. - data_page_size : int, default None - Set a target threshold for the approximate encoded size of data - pages within a column chunk (in bytes). If None, use the default data page - size of 1MByte. - flavor : {'spark'}, default None - Sanitize schema or set other compatibility options to work with - various target systems. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - compression_level : int or dict, default None - Specify the compression level for a codec, either on a general basis or - per-column. If None is passed, arrow selects the compression level for - the compression codec in use. The compression level has a different - meaning for each codec, so you have to read the documentation of the - codec you are using. - An exception is thrown if the compression codec does not allow specifying - a compression level. - use_byte_stream_split : bool or list, default False - Specify if the byte_stream_split encoding should be used in general or - only for some columns. If both dictionary and byte_stream_stream are - enabled, then dictionary is preferred. - The byte_stream_split encoding is valid for integer, floating-point - and fixed-size binary data types (including decimals); it should be - combined with a compression codec so as to achieve size reduction. - column_encoding : string or dict, default None - Specify the encoding scheme on a per column basis. 
- Can only be used when ``use_dictionary`` is set to False, and - cannot be used in combination with ``use_byte_stream_split``. - Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT', - 'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}. - Certain encodings are only compatible with certain data types. - Please refer to the encodings section of `Reading and writing Parquet - files `_. - data_page_version : {"1.0", "2.0"}, default "1.0" - The serialized Parquet data page format version to write, defaults to - 1.0. This does not impact the file schema logical types and Arrow to - Parquet type casting behavior; for that use the "version" option. - use_compliant_nested_type : bool, default True - Whether to write compliant Parquet nested type (lists) as defined - `here `_, defaults to ``True``. - For ``use_compliant_nested_type=True``, this will write into a list - with 3-level structure where the middle level, named ``list``, - is a repeated group with a single field named ``element``:: - - group (LIST) { - repeated group list { - element; - } - } - - For ``use_compliant_nested_type=False``, this will also write into a list - with 3-level structure, where the name of the single field of the middle - level ``list`` is taken from the element name for nested columns in Arrow, - which defaults to ``item``:: - - group (LIST) { - repeated group list { - item; - } - } - encryption_properties : FileEncryptionProperties, default None - File encryption properties for Parquet Modular Encryption. - If None, no encryption will be done. - The encryption properties can be created using: - ``CryptoFactory.file_encryption_properties()``. - write_batch_size : int, default None - Number of values to write to a page at a time. If None, use the default of - 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages - are exceeding the ``data_page_size`` due to large column values, lowering - the batch size can help keep page sizes closer to the intended size. - dictionary_pagesize_limit : int, default None - Specify the dictionary page size limit per row group. If None, use the - default 1MB. - store_schema : bool, default True - By default, the Arrow schema is serialized and stored in the Parquet - file metadata (in the "ARROW:schema" key). When reading the file, - if this key is available, it will be used to more faithfully recreate - the original Arrow data. For example, for tz-aware timestamp columns - it will restore the timezone (Parquet only stores the UTC values without - timezone), or columns with duration type will be restored from the int64 - Parquet column. - write_page_index : bool, default False - Whether to write a page index in general for all columns. - Writing statistics to the page index disables the old method of writing - statistics to each data page header. The page index makes statistics-based - filtering more efficient than the page header, as it gathers all the - statistics for a Parquet file in a single place, avoiding scattered I/O. - Note that the page index is not yet used on the read size by PyArrow. - write_page_checksum : bool, default False - Whether to write page checksums in general for all columns. - Page checksums enable detection of data corruption, which might occur during - transmission or in the storage. - sorting_columns : Sequence of SortingColumn, default None - Specify the sort order of the data being written. The writer does not sort - the data nor does it verify that the data is sorted. 
The sort order is - written to the row group metadata, which can then be used by readers. - store_decimal_as_integer : bool, default False - Allow decimals with 1 <= precision <= 18 to be stored as integers. - In Parquet, DECIMAL can be stored in any of the following physical types: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: precision is limited by the array size. - Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits. - - binary: precision is unlimited. The minimum number of bytes to store the - unscaled value is used. - - By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. - When enabled, the writer will use the following physical types to store decimals: - - int32: for 1 <= precision <= 9. - - int64: for 10 <= precision <= 18. - - fixed_len_byte_array: for precision > 18. - - As a consequence, decimal columns stored in integer types are more compact. - - **kwargs : optional - Additional options for ParquetWriter - - Examples - -------- - Generate an example PyArrow Table: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - and write the Table into Parquet file: - - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "example.parquet") - - Defining row group size for the Parquet file: - - >>> pq.write_table(table, "example.parquet", row_group_size=3) - - Defining row group compression (default is Snappy): - - >>> pq.write_table(table, "example.parquet", compression="none") - - Defining row group compression and encoding per-column: - - >>> pq.write_table( - ... table, - ... "example.parquet", - ... compression={"n_legs": "snappy", "animal": "gzip"}, - ... use_dictionary=["n_legs", "animal"], - ... ) - - Defining column encoding per-column: - - >>> pq.write_table( - ... table, "example.parquet", column_encoding={"animal": "PLAIN"}, use_dictionary=False - ... ) - """ +) -> None: ... def write_to_dataset( table: Table, @@ -1822,125 +305,7 @@ def write_to_dataset( existing_data_behavior: Literal["overwrite_or_ignore", "error", "delete_matching"] | None = None, **kwargs, -) -> None: - """ - Wrapper around dataset.write_dataset for writing a Table to - Parquet format by partitions. - For each combination of partition columns and values, - a subdirectories are created in the following - manner: - - root_dir/ - group1=value1 - group2=value1 - .parquet - group2=value2 - .parquet - group1=valueN - group2=value1 - .parquet - group2=valueN - .parquet - - Parameters - ---------- - table : pyarrow.Table - root_path : str, pathlib.Path - The root directory of the dataset. - partition_cols : list, - Column names by which to partition the dataset. - Columns are partitioned in the order they are given. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - schema : Schema, optional - This Schema of the dataset. - partitioning : Partitioning or list[str], optional - The partitioning scheme specified with the - ``pyarrow.dataset.partitioning()`` function or a list of field names. - When providing a list of field names, you can use - ``partitioning_flavor`` to drive which partitioning type should be - used. 
- basename_template : str, optional - A template string used to generate basenames of written data files. - The token '{i}' will be replaced with an automatically incremented - integer. If not specified, it defaults to "guid-{i}.parquet". - use_threads : bool, default True - Write files in parallel. If enabled, then maximum parallelism will be - used determined by the number of available CPU cores. - file_visitor : function - If set, this function will be called with a WrittenFile instance - for each file created during the call. This object will have both - a path attribute and a metadata attribute. - - The path attribute will be a string containing the path to - the created file. - - The metadata attribute will be the parquet metadata of the file. - This metadata will have the file path attribute set and can be used - to build a _metadata file. The metadata attribute will be None if - the format is not parquet. - - Example visitor which simple collects the filenames created:: - - visited_paths = [] - - def file_visitor(written_file): - visited_paths.append(written_file.path) - - existing_data_behavior : 'overwrite_or_ignore' | 'error' | 'delete_matching' - Controls how the dataset will handle data that already exists in - the destination. The default behaviour is 'overwrite_or_ignore'. - - 'overwrite_or_ignore' will ignore any existing data and will - overwrite files with the same name as an output file. Other - existing files will be ignored. This behavior, in combination - with a unique basename_template for each write, will allow for - an append workflow. - - 'error' will raise an error if any data exists in the destination. - - 'delete_matching' is useful when you are writing a partitioned - dataset. The first time each partition directory is encountered - the entire directory will be deleted. This allows you to overwrite - old partitions completely. - **kwargs : dict, - Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` - function for matching kwargs, and remainder to - :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. - See the docstring of :func:`write_table` and - :func:`pyarrow.dataset.write_dataset` for the available options. - Using `metadata_collector` in kwargs allows one to collect the - file metadata instances of dataset pieces. The file paths in the - ColumnChunkMetaData will be set relative to `root_path`. - - Examples - -------- - Generate an example PyArrow Table: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - and write it to a partitioned dataset: - - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path="dataset_name_3", partition_cols=["year"]) - >>> pq.ParquetDataset("dataset_name_3").files - ['dataset_name_3/year=2019/...-0.parquet', ... - - Write a single Parquet file into the root folder: - - >>> pq.write_to_dataset(table, root_path="dataset_name_4") - >>> pq.ParquetDataset("dataset_name_4/").files - ['dataset_name_4/...-0.parquet'] - """ +) -> None: ... def write_metadata( schema: Schema, @@ -1948,131 +313,18 @@ def write_metadata( metadata_collector: list[FileMetaData] | None = None, filesystem: SupportedFileSystem | None = None, **kwargs, -) -> None: - """ - Write metadata-only Parquet file from schema. 
This can be used with - `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar - files. - - Parameters - ---------- - schema : pyarrow.Schema - where : string or pyarrow.NativeFile - metadata_collector : list - where to collect metadata information. - filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. - **kwargs : dict, - Additional kwargs for ParquetWriter class. See docstring for - `ParquetWriter` for more information. - - Examples - -------- - Generate example data: - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Write a dataset and collect metadata information. - - >>> metadata_collector = [] - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, "dataset_metadata", metadata_collector=metadata_collector) - - Write the `_common_metadata` parquet file without row groups statistics. - - >>> pq.write_metadata(table.schema, "dataset_metadata/_common_metadata") - - Write the `_metadata` parquet file with row groups statistics. - - >>> pq.write_metadata( - ... table.schema, "dataset_metadata/_metadata", metadata_collector=metadata_collector - ... ) - """ +) -> None: ... def read_metadata( where: str | Path | IO | NativeFile, memory_map: bool = False, decryption_properties: FileDecryptionProperties | None = None, filesystem: SupportedFileSystem | None = None, -) -> FileMetaData: - """ - Read FileMetaData from footer of a single Parquet file. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - - Returns - ------- - metadata : FileMetaData - The metadata of the Parquet file - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, "example.parquet") - - >>> pq.read_metadata("example.parquet") - - created_by: parquet-cpp-arrow version ... - num_columns: 2 - num_rows: 3 - num_row_groups: 1 - format_version: 2.6 - serialized_size: ... - """ +) -> FileMetaData: ... def read_schema( where: str | Path | IO | NativeFile, memory_map: bool = False, decryption_properties: FileDecryptionProperties | None = None, filesystem: SupportedFileSystem | None = None, -) -> Schema: - """ - Read effective Arrow schema from Parquet file metadata. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. 
- - Returns - ------- - schema : pyarrow.Schema - The schema of the Parquet file - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({"n_legs": [4, 5, 100], "animal": ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, "example.parquet") - - >>> pq.read_schema("example.parquet") - n_legs: int64 - animal: string - """ +) -> Schema: ... From e23d97fb383145481e7708ba05ac91f449edfaca Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 16 Sep 2025 19:18:52 +0200 Subject: [PATCH 19/26] Remove docsting --- python/pyarrow-stubs/__init__.pyi | 5 + python/pyarrow-stubs/_azurefs.pyi | 1 + python/pyarrow-stubs/_compute.pyi | 1464 +---- python/pyarrow-stubs/_csv.pyi | 555 +- python/pyarrow-stubs/_cuda.pyi | 567 +- python/pyarrow-stubs/_dataset.pyi | 1940 +------ python/pyarrow-stubs/_dataset_orc.pyi | 1 + python/pyarrow-stubs/_dataset_parquet.pyi | 212 +- .../_dataset_parquet_encryption.pyi | 57 +- python/pyarrow-stubs/_feather.pyi | 6 +- python/pyarrow-stubs/_flight.pyi | 1177 +--- python/pyarrow-stubs/_fs.pyi | 962 +--- python/pyarrow-stubs/_gcsfs.pyi | 64 +- python/pyarrow-stubs/_hdfs.pyi | 58 +- python/pyarrow-stubs/_ipc.pyi | 632 +- python/pyarrow-stubs/_json.pyi | 140 +- python/pyarrow-stubs/_parquet.pyi | 38 +- python/pyarrow-stubs/_parquet_encryption.pyi | 11 +- python/pyarrow-stubs/_s3fs.pyi | 15 +- python/pyarrow-stubs/_stubs_typing.pyi | 19 +- python/pyarrow-stubs/_substrait.pyi | 7 + python/pyarrow-stubs/_types.pyi | 4280 ++------------ python/pyarrow-stubs/acero.pyi | 15 +- python/pyarrow-stubs/array.pyi | 3122 ++-------- python/pyarrow-stubs/builder.pyi | 83 +- python/pyarrow-stubs/compute.pyi | 5085 +---------------- python/pyarrow-stubs/config.pyi | 6 + python/pyarrow-stubs/dataset.pyi | 28 +- python/pyarrow-stubs/device.pyi | 65 +- python/pyarrow-stubs/feather.pyi | 13 +- python/pyarrow-stubs/fs.pyi | 3 + python/pyarrow-stubs/interchange/buffer.pyi | 2 + python/pyarrow-stubs/interchange/column.pyi | 15 +- .../pyarrow-stubs/interchange/dataframe.pyi | 2 + .../interchange/from_dataframe.pyi | 18 +- python/pyarrow-stubs/io.pyi | 1402 +---- python/pyarrow-stubs/ipc.pyi | 19 +- python/pyarrow-stubs/lib.pyi | 36 +- python/pyarrow-stubs/memory.pyi | 207 +- python/pyarrow-stubs/orc.pyi | 254 +- python/pyarrow-stubs/pandas_compat.pyi | 11 + python/pyarrow-stubs/parquet/core.pyi | 35 +- python/pyarrow-stubs/scalar.pyi | 627 +- python/pyarrow-stubs/table.pyi | 5026 +--------------- python/pyarrow-stubs/tensor.pyi | 660 +-- python/pyarrow-stubs/types.pyi | 5 +- python/pyarrow-stubs/util.pyi | 4 + 47 files changed, 2946 insertions(+), 26008 deletions(-) diff --git a/python/pyarrow-stubs/__init__.pyi b/python/pyarrow-stubs/__init__.pyi index 6df38801de1..1a188eccd45 100644 --- a/python/pyarrow-stubs/__init__.pyi +++ b/python/pyarrow-stubs/__init__.pyi @@ -39,11 +39,13 @@ from pyarrow.lib import ( set_io_thread_count, ) + def show_versions() -> None: ... def show_info() -> None: ... def _module_is_available(module: str) -> bool: ... def _filesystem_is_available(fs: str) -> bool: ... + from pyarrow.lib import ( null, bool_, @@ -352,6 +354,8 @@ from pyarrow.ipc import ( # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. in # wheels) + + def get_include() -> str: ... def _get_pkg_config_executable() -> str: ... def _has_pkg_config(pkgname: str) -> bool: ... @@ -360,6 +364,7 @@ def get_libraries() -> list[str]: ... 
def create_library_symlinks() -> None: ... def get_library_dirs() -> list[str]: ... + __all__ = [ "__version__", "_lib", diff --git a/python/pyarrow-stubs/_azurefs.pyi b/python/pyarrow-stubs/_azurefs.pyi index 37fcec2c9bd..2d866f34dbd 100644 --- a/python/pyarrow-stubs/_azurefs.pyi +++ b/python/pyarrow-stubs/_azurefs.pyi @@ -19,6 +19,7 @@ from typing import Literal from ._fs import FileSystem + class AzureFileSystem(FileSystem): def __init__( self, diff --git a/python/pyarrow-stubs/_compute.pyi b/python/pyarrow-stubs/_compute.pyi index e8360b48edc..7742dbda539 100644 --- a/python/pyarrow-stubs/_compute.pyi +++ b/python/pyarrow-stubs/_compute.pyi @@ -31,158 +31,87 @@ from . import lib _Order: TypeAlias = Literal["ascending", "descending"] _Placement: TypeAlias = Literal["at_start", "at_end"] + class Kernel(lib._Weakrefable): - """ - A kernel object. + ... - Kernels handle the execution of a Function for a certain signature. - """ class Function(lib._Weakrefable): - """ - A compute function. - - A function implements a certain logical computation over a range of - possible input signatures. Each signature accepts a range of input - types and is implemented by a given Kernel. - - Functions can be of different kinds: - - * "scalar" functions apply an item-wise computation over all items - of their inputs. Each item in the output only depends on the values - of the inputs at the same position. Examples: addition, comparisons, - string predicates... - - * "vector" functions apply a collection-wise computation, such that - each item in the output may depend on the values of several items - in each input. Examples: dictionary encoding, sorting, extracting - unique values... - - * "scalar_aggregate" functions reduce the dimensionality of the inputs by - applying a reduction function. Examples: sum, min_max, mode... - - * "hash_aggregate" functions apply a reduction function to an input - subdivided by grouping criteria. They may not be directly called. - Examples: hash_sum, hash_min_max... - - * "meta" functions dispatch to other functions. - """ @property - def arity(self) -> int: - """ - The function arity. + def arity(self) -> int: ... - If Ellipsis (i.e. `...`) is returned, the function takes a variable - number of arguments. - """ @property def kind( self, - ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: - """ - The function kind. - """ + ) -> Literal["scalar", "vector", "scalar_aggregate", "hash_aggregate", "meta"]: ... @property - def name(self) -> str: - """ - The function name. - """ + def name(self) -> str: ... @property - def num_kernels(self) -> int: - """ - The number of kernels implementing this function. - """ + def num_kernels(self) -> int: ... + def call( self, args: Iterable, options: FunctionOptions | None = None, memory_pool: lib.MemoryPool | None = None, length: int | None = None, - ) -> Any: - """ - Call the function on the given arguments. - - Parameters - ---------- - args : iterable - The arguments to pass to the function. Accepted types depend - on the specific function. - options : FunctionOptions, optional - Options instance for executing this function. This should have - the right concrete options type. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - length : int, optional - Batch size for execution, for nullary (no argument) functions. If - not passed, will be inferred from passed data. - """ + ) -> Any: ... 
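The ``Function`` and ``FunctionRegistry`` stubs above are normally reached through ``pyarrow.compute``; a minimal sketch, assuming the built-in ``min_max`` aggregate and an illustrative input array:

.. code-block:: python

    import pyarrow as pa
    import pyarrow.compute as pc

    # Look up a compute function by name in the global registry
    registry = pc.function_registry()
    func = registry.get_function("min_max")
    print(func.name, func.kind, func.arity, func.num_kernels)

    # Call it directly, passing the matching FunctionOptions subclass
    result = func.call(
        [pa.array([1, 2, None, 4])],
        options=pc.ScalarAggregateOptions(skip_nulls=True),
    )
    print(result)  # StructScalar with "min" and "max" fields

Equivalently, ``pc.min_max(pa.array([1, 2, None, 4]), skip_nulls=True)`` dispatches through the same registry; calling ``Function.call`` directly is mainly useful when the function name or options are determined at runtime.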
+ class FunctionOptions(lib._Weakrefable): def serialize(self) -> lib.Buffer: ... @classmethod def deserialize(cls, buf: lib.Buffer) -> FunctionOptions: ... + class FunctionRegistry(lib._Weakrefable): - def get_function(self, name: str) -> Function: - """ - Look up a function by name in the registry. - - Parameters - ---------- - name : str - The name of the function to lookup - """ - - def list_functions(self) -> list[str]: - """ - Return all function names in the registry. - """ - -class HashAggregateFunction(Function): ... -class HashAggregateKernel(Kernel): ... -class ScalarAggregateFunction(Function): ... -class ScalarAggregateKernel(Kernel): ... -class ScalarFunction(Function): ... -class ScalarKernel(Kernel): ... -class VectorFunction(Function): ... -class VectorKernel(Kernel): ... + def get_function(self, name: str) -> Function: ... + def list_functions(self) -> list[str]: ... + + +class HashAggregateFunction(Function): + ... + + +class HashAggregateKernel(Kernel): + ... + + +class ScalarAggregateFunction(Function): + ... + + +class ScalarAggregateKernel(Kernel): + ... + + +class ScalarFunction(Function): + ... + + +class ScalarKernel(Kernel): + ... + + +class VectorFunction(Function): + ... + + +class VectorKernel(Kernel): + ... # ==================== _compute.pyx Option classes ==================== + + class ArraySortOptions(FunctionOptions): - """ - Options for the `array_sort_indices` function. - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - null_placement : str, default "at_end" - Where nulls in the input should be sorted. - Accepted values are "at_start", "at_end". - """ def __init__( self, order: _Order = "ascending", null_placement: _Placement = "at_end", ) -> None: ... -class AssumeTimezoneOptions(FunctionOptions): - """ - Options for the `assume_timezone` function. - - Parameters - ---------- - timezone : str - Timezone to assume for the input. - ambiguous : str, default "raise" - How to handle timestamps that are ambiguous in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - nonexistent : str, default "raise" - How to handle timestamps that don't exist in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - """ +class AssumeTimezoneOptions(FunctionOptions): def __init__( self, timezone: str, @@ -191,28 +120,8 @@ class AssumeTimezoneOptions(FunctionOptions): nonexistent: Literal["raise", "earliest", "latest"] = "raise", ) -> None: ... -class CastOptions(FunctionOptions): - """ - Options for the `cast` function. - - Parameters - ---------- - target_type : DataType, optional - The PyArrow type to cast to. - allow_int_overflow : bool, default False - Whether integer overflow is allowed when casting. - allow_time_truncate : bool, default False - Whether time precision truncation is allowed when casting. - allow_time_overflow : bool, default False - Whether date/time range overflow is allowed when casting. - allow_decimal_truncate : bool, default False - Whether decimal precision truncation is allowed when casting. - allow_float_truncate : bool, default False - Whether floating-point precision truncation is allowed when casting. - allow_invalid_utf8 : bool, default False - Whether producing invalid utf8 data is allowed when casting. 
- """ +class CastOptions(FunctionOptions): allow_int_overflow: bool allow_time_truncate: bool allow_time_overflow: bool @@ -237,190 +146,68 @@ class CastOptions(FunctionOptions): def unsafe(target_type: lib.DataType | None = None) -> CastOptions: ... def is_safe(self) -> bool: ... + class CountOptions(FunctionOptions): - """ - Options for the `count` function. + def __init__(self, mode: Literal["only_valid", + "only_null", "all"] = "only_valid") -> None: ... - Parameters - ---------- - mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - """ - def __init__(self, mode: Literal["only_valid", "only_null", "all"] = "only_valid") -> None: ... class CumulativeOptions(FunctionOptions): - """ - Options for `cumulative_*` functions. - - - cumulative_sum - - cumulative_sum_checked - - cumulative_prod - - cumulative_prod_checked - - cumulative_max - - cumulative_min - - Parameters - ---------- - start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. - skip_nulls : bool, default False - When false, the first encountered null is propagated. - """ - def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... + class CumulativeSumOptions(FunctionOptions): - """ - Options for `cumulative_sum` function. - - Parameters - ---------- - start : Scalar, default None - Starting value for sum computation - skip_nulls : bool, default False - When false, the first encountered null is propagated. - """ - def __init__(self, start: lib.Scalar | None = None, *, skip_nulls: bool = False) -> None: ... + def __init__(self, start: lib.Scalar | None = None, + *, skip_nulls: bool = False) -> None: ... -class DayOfWeekOptions(FunctionOptions): - """ - Options for the `day_of_week` function. - Parameters - ---------- - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). - How this value is numbered is unaffected by `count_from_zero`. - """ +class DayOfWeekOptions(FunctionOptions): + def __init__(self, *, count_from_zero: bool = True, + week_start: int = 1) -> None: ... - def __init__(self, *, count_from_zero: bool = True, week_start: int = 1) -> None: ... class DictionaryEncodeOptions(FunctionOptions): - """ - Options for dictionary encoding. - - Parameters - ---------- - null_encoding : str, default "mask" - How to encode nulls in the input. - Accepted values are "mask" (null inputs emit a null in the indices - array), "encode" (null inputs emit a non-null index pointing to - a null value in the dictionary array). - """ def __init__(self, null_encoding: Literal["mask", "encode"] = "mask") -> None: ... -class RunEndEncodeOptions(FunctionOptions): - """ - Options for run-end encoding. - Parameters - ---------- - run_end_type : DataType, default pyarrow.int32() - The data type of the run_ends array. - - Accepted values are pyarrow.{int16(), int32(), int64()}. - """ +class RunEndEncodeOptions(FunctionOptions): # TODO: default is DataType(int32) def __init__(self, run_end_type: lib.DataType = ...) -> None: ... + class ElementWiseAggregateOptions(FunctionOptions): - """ - Options for element-wise aggregate functions. 
- - Parameters - ---------- - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - """ def __init__(self, *, skip_nulls: bool = True) -> None: ... + class ExtractRegexOptions(FunctionOptions): - """ - Options for the `extract_regex` function. - - Parameters - ---------- - pattern : str - Regular expression with named capture fields. - """ def __init__(self, pattern: str) -> None: ... + class ExtractRegexSpanOptions(FunctionOptions): - """ - Options for the `extract_regex_span` function. - - Parameters - ---------- - pattern : str - Regular expression with named capture fields. - """ def __init__(self, pattern: str) -> None: ... -class FilterOptions(FunctionOptions): - """ - Options for selecting with a boolean filter. - Parameters - ---------- - null_selection_behavior : str, default "drop" - How to handle nulls in the selection filter. - Accepted values are "drop", "emit_null". - """ +class FilterOptions(FunctionOptions): + def __init__( + self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... - def __init__(self, null_selection_behavior: Literal["drop", "emit_null"] = "drop") -> None: ... class IndexOptions(FunctionOptions): - """ - Options for the `index` function. - - Parameters - ---------- - value : Scalar - The value to search for. - """ def __init__(self, value: lib.Scalar) -> None: ... + class JoinOptions(FunctionOptions): - """ - Options for the `binary_join_element_wise` function. - - Parameters - ---------- - null_handling : str, default "emit_null" - How to handle null values in the inputs. - Accepted values are "emit_null", "skip", "replace". - null_replacement : str, default "" - Replacement string to emit for null inputs if `null_handling` - is "replace". - """ @overload - def __init__(self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + def __init__( + self, null_handling: Literal["emit_null", "skip"] = "emit_null") -> None: ... + @overload - def __init__(self, null_handling: Literal["replace"], null_replacement: str = "") -> None: ... + def __init__(self, null_handling: Literal["replace"], + null_replacement: str = "") -> None: ... + class ListSliceOptions(FunctionOptions): - """ - Options for list array slicing. - - Parameters - ---------- - start : int - Index to start slicing inner list elements (inclusive). - stop : Optional[int], default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. (NotImplemented) - step : int, default 1 - Slice step. - return_fixed_size_list : Optional[bool], default None - Whether to return a FixedSizeListArray. If true _and_ stop is after - a list element's length, nulls will be appended to create the - requested slice size. The default of `None` will return the same - type which was passed in. - """ def __init__( self, start: int, @@ -429,32 +216,12 @@ class ListSliceOptions(FunctionOptions): return_fixed_size_list: bool | None = None, ) -> None: ... + class ListFlattenOptions(FunctionOptions): - """ - Options for `list_flatten` function - - Parameters - ---------- - recursive : bool, default False - When True, the list array is flattened recursively until an array - of non-list values is formed. - """ def __init__(self, recursive: bool = False) -> None: ... + class MakeStructOptions(FunctionOptions): - """ - Options for the `make_struct` function. - - Parameters - ---------- - field_names : sequence of str - Names of the struct fields to create. 
- field_nullability : sequence of bool, optional - Nullability information for each struct field. - If omitted, all fields are nullable. - field_metadata : sequence of KeyValueMetadata, optional - Metadata for each struct field. - """ def __init__( self, field_names: Sequence[str] = (), @@ -463,199 +230,63 @@ class MakeStructOptions(FunctionOptions): field_metadata: Sequence[lib.KeyValueMetadata] | None = None, ) -> None: ... + class MapLookupOptions(FunctionOptions): - """ - Options for the `map_lookup` function. - - Parameters - ---------- - query_key : Scalar or Object can be converted to Scalar - The key to search for. - occurrence : str - The occurrence(s) to return from the Map - Accepted values are "first", "last", or "all". - """ # TODO: query_key: Scalar or Object can be converted to Scalar def __init__( self, query_key: lib.Scalar, occurrence: Literal["first", "last", "all"] ) -> None: ... -class MatchSubstringOptions(FunctionOptions): - """ - Options for looking for a substring. - - Parameters - ---------- - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - """ +class MatchSubstringOptions(FunctionOptions): def __init__(self, pattern: str, *, ignore_case: bool = False) -> None: ... + class ModeOptions(FunctionOptions): - """ - Options for the `mode` function. - - Parameters - ---------- - n : int, default 1 - Number of distinct most-common values to return. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__(self, n: int = 1, *, skip_nulls: bool = True, min_count: int = 0) -> None: ... + def __init__(self, n: int = 1, *, skip_nulls: bool = True, + min_count: int = 0) -> None: ... + class NullOptions(FunctionOptions): - """ - Options for the `is_null` function. - - Parameters - ---------- - nan_is_null : bool, default False - Whether floating-point NaN values are considered null. - """ def __init__(self, *, nan_is_null: bool = False) -> None: ... + class PadOptions(FunctionOptions): - """ - Options for padding strings. - - Parameters - ---------- - width : int - Desired string length. - padding : str, default " " - What to pad the string with. Should be one byte or codepoint. - lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). - """ def __init__( self, width: int, padding: str = " ", lean_left_on_odd_padding: bool = True ) -> None: ... + class PairwiseOptions(FunctionOptions): - """ - Options for `pairwise` functions. - - Parameters - ---------- - period : int, default 1 - Period for applying the period function. - """ def __init__(self, period: int = 1) -> None: ... + class PartitionNthOptions(FunctionOptions): - """ - Options for the `partition_nth_indices` function. - - Parameters - ---------- - pivot : int - Index into the equivalent sorted array of the pivot element. - null_placement : str, default "at_end" - Where nulls in the input should be partitioned. - Accepted values are "at_start", "at_end". - """ - def __init__(self, pivot: int, *, null_placement: _Placement = "at_end") -> None: ... 
+ def __init__(self, pivot: int, *, + null_placement: _Placement = "at_end") -> None: ... + class WinsorizeOptions(FunctionOptions): - """ - Options for the `winsorize` function. - - Parameters - ---------- - lower_limit : float, between 0 and 1 - The quantile below which all values are replaced with the quantile's value. - upper_limit : float, between 0 and 1 - The quantile above which all values are replaced with the quantile's value. - """ def __init__(self, lower_limit: float, upper_limit: float) -> None: ... + class QuantileOptions(FunctionOptions): - """ - Options for the `quantile` function. - - Parameters - ---------- - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to compute. All values must be in - [0, 1]. - interpolation : str, default "linear" - How to break ties between competing data points for a given quantile. - Accepted values are: - - - "linear": compute an interpolation - - "lower": always use the smallest of the two data points - - "higher": always use the largest of the two data points - - "nearest": select the data point that is closest to the quantile - - "midpoint": compute the (unweighted) mean of the two data points - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ def __init__( self, q: float | Sequence[float], *, - interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"] = "linear", + interpolation: Literal["linear", "lower", + "higher", "nearest", "midpoint"] = "linear", skip_nulls: bool = True, min_count: int = 0, ) -> None: ... + class RandomOptions(FunctionOptions): - """ - Options for random generation. - - Parameters - ---------- - initializer : int or str - How to initialize the underlying random generator. - If an integer is given, it is used as a seed. - If "system" is given, the random generator is initialized with - a system-specific source of (hopefully true) randomness. - Other values are invalid. - """ def __init__(self, *, initializer: int | Literal["system"] = "system") -> None: ... + class RankOptions(FunctionOptions): - """ - Options for the `rank` function. - - Parameters - ---------- - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - tiebreaker : str, default "first" - Configure how ties between equal values are handled. - Accepted values are: - - - "min": Ties get the smallest possible rank in sorted order. - - "max": Ties get the largest possible rank in sorted order. - - "first": Ranks are assigned in order of when ties appear in the - input. This ensures the ranks are a stable permutation - of the input. - - "dense": The ranks span a dense [1, M] interval where M is the - number of distinct values in the input. 
- """ def __init__( self, sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", @@ -664,24 +295,8 @@ class RankOptions(FunctionOptions): tiebreaker: Literal["min", "max", "first", "dense"] = "first", ) -> None: ... -class RankQuantileOptions(FunctionOptions): - """ - Options for the `rank_quantile` function. - - Parameters - ---------- - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - """ +class RankQuantileOptions(FunctionOptions): def __init__( self, sort_keys: _Order | Sequence[tuple[str, _Order]] = "ascending", @@ -689,22 +304,8 @@ class RankQuantileOptions(FunctionOptions): null_placement: _Placement = "at_end", ) -> None: ... + class PivotWiderOptions(FunctionOptions): - """ - Options for the `pivot_wider` function. - - Parameters - ---------- - key_names : sequence of str - The pivot key names expected in the pivot key column. - For each entry in `key_names`, a column with the same name is emitted - in the struct output. - unexpected_key_behavior : str, default "ignore" - The behavior when pivot keys not in `key_names` are encountered. - Accepted values are "ignore", "raise". - If "ignore", unexpected keys are silently ignored. - If "raise", unexpected keys raise a KeyError. - """ def __init__( self, key_names: Sequence[str], @@ -712,39 +313,17 @@ class PivotWiderOptions(FunctionOptions): unexpected_key_behavior: Literal["ignore", "raise"] = "ignore", ) -> None: ... + class ReplaceSliceOptions(FunctionOptions): - """ - Options for replacing slices. - - Parameters - ---------- - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - """ def __init__(self, start: int, stop: int, replacement: str) -> None: ... + class ReplaceSubstringOptions(FunctionOptions): - """ - Options for replacing matched substrings. - - Parameters - ---------- - pattern : str - Substring pattern to look for inside input values. - replacement : str - What to replace the pattern with. - max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). - """ def __init__( self, pattern: str, replacement: str, *, max_replacements: int | None = None ) -> None: ... + _RoundMode: TypeAlias = Literal[ "down", "up", @@ -758,43 +337,22 @@ _RoundMode: TypeAlias = Literal[ "half_to_odd", ] + class RoundBinaryOptions(FunctionOptions): - """ - Options for rounding numbers when ndigits is provided by a second array - - Parameters - ---------- - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - """ def __init__( self, round_mode: _RoundMode = "half_to_even", ) -> None: ... + class RoundOptions(FunctionOptions): - """ - Options for rounding numbers. - - Parameters - ---------- - ndigits : int, default 0 - Number of fractional digits to round to. 
- round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - """ def __init__( self, ndigits: int = 0, round_mode: _RoundMode = "half_to_even", ) -> None: ... + _DateTimeUint: TypeAlias = Literal[ "year", "quarter", @@ -809,48 +367,8 @@ _DateTimeUint: TypeAlias = Literal[ "nanosecond", ] + class RoundTemporalOptions(FunctionOptions): - """ - Options for rounding temporal values. - - Parameters - ---------- - multiple : int, default 1 - Number of units to round to. - unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. - calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. - """ def __init__( self, multiple: int = 1, @@ -861,223 +379,67 @@ class RoundTemporalOptions(FunctionOptions): calendar_based_origin: bool = False, ) -> None: ... + class RoundToMultipleOptions(FunctionOptions): - """ - Options for rounding numbers to a multiple. - - Parameters - ---------- - multiple : numeric scalar, default 1.0 - Multiple to round to. Should be a scalar of a type compatible - with the argument to be rounded. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - """ - def __init__(self, multiple: float = 1.0, round_mode: _RoundMode = "half_to_even") -> None: ... + def __init__(self, multiple: float = 1.0, + round_mode: _RoundMode = "half_to_even") -> None: ... + class ScalarAggregateOptions(FunctionOptions): - """ - Options for scalar aggregations. - - Parameters - ---------- - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. 
- If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ def __init__(self, *, skip_nulls: bool = True, min_count: int = 1) -> None: ... -class SelectKOptions(FunctionOptions): - """ - Options for top/bottom k-selection. - - Parameters - ---------- - k : int - Number of leading values to select in sorted order - (i.e. the largest values if sort order is "descending", - the smallest otherwise). - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - """ +class SelectKOptions(FunctionOptions): def __init__(self, k: int, sort_keys: Sequence[tuple[str, _Order]]) -> None: ... + class SetLookupOptions(FunctionOptions): - """ - Options for the `is_in` and `index_in` functions. - - Parameters - ---------- - value_set : Array - Set of values to look for in the input. - skip_nulls : bool, default False - If False, nulls in the input are matched in the value_set just - like regular values. - If True, nulls in the input always fail matching. - """ def __init__(self, value_set: lib.Array, *, skip_nulls: bool = True) -> None: ... -class SliceOptions(FunctionOptions): - """ - Options for slicing. - - Parameters - ---------- - start : int - Index to start slicing at (inclusive). - stop : int or None, default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. - step : int, default 1 - Slice step. - """ +class SliceOptions(FunctionOptions): def __init__(self, start: int, stop: int | None = None, step: int = 1) -> None: ... + class SortOptions(FunctionOptions): - """ - Options for the `sort_indices` function. - - Parameters - ---------- - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - null_placement : str, default "at_end" - Where nulls in input should be sorted, only applying to - columns/fields mentioned in `sort_keys`. - Accepted values are "at_start", "at_end". - """ def __init__( self, sort_keys: Sequence[tuple[str, _Order]], *, null_placement: _Placement = "at_end" ) -> None: ... -class SplitOptions(FunctionOptions): - """ - Options for splitting on whitespace. - Parameters - ---------- - max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - """ +class SplitOptions(FunctionOptions): + def __init__(self, *, max_splits: int | None = None, + reverse: bool = False) -> None: ... - def __init__(self, *, max_splits: int | None = None, reverse: bool = False) -> None: ... class SplitPatternOptions(FunctionOptions): - """ - Options for splitting on a string pattern. - - Parameters - ---------- - pattern : str - String pattern to split on. - max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. 
- This only has an effect if `max_splits` is not None. - """ def __init__( self, pattern: str, *, max_splits: int | None = None, reverse: bool = False ) -> None: ... + class StrftimeOptions(FunctionOptions): - """ - Options for the `strftime` function. - - Parameters - ---------- - format : str, default "%Y-%m-%dT%H:%M:%S" - Pattern for formatting input values. - locale : str, default "C" - Locale to use for locale-specific format specifiers. - """ - def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", locale: str = "C") -> None: ... + def __init__(self, format: str = "%Y-%m-%dT%H:%M:%S", + locale: str = "C") -> None: ... + class StrptimeOptions(FunctionOptions): - """ - Options for the `strptime` function. - - Parameters - ---------- - format : str - Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". - Note that the semantics of the format follow the C/C++ strptime, not the Python one. - There are differences in behavior, for example how the "%y" placeholder - handles years with less than four digits. - unit : str - Timestamp unit of the output. - Accepted values are "s", "ms", "us", "ns". - error_is_null : boolean, default False - Return null on parsing errors if true or raise if false. - """ def __init__( self, format: str, unit: Literal["s", "ms", "us", "ns"], error_is_null: bool = False ) -> None: ... + class StructFieldOptions(FunctionOptions): - """ - Options for the `struct_field` function. - - Parameters - ---------- - indices : List[str], List[bytes], List[int], Expression, bytes, str, or int - List of indices for chained field lookup, for example `[4, 1]` - will look up the second nested field in the fifth outer field. - """ def __init__( self, indices: list[str] | list[bytes] | list[int] | Expression | bytes | str | int ) -> None: ... + class TakeOptions(FunctionOptions): - """ - Options for the `take` and `array_take` functions. - - Parameters - ---------- - boundscheck : boolean, default True - Whether to check indices are within bounds. If False and an - index is out of bounds, behavior is undefined (the process - may crash). - """ def __init__(self, boundscheck: bool = True) -> None: ... + class TDigestOptions(FunctionOptions): - """ - Options for the `tdigest` function. - - Parameters - ---------- - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to approximate. All values must be - in [0, 1]. - delta : int, default 100 - Compression parameter for the T-digest algorithm. - buffer_size : int, default 500 - Buffer size for the T-digest algorithm. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ def __init__( self, q: float | Sequence[float] = 0.5, @@ -1088,85 +450,27 @@ class TDigestOptions(FunctionOptions): min_count: int = 0, ) -> None: ... + class TrimOptions(FunctionOptions): - """ - Options for trimming characters from strings. - - Parameters - ---------- - characters : str - Individual characters to be trimmed from the string. - """ def __init__(self, characters: str) -> None: ... -class Utf8NormalizeOptions(FunctionOptions): - """ - Options for the `utf8_normalize` function. - - Parameters - ---------- - form : str - Unicode normalization form. - Accepted values are "NFC", "NFKC", "NFD", NFKD". 
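A minimal usage sketch for the option classes stubbed here, assuming the pyarrow.compute runtime exports them under the same names; an options object can be passed to call_function, while most kernels also accept the same parameters as keyword arguments.

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array([1.0, 2.0, 3.0, 4.0, None])

    # Configure the "tdigest" kernel through TDigestOptions and invoke it by name.
    opts = pc.TDigestOptions(q=[0.25, 0.75], skip_nulls=True)
    print(pc.call_function("tdigest", [arr], options=opts))

    # Equivalent convenience wrapper with the parameters spelled out as keywords.
    print(pc.quantile(arr, q=0.5, interpolation="linear"))
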
- """ +class Utf8NormalizeOptions(FunctionOptions): def __init__(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: ... + class VarianceOptions(FunctionOptions): - """ - Options for the `variance` and `stddev` functions. - - Parameters - ---------- - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ - def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, min_count: int = 0) -> None: ... + def __init__(self, *, ddof: int = 0, skip_nulls: bool = True, + min_count: int = 0) -> None: ... + class SkewOptions(FunctionOptions): - """ - Options for the `skew` and `kurtosis` functions. - - Parameters - ---------- - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - biased : bool, default True - Whether the calculated value is biased. - If False, the value computed includes a correction factor to reduce bias. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - """ def __init__( self, *, skip_nulls: bool = True, biased: bool = True, min_count: int = 0 ) -> None: ... + class WeekOptions(FunctionOptions): - """ - Options for the `week` function. - - Parameters - ---------- - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - count_from_zero : bool, default False - If True, dates at the start of a year that fall into the last week - of the previous year emit 0. - If False, they emit 52 or 53 (the week number of the last week - of the previous year). - first_week_is_fully_in_year : bool, default False - If True, week number 0 is fully in January. - If False, a week that begins on December 29, 30 or 31 is considered - to be week number 0 of the following year. - """ def __init__( self, *, @@ -1177,76 +481,31 @@ class WeekOptions(FunctionOptions): # ==================== _compute.pyx Functions ==================== + def call_function( name: str, args: list, options: FunctionOptions | None = None, memory_pool: lib.MemoryPool | None = None, length: int | None = None, -) -> Any: - """ - Call a named function. - - The function is looked up in the global registry - (as returned by `function_registry()`). - - Parameters - ---------- - name : str - The name of the function to call. - args : list - The arguments to the function. - options : optional - options provided to the function. - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. - length : int, optional - Batch size for execution, for nullary (no argument) functions. If not - passed, inferred from data. - """ - +) -> Any: ... def function_registry() -> FunctionRegistry: ... -def get_function(name: str) -> Function: - """ - Get a function by name. - - The function is looked up in the global registry - (as returned by `function_registry()`). - - Parameters - ---------- - name : str - The name of the function to lookup - """ - -def list_functions() -> list[str]: - """ - Return all function names in the global registry. - """ +def get_function(name: str) -> Function: ... +def list_functions() -> list[str]: ... 
# ==================== _compute.pyx Udf ==================== + def call_tabular_function( function_name: str, args: Iterable | None = None, func_registry: FunctionRegistry | None = None -) -> lib.RecordBatchReader: - """ - Get a record batch iterator from a tabular function. - - Parameters - ---------- - function_name : str - Name of the function. - args : iterable - The arguments to pass to the function. Accepted types depend - on the specific function. Currently, only an empty args is supported. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - """ +) -> lib.RecordBatchReader: ... + class _FunctionDoc(TypedDict): summary: str description: str + def register_scalar_function( func: Callable, function_name: str, @@ -1254,80 +513,8 @@ def register_scalar_function( in_types: dict[str, lib.DataType], out_type: lib.DataType, func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined scalar function. - - This API is EXPERIMENTAL. - - A scalar function is a function that executes elementwise - operations on arrays or scalars, i.e. a scalar function must - be computed row-by-row with no state where each output row - is computed only from its corresponding input row. - In other words, all argument arrays have the same length, - and the output array is of the same length as the arguments. - Scalar functions are the only functions allowed in query engine - expressions. - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return an Array or Scalar - matching the out_type. It must return a Scalar if - all arguments are scalar, else it must return an Array. - - To define a varargs function, pass a callable that takes - *args. The last in_type will be the type of all varargs - arguments. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. - The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "simple udf" - >>> func_doc["description"] = "add a constant to a scalar" - >>> - >>> def add_constant(ctx, array): - ... return pc.add(array, 1, memory_pool=ctx.memory_pool) - >>> - >>> func_name = "py_add_func" - >>> in_types = {"array": pa.int64()} - >>> out_type = pa.int64() - >>> pc.register_scalar_function(add_constant, func_name, func_doc, in_types, out_type) - >>> - >>> func = pc.get_function(func_name) - >>> func.name - 'py_add_func' - >>> answer = pc.call_function(func_name, [pa.array([20])]) - >>> answer - - [ - 21 - ] - """ +) -> None: ... 
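The doctest removed above shows the intended registration pattern; condensed into a standalone sketch:

    import pyarrow as pa
    import pyarrow.compute as pc

    def add_constant(ctx, array):
        # ctx is a UdfContext; reuse its memory pool for any allocations.
        return pc.add(array, 1, memory_pool=ctx.memory_pool)

    func_doc = {"summary": "simple udf", "description": "add a constant to a scalar"}
    pc.register_scalar_function(
        add_constant, "py_add_func", func_doc, {"array": pa.int64()}, pa.int64()
    )

    print(pc.call_function("py_add_func", [pa.array([20])]))  # -> [21]
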
+ def register_tabular_function( func: Callable, @@ -1336,39 +523,8 @@ def register_tabular_function( in_types: dict[str, lib.DataType], out_type: lib.DataType, func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined tabular function. - - This API is EXPERIMENTAL. - - A tabular function is one accepting a context argument of type - UdfContext and returning a generator of struct arrays. - The in_types argument must be empty and the out_type argument - specifies a schema. Each struct array must have field types - corresponding to the schema. - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The only argument is the context argument of type - UdfContext. It must return a callable that - returns on each invocation a StructArray matching - the out_type, where an empty array indicates end. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - Must be an empty dictionary (reserved for future use). - out_type : Union[Schema, DataType] - Schema of the function's output, or a corresponding flat struct type. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - """ +) -> None: ... + def register_aggregate_function( func: Callable, @@ -1377,88 +533,8 @@ def register_aggregate_function( in_types: dict[str, lib.DataType], out_type: lib.DataType, func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined non-decomposable aggregate function. - - This API is EXPERIMENTAL. - - A non-decomposable aggregation function is a function that executes - aggregate operations on the whole data that it is aggregating. - In other words, non-decomposable aggregate function cannot be - split into consume/merge/finalize steps. - - This is often used with ordered or segmented aggregation where groups - can be emit before accumulating all of the input data. - - Note that currently the size of any input column cannot exceed 2 GB - for a single segment (all groups combined). - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return a Scalar matching the - out_type. - To define a varargs function, pass a callable that takes - *args. The in_type needs to match in type of inputs when - the function gets called. - function_name : str - Name of the function. This name must be unique, i.e., - there should only be one function registered with - this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. - The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. 
- - Examples - -------- - >>> import numpy as np - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "simple median udf" - >>> func_doc["description"] = "compute median" - >>> - >>> def compute_median(ctx, array): - ... return pa.scalar(np.median(array)) - >>> - >>> func_name = "py_compute_median" - >>> in_types = {"array": pa.int64()} - >>> out_type = pa.float64() - >>> pc.register_aggregate_function(compute_median, func_name, func_doc, in_types, out_type) - >>> - >>> func = pc.get_function(func_name) - >>> func.name - 'py_compute_median' - >>> answer = pc.call_function(func_name, [pa.array([20, 40])]) - >>> answer - - >>> table = pa.table([pa.array([1, 1, 2, 2]), pa.array([10, 20, 30, 40])], names=["k", "v"]) - >>> result = table.group_by("k").aggregate([("v", "py_compute_median")]) - >>> result - pyarrow.Table - k: int64 - v_py_compute_median: double - ---- - k: [[1,2]] - v_py_compute_median: [[15,35]] - """ +) -> None: ... + def register_vector_function( func: Callable, @@ -1467,182 +543,24 @@ def register_vector_function( in_types: dict[str, lib.DataType], out_type: lib.DataType, func_registry: FunctionRegistry | None = None, -) -> None: - """ - Register a user-defined vector function. - - This API is EXPERIMENTAL. - - A vector function is a function that executes vector - operations on arrays. Vector function is often used - when compute doesn't fit other more specific types of - functions (e.g., scalar and aggregate). - - Parameters - ---------- - func : callable - A callable implementing the user-defined function. - The first argument is the context argument of type - UdfContext. - Then, it must take arguments equal to the number of - in_types defined. It must return an Array or Scalar - matching the out_type. It must return a Scalar if - all arguments are scalar, else it must return an Array. - - To define a varargs function, pass a callable that takes - *args. The last in_type will be the type of all varargs - arguments. - function_name : str - Name of the function. There should only be one function - registered with this name in the function registry. - function_doc : dict - A dictionary object with keys "summary" (str), - and "description" (str). - in_types : Dict[str, DataType] - A dictionary mapping function argument names to - their respective DataType. - The argument names will be used to generate - documentation for the function. The number of - arguments specified here determines the function - arity. - out_type : DataType - Output type of the function. - func_registry : FunctionRegistry - Optional function registry to use instead of the default global one. - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> - >>> func_doc = {} - >>> func_doc["summary"] = "percent rank" - >>> func_doc["description"] = "compute percent rank" - >>> - >>> def list_flatten_udf(ctx, x): - ... return pc.list_flatten(x) - >>> - >>> func_name = "list_flatten_udf" - >>> in_types = {"array": pa.list_(pa.int64())} - >>> out_type = pa.int64() - >>> pc.register_vector_function(list_flatten_udf, func_name, func_doc, in_types, out_type) - >>> - >>> answer = pc.call_function(func_name, [pa.array([[1, 2], [3, 4]])]) - >>> answer - - [ - 1, - 2, - 3, - 4 - ] - """ - -class UdfContext: - """ - Per-invocation function context/state. +) -> None: ... - This object will always be the first argument to a user-defined - function. It should not be used outside of a call to the function. 
- """ +class UdfContext: @property - def batch_length(self) -> int: - """ - The common length of all input arguments (int). - - In the case that all arguments are scalars, this value - is used to pass the "actual length" of the arguments, - e.g. because the scalar values are encoding a column - with a constant value. - """ + def batch_length(self) -> int: ... @property - def memory_pool(self) -> lib.MemoryPool: - """ - A memory pool for allocations (:class:`MemoryPool`). - - This is the memory pool supplied by the user when they invoked - the function and it should be used in any calls to arrow that the - UDF makes if that call accepts a memory_pool. - """ + def memory_pool(self) -> lib.MemoryPool: ... # ==================== _compute.pyx Expression ==================== -class Expression(lib._Weakrefable): - """ - A logical expression to be evaluated against some input. - - To create an expression: - - - Use the factory function ``pyarrow.compute.scalar()`` to create a - scalar (not necessary when combined, see example below). - - Use the factory function ``pyarrow.compute.field()`` to reference - a field (column in table). - - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``. - - Combine expressions using python operators ``&`` (logical and), - ``|`` (logical or) and ``~`` (logical not). - Note: python keywords ``and``, ``or`` and ``not`` cannot be used - to combine expressions. - - Create expression predicates using Expression methods such as - ``pyarrow.compute.Expression.isin()``. - - Examples - -------- - - >>> import pyarrow.compute as pc - >>> (pc.field("a") < pc.scalar(3)) | (pc.field("b") > 7) - 7))> - >>> pc.field("a") != 3 - - >>> pc.field("a").isin([1, 2, 3]) - - """ + +class Expression(lib._Weakrefable): @staticmethod - def from_substrait(buffer: bytes | lib.Buffer) -> Expression: - """ - Deserialize an expression from Substrait - - The serialized message must be an ExtendedExpression message that has - only a single expression. The name of the expression and the schema - the expression was bound to will be ignored. Use - pyarrow.substrait.deserialize_expressions if this information is needed - or if the message might contain multiple expressions. - - Parameters - ---------- - message : bytes or Buffer or a protobuf Message - The Substrait message to deserialize - - Returns - ------- - Expression - The deserialized expression - """ - def to_substrait(self, schema: lib.Schema, allow_arrow_extensions: bool = False) -> lib.Buffer: - """ - Serialize the expression using Substrait - - The expression will be serialized as an ExtendedExpression message that has a - single expression named "expression" - - Parameters - ---------- - schema : Schema - The input schema the expression will be bound to - allow_arrow_extensions : bool, default False - If False then only functions that are part of the core Substrait function - definitions will be allowed. Set this to True to allow pyarrow-specific functions - but the result may not be accepted by other compute libraries. - - Returns - ------- - Buffer - A buffer containing the serialized Protobuf plan. - """ + def from_substrait(buffer: bytes | lib.Buffer) -> Expression: ... + def to_substrait(self, schema: lib.Schema, + allow_arrow_extensions: bool = False) -> lib.Buffer: ... + def __invert__(self) -> Expression: ... def __and__(self, other) -> Expression: ... def __or__(self, other) -> Expression: ... @@ -1656,83 +574,13 @@ class Expression(lib._Weakrefable): def __ge__(self, value: object) -> Expression: ... 
# type: ignore[override] def __le__(self, value: object) -> Expression: ... # type: ignore[override] def __truediv__(self, other) -> Expression: ... - def is_valid(self) -> bool: - """ - Check whether the expression is not-null (valid). - - This creates a new expression equivalent to calling the - `is_valid` compute function on this expression. - - Returns - ------- - is_valid : Expression - """ - def is_null(self, nan_is_null: bool = False) -> Expression: - """ - Check whether the expression is null. - - This creates a new expression equivalent to calling the - `is_null` compute function on this expression. - - Parameters - ---------- - nan_is_null : boolean, default False - Whether floating-point NaNs are considered null. - - Returns - ------- - is_null : Expression - """ - def is_nan(self) -> Expression: - """ - Check whether the expression is NaN. - - This creates a new expression equivalent to calling the - `is_nan` compute function on this expression. - - Returns - ------- - is_nan : Expression - """ + def is_valid(self) -> bool: ... + def is_null(self, nan_is_null: bool = False) -> Expression: ... + def is_nan(self) -> Expression: ... + def cast( self, type: lib.DataType, safe: bool = True, options: CastOptions | None = None - ) -> Expression: - """ - Explicitly set or change the expression's data type. - - This creates a new expression equivalent to calling the - `cast` compute function on this expression. - - Parameters - ---------- - type : DataType, default None - Type to cast array to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - cast : Expression - """ - def isin(self, values: lib.Array | Iterable) -> Expression: - """ - Check whether the expression is contained in values. - - This creates a new expression equivalent to calling the - `is_in` compute function on this expression. - - Parameters - ---------- - values : Array or iterable - The values to check for. - - Returns - ------- - isin : Expression - A new expression that, when evaluated, checks whether - this expression's value is contained in `values`. - """ + ) -> Expression: ... + def isin(self, values: lib.Array | Iterable) -> Expression: ... # ==================== _compute.py ==================== diff --git a/python/pyarrow-stubs/_csv.pyi b/python/pyarrow-stubs/_csv.pyi index c490d6be93a..c62ae725ec1 100644 --- a/python/pyarrow-stubs/_csv.pyi +++ b/python/pyarrow-stubs/_csv.pyi @@ -22,97 +22,9 @@ from _typeshed import StrPath from . import lib + @dataclass(kw_only=True) class ReadOptions(lib._Weakrefable): - """ - Options for reading CSV files. - - Parameters - ---------- - use_threads : bool, optional (default True) - Whether to use multiple threads to accelerate reading - block_size : int, optional - How much bytes to process at a time from the input stream. - This will determine multi-threading granularity as well as - the size of individual record batches or table chunks. - Minimum valid value for block size is 1 - skip_rows : int, optional (default 0) - The number of rows to skip before the column names (if any) - and the CSV data. - skip_rows_after_names : int, optional (default 0) - The number of rows to skip after the column names. - This number can be larger than the number of rows in one - block, and empty rows are counted. 
- The order of application is as follows: - - `skip_rows` is applied (if non-zero); - - column names are read (unless `column_names` is set); - - `skip_rows_after_names` is applied (if non-zero). - column_names : list, optional - The column names of the target table. If empty, fall back on - `autogenerate_column_names`. - autogenerate_column_names : bool, optional (default False) - Whether to autogenerate column names if `column_names` is empty. - If true, column names will be of the form "f0", "f1"... - If false, column names will be read from the first CSV row - after `skip_rows`. - encoding : str, optional (default 'utf8') - The character encoding of the CSV data. Columns that cannot - decode using this encoding can still be read as Binary. - - Examples - -------- - - Defining an example data: - - >>> import io - >>> s = "1,2,3\\nFlamingo,2,2022-03-01\\nHorse,4,2022-03-02\\nBrittle stars,5,2022-03-03\\nCentipede,100,2022-03-04" - >>> print(s) - 1,2,3 - Flamingo,2,2022-03-01 - Horse,4,2022-03-02 - Brittle stars,5,2022-03-03 - Centipede,100,2022-03-04 - - Ignore the first numbered row and substitute it with defined - or autogenerated column names: - - >>> from pyarrow import csv - >>> read_options = csv.ReadOptions(column_names=["animals", "n_legs", "entry"], skip_rows=1) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - - >>> read_options = csv.ReadOptions(autogenerate_column_names=True, skip_rows=1) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - f0: string - f1: int64 - f2: date32[day] - ---- - f0: [["Flamingo","Horse","Brittle stars","Centipede"]] - f1: [[2,4,5,100]] - f2: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - - Remove the first 2 rows of the data: - - >>> read_options = csv.ReadOptions(skip_rows_after_names=2) - >>> csv.read_csv(io.BytesIO(s.encode()), read_options=read_options) - pyarrow.Table - 1: string - 2: int64 - 3: date32[day] - ---- - 1: [["Brittle stars","Centipede"]] - 2: [[5,100]] - 3: [[2022-03-03,2022-03-04]] - """ - use_threads: bool = field(default=True, kw_only=False) block_size: int | None = None skip_rows: int = 0 @@ -120,84 +32,11 @@ class ReadOptions(lib._Weakrefable): column_names: list[str] | None = None autogenerate_column_names: bool = False encoding: str = "utf8" - def validate(self) -> None: ... + @dataclass(kw_only=True) class ParseOptions(lib._Weakrefable): - """ - Options for parsing CSV files. - - Parameters - ---------- - delimiter : 1-character string, optional (default ',') - The character delimiting individual cells in the CSV data. - quote_char : 1-character string or False, optional (default '"') - The character used optionally for quoting CSV values - (False if quoting is not allowed). - double_quote : bool, optional (default True) - Whether two quotes in a quoted CSV value denote a single quote - in the data. - escape_char : 1-character string or False, optional (default False) - The character used optionally for escaping special characters - (False if escaping is not allowed). - newlines_in_values : bool, optional (default False) - Whether newline characters are allowed in CSV values. - Setting this to True reduces the performance of multi-threaded - CSV reading. 
- ignore_empty_lines : bool, optional (default True) - Whether empty lines are ignored in CSV input. - If False, an empty line is interpreted as containing a single empty - value (assuming a one-column CSV file). - invalid_row_handler : callable, optional (default None) - If not None, this object is called for each CSV row that fails - parsing (because of a mismatching number of columns). - It should accept a single InvalidRow argument and return either - "skip" or "error" depending on the desired outcome. - - Examples - -------- - - Defining an example file from bytes object: - - >>> import io - >>> s = ( - ... "animals;n_legs;entry\\n" - ... "Flamingo;2;2022-03-01\\n" - ... "# Comment here:\\n" - ... "Horse;4;2022-03-02\\n" - ... "Brittle stars;5;2022-03-03\\n" - ... "Centipede;100;2022-03-04" - ... ) - >>> print(s) - animals;n_legs;entry - Flamingo;2;2022-03-01 - # Comment here: - Horse;4;2022-03-02 - Brittle stars;5;2022-03-03 - Centipede;100;2022-03-04 - >>> source = io.BytesIO(s.encode()) - - Read the data from a file skipping rows with comments - and defining the delimiter: - - >>> from pyarrow import csv - >>> def skip_comment(row): - ... if row.text.startswith("# "): - ... return "skip" - ... else: - ... return "error" - >>> parse_options = csv.ParseOptions(delimiter=";", invalid_row_handler=skip_comment) - >>> csv.read_csv(source, parse_options=parse_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - """ delimiter: str = field(default=",", kw_only=False) quote_char: str | Literal[False] = '"' @@ -209,210 +48,9 @@ class ParseOptions(lib._Weakrefable): def validate(self) -> None: ... + @dataclass(kw_only=True) class ConvertOptions(lib._Weakrefable): - """ - Options for converting CSV data. - - Parameters - ---------- - check_utf8 : bool, optional (default True) - Whether to check UTF8 validity of string columns. - column_types : pyarrow.Schema or dict, optional - Explicitly map column names to column types. Passing this argument - disables type inference on the defined columns. - null_values : list, optional - A sequence of strings that denote nulls in the data - (defaults are appropriate in most cases). Note that by default, - string columns are not checked for null values. To enable - null checking for those, specify ``strings_can_be_null=True``. - true_values : list, optional - A sequence of strings that denote true booleans in the data - (defaults are appropriate in most cases). - false_values : list, optional - A sequence of strings that denote false booleans in the data - (defaults are appropriate in most cases). - decimal_point : 1-character string, optional (default '.') - The character used as decimal point in floating-point and decimal - data. - strings_can_be_null : bool, optional (default False) - Whether string / binary columns can have null values. - If true, then strings in null_values are considered null for - string columns. - If false, then all strings are valid string values. - quoted_strings_can_be_null : bool, optional (default True) - Whether quoted values can be null. - If true, then strings in "null_values" are also considered null - when they appear quoted in the CSV file. Otherwise, quoted values - are never considered null. - include_columns : list, optional - The names of columns to include in the Table. - If empty, the Table will include all columns from the CSV file. 
- If not empty, only these columns will be included, in this order. - include_missing_columns : bool, optional (default False) - If false, columns in `include_columns` but not in the CSV file will - error out. - If true, columns in `include_columns` but not in the CSV file will - produce a column of nulls (whose type is selected using - `column_types`, or null by default). - This option is ignored if `include_columns` is empty. - auto_dict_encode : bool, optional (default False) - Whether to try to automatically dict-encode string / binary data. - If true, then when type inference detects a string or binary column, - it it dict-encoded up to `auto_dict_max_cardinality` distinct values - (per chunk), after which it switches to regular encoding. - This setting is ignored for non-inferred columns (those in - `column_types`). - auto_dict_max_cardinality : int, optional - The maximum dictionary cardinality for `auto_dict_encode`. - This value is per chunk. - timestamp_parsers : list, optional - A sequence of strptime()-compatible format strings, tried in order - when attempting to infer or convert timestamp values (the special - value ISO8601() can also be given). By default, a fast built-in - ISO-8601 parser is used. - - Examples - -------- - - Defining an example data: - - >>> import io - >>> s = ( - ... "animals,n_legs,entry,fast\\n" - ... "Flamingo,2,01/03/2022,Yes\\n" - ... "Horse,4,02/03/2022,Yes\\n" - ... "Brittle stars,5,03/03/2022,No\\n" - ... "Centipede,100,04/03/2022,No\\n" - ... ",6,05/03/2022," - ... ) - >>> print(s) - animals,n_legs,entry,fast - Flamingo,2,01/03/2022,Yes - Horse,4,02/03/2022,Yes - Brittle stars,5,03/03/2022,No - Centipede,100,04/03/2022,No - ,6,05/03/2022, - - Change the type of a column: - - >>> import pyarrow as pa - >>> from pyarrow import csv - >>> convert_options = csv.ConvertOptions(column_types={"n_legs": pa.float64()}) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: double - entry: string - fast: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]] - fast: [["Yes","Yes","No","No",""]] - - Define a date parsing format to get a timestamp type column - (in case dates are not in ISO format and not converted by default): - - >>> convert_options = csv.ConvertOptions(timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"]) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - entry: timestamp[s] - fast: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] - fast: [["Yes","Yes","No","No",""]] - - Specify a subset of columns to be read: - - >>> convert_options = csv.ConvertOptions(include_columns=["animals", "n_legs"]) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - - List additional column to be included as a null typed column: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals", "n_legs", "location"], include_missing_columns=True - ... 
) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - location: null - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - n_legs: [[2,4,5,100,6]] - location: [5 nulls] - - Define columns as dictionary type (by default only the - string/binary columns are dictionary encoded): - - >>> convert_options = csv.ConvertOptions( - ... timestamp_parsers=["%m/%d/%Y", "%m-%d-%Y"], auto_dict_encode=True - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: dictionary - n_legs: int64 - entry: timestamp[s] - fast: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Horse","Brittle stars","Centipede",""] -- indices: - [0,1,2,3,4]] - n_legs: [[2,4,5,100,6]] - entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]] - fast: [ -- dictionary: - ["Yes","No",""] -- indices: - [0,0,1,1,2]] - - Set upper limit for the number of categories. If the categories - is more than the limit, the conversion to dictionary will not - happen: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals"], auto_dict_encode=True, auto_dict_max_cardinality=2 - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",""]] - - Set empty strings to missing values: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["animals", "n_legs"], strings_can_be_null=True - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - animals: string - n_legs: int64 - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede",null]] - n_legs: [[2,4,5,100,6]] - - Define values to be True and False when converting a column - into a bool type: - - >>> convert_options = csv.ConvertOptions( - ... include_columns=["fast"], false_values=["No"], true_values=["Yes"] - ... ) - >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options) - pyarrow.Table - fast: bool - ---- - fast: [[true,true,false,false,null]] - """ check_utf8: bool = field(default=True, kw_only=False) column_types: lib.Schema | dict | None = None @@ -430,30 +68,9 @@ class ConvertOptions(lib._Weakrefable): def validate(self) -> None: ... + @dataclass(kw_only=True) class WriteOptions(lib._Weakrefable): - """ - Options for writing CSV files. - - Parameters - ---------- - include_header : bool, optional (default True) - Whether to write an initial header line with column names - batch_size : int, optional (default 1024) - How many rows to process together when converting and writing - CSV data - delimiter : 1-character string, optional (default ",") - The character delimiting individual cells in the CSV data. - quoting_style : str, optional (default "needed") - Whether to quote values, and if so, which quoting style to use. - The following values are accepted: - - - "needed" (default): only enclose values in quotes when needed. - - "all_valid": enclose all valid values in quotes; nulls are not quoted. - - "none": do not enclose any values in quotes; values containing - special characters (such as quotes, cell delimiters or line endings) - will raise an error. - """ include_header: bool = field(default=True, kw_only=False) batch_size: int = 1024 @@ -462,43 +79,17 @@ class WriteOptions(lib._Weakrefable): def validate(self) -> None: ... 
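A condensed usage sketch for the CSV option classes stubbed here, drawn from the doctests removed elsewhere in this module:

    import io
    import pyarrow as pa
    from pyarrow import csv

    data = b"animals,n_legs\nFlamingo,2\nCentipede,100"

    # Skip the header row and supply explicit column names via ReadOptions.
    read_options = csv.ReadOptions(column_names=["animals", "n_legs"], skip_rows=1)
    table = csv.read_csv(io.BytesIO(data), read_options=read_options)

    # Write it back out without a header line.
    csv.write_csv(
        table, "animals.csv", write_options=csv.WriteOptions(include_header=False)
    )
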
+ @dataclass class InvalidRow(lib._Weakrefable): - """ - Description of an invalid row in a CSV file. - - Parameters - ---------- - expected_columns : int - The expected number of columns in the row. - actual_columns : int - The actual number of columns in the row. - number : int or None - The physical row number if known, otherwise None. - text : str - The contents of the row. - """ expected_columns: int actual_columns: int number: int | None text: str + class CSVWriter(lib._CRecordBatchWriter): - """ - Writer to create a CSV file. - - Parameters - ---------- - sink : str, path, pyarrow.OutputStream or file-like object - The location where to write the CSV data. - schema : pyarrow.Schema - The schema of the data to be written. - write_options : pyarrow.csv.WriteOptions - Options to configure writing the CSV data. - memory_pool : MemoryPool, optional - Pool for temporary allocations. - """ def __init__( self, @@ -510,44 +101,22 @@ class CSVWriter(lib._CRecordBatchWriter): memory_pool: lib.MemoryPool | None = None, ) -> None: ... -class CSVStreamingReader(lib.RecordBatchReader): ... + +class CSVStreamingReader(lib.RecordBatchReader): + ... + ISO8601: lib._Weakrefable + def open_csv( input_file: StrPath | IO[Any], read_options: ReadOptions | None = None, parse_options: ParseOptions | None = None, convert_options: ConvertOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> CSVStreamingReader: - """ - Open a streaming reader of CSV data. - - Reading using this function is always single-threaded. - - Parameters - ---------- - input_file : string, path or file-like object - The location of CSV data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. - read_options : pyarrow.csv.ReadOptions, optional - Options for the CSV reader (see pyarrow.csv.ReadOptions constructor - for defaults) - parse_options : pyarrow.csv.ParseOptions, optional - Options for the CSV parser - (see pyarrow.csv.ParseOptions constructor for defaults) - convert_options : pyarrow.csv.ConvertOptions, optional - Options for converting CSV data - (see pyarrow.csv.ConvertOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate RecordBatch memory from - - Returns - ------- - :class:`pyarrow.csv.CSVStreamingReader` - """ +) -> CSVStreamingReader: ... + def read_csv( input_file: StrPath | IO[Any], @@ -555,104 +124,12 @@ def read_csv( parse_options: ParseOptions | None = None, convert_options: ConvertOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Table: - """ - Read a Table from a stream of CSV data. - - Parameters - ---------- - input_file : string, path or file-like object - The location of CSV data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. - read_options : pyarrow.csv.ReadOptions, optional - Options for the CSV reader (see pyarrow.csv.ReadOptions constructor - for defaults) - parse_options : pyarrow.csv.ParseOptions, optional - Options for the CSV parser - (see pyarrow.csv.ParseOptions constructor for defaults) - convert_options : pyarrow.csv.ConvertOptions, optional - Options for converting CSV data - (see pyarrow.csv.ConvertOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate Table memory from - - Returns - ------- - :class:`pyarrow.Table` - Contents of the CSV file as a in-memory table. 
- - Examples - -------- - - Defining an example file from bytes object: - - >>> import io - >>> s = ( - ... "animals,n_legs,entry\\n" - ... "Flamingo,2,2022-03-01\\n" - ... "Horse,4,2022-03-02\\n" - ... "Brittle stars,5,2022-03-03\\n" - ... "Centipede,100,2022-03-04" - ... ) - >>> print(s) - animals,n_legs,entry - Flamingo,2,2022-03-01 - Horse,4,2022-03-02 - Brittle stars,5,2022-03-03 - Centipede,100,2022-03-04 - >>> source = io.BytesIO(s.encode()) - - Reading from the file - - >>> from pyarrow import csv - >>> csv.read_csv(source) - pyarrow.Table - animals: string - n_legs: int64 - entry: date32[day] - ---- - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - n_legs: [[2,4,5,100]] - entry: [[2022-03-01,2022-03-02,2022-03-03,2022-03-04]] - """ +) -> lib.Table: ... + def write_csv( data: lib.RecordBatch | lib.Table, output_file: StrPath | lib.NativeFile | IO[Any], write_options: WriteOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> None: - """ - Write record batch or table to a CSV file. - - Parameters - ---------- - data : pyarrow.RecordBatch or pyarrow.Table - The data to write. - output_file : string, path, pyarrow.NativeFile, or file-like object - The location where to write the CSV data. - write_options : pyarrow.csv.WriteOptions - Options to configure writing the CSV data. - memory_pool : MemoryPool, optional - Pool for temporary allocations. - - Examples - -------- - - >>> import pyarrow as pa - >>> from pyarrow import csv - - >>> legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> entry_date = pa.array(["01/03/2022", "02/03/2022", "03/03/2022", "04/03/2022"]) - >>> table = pa.table([animals, legs, entry_date], names=["animals", "n_legs", "entry"]) - - >>> csv.write_csv(table, "animals.csv") - - >>> write_options = csv.WriteOptions(include_header=False) - >>> csv.write_csv(table, "animals.csv", write_options=write_options) - - >>> write_options = csv.WriteOptions(delimiter=";") - >>> csv.write_csv(table, "animals.csv", write_options=write_options) - """ +) -> None: ... diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi index c96951b863c..3ec866ad668 100644 --- a/python/pyarrow-stubs/_cuda.pyi +++ b/python/pyarrow-stubs/_cuda.pyi @@ -24,278 +24,73 @@ from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-un from . import lib from ._stubs_typing import ArrayLike + class Context(lib._Weakrefable): - """ - CUDA driver context. - """ - - def __init__(self, device_number: int = 0, handle: int | None = None) -> None: - """ - Create a CUDA driver context for a particular device. - - If a CUDA context handle is passed, it is wrapped, otherwise - a default CUDA context for the given device is requested. - - Parameters - ---------- - device_number : int (default 0) - Specify the GPU device for which the CUDA driver context is - requested. - handle : int, optional - Specify CUDA handle for a shared context that has been created - by another library. - """ + + def __init__(self, device_number: int = 0, handle: int | None = None) -> None: ... + @staticmethod - def from_numba(context: _numba_driver.Context | None = None) -> Context: - """ - Create a Context instance from a Numba CUDA context. - - Parameters - ---------- - context : {numba.cuda.cudadrv.driver.Context, None} - A Numba CUDA context instance. - If None, the current Numba context is used. - - Returns - ------- - shared_context : pyarrow.cuda.Context - Context instance. 
- """ - def to_numba(self) -> _numba_driver.Context: - """ - Convert Context to a Numba CUDA context. - - Returns - ------- - context : numba.cuda.cudadrv.driver.Context - Numba CUDA context instance. - """ + def from_numba(context: _numba_driver.Context | None = None) -> Context: ... + + def to_numba(self) -> _numba_driver.Context: ... + @staticmethod - def get_num_devices() -> int: - """Return the number of GPU devices.""" + def get_num_devices() -> int: ... + @property - def device_number(self) -> int: - """Return context device number.""" + def device_number(self) -> int: ... + @property - def handle(self) -> int: - """Return pointer to context handle.""" - def synchronize(self) -> None: - """Blocks until the device has completed all preceding requested - tasks. - """ + def handle(self) -> int: ... + + def synchronize(self) -> None: ... + @property - def bytes_allocated(self) -> int: - """Return the number of allocated bytes.""" - def get_device_address(self, address: int) -> int: - """Return the device address that is reachable from kernels running in - the context - - Parameters - ---------- - address : int - Specify memory address value - - Returns - ------- - device_address : int - Device address accessible from device context - - Notes - ----- - The device address is defined as a memory address accessible - by device. While it is often a device memory address but it - can be also a host memory address, for instance, when the - memory is allocated as host memory (using cudaMallocHost or - cudaHostAlloc) or as managed memory (using cudaMallocManaged) - or the host memory is page-locked (using cudaHostRegister). - """ - def new_buffer(self, nbytes: int) -> CudaBuffer: - """Return new device buffer. - - Parameters - ---------- - nbytes : int - Specify the number of bytes to be allocated. - - Returns - ------- - buf : CudaBuffer - Allocated buffer. - """ + def bytes_allocated(self) -> int: ... + + def get_device_address(self, address: int) -> int: ... + + def new_buffer(self, nbytes: int) -> CudaBuffer: ... + @property - def memory_manager(self) -> lib.MemoryManager: - """ - The default memory manager tied to this context's device. - - Returns - ------- - MemoryManager - """ + def memory_manager(self) -> lib.MemoryManager: ... + @property - def device(self) -> lib.Device: - """ - The device instance associated with this context. - - Returns - ------- - Device - """ - def foreign_buffer(self, address: int, size: int, base: Any | None = None) -> CudaBuffer: - """ - Create device buffer from address and size as a view. - - The caller is responsible for allocating and freeing the - memory. When `address==size==0` then a new zero-sized buffer - is returned. - - Parameters - ---------- - address : int - Specify the starting address of the buffer. The address can - refer to both device or host memory but it must be - accessible from device after mapping it with - `get_device_address` method. - size : int - Specify the size of device buffer in bytes. - base : {None, object} - Specify object that owns the referenced memory. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device reachable memory. - - """ - def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: - """Open existing CUDA IPC memory handle - - Parameters - ---------- - ipc_handle : IpcMemHandle - Specify opaque pointer to CUipcMemHandle (driver API). - - Returns - ------- - buf : CudaBuffer - referencing device buffer - """ + def device(self) -> lib.Device: ... 
+ + def foreign_buffer(self, address: int, size: int, base: Any | + None = None) -> CudaBuffer: ... + + def open_ipc_buffer(self, ipc_handle: IpcMemHandle) -> CudaBuffer: ... + def buffer_from_data( self, data: CudaBuffer | HostBuffer | lib.Buffer | ArrayLike, offset: int = 0, size: int = -1, - ) -> CudaBuffer: - """Create device buffer and initialize with data. - - Parameters - ---------- - data : {CudaBuffer, HostBuffer, Buffer, array-like} - Specify data to be copied to device buffer. - offset : int - Specify the offset of input buffer for device data - buffering. Default: 0. - size : int - Specify the size of device buffer in bytes. Default: all - (starting from input offset) - - Returns - ------- - cbuf : CudaBuffer - Device buffer with copied data. - """ - def buffer_from_object(self, obj: Any) -> CudaBuffer: - """Create device buffer view of arbitrary object that references - device accessible memory. - - When the object contains a non-contiguous view of device - accessible memory then the returned device buffer will contain - contiguous view of the memory, that is, including the - intermediate data that is otherwise invisible to the input - object. - - Parameters - ---------- - obj : {object, Buffer, HostBuffer, CudaBuffer, ...} - Specify an object that holds (device or host) address that - can be accessed from device. This includes objects with - types defined in pyarrow.cuda as well as arbitrary objects - that implement the CUDA array interface as defined by numba. - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of device accessible memory. - - """ + ) -> CudaBuffer: ... + + def buffer_from_object(self, obj: Any) -> CudaBuffer: ... + class IpcMemHandle(lib._Weakrefable): - """A serializable container for a CUDA IPC handle.""" + @staticmethod - def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: - """Create IpcMemHandle from opaque buffer (e.g. from another - process) - - Parameters - ---------- - opaque_handle : - a CUipcMemHandle as a const void* - - Returns - ------- - ipc_handle : IpcMemHandle - """ - def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: - """Write IpcMemHandle to a Buffer - - Parameters - ---------- - pool : {MemoryPool, None} - Specify a pool to allocate memory from - - Returns - ------- - buf : Buffer - The serialized buffer. - """ + def from_buffer(opaque_handle: lib.Buffer) -> IpcMemHandle: ... -class CudaBuffer(lib.Buffer): - """An Arrow buffer with data located in a GPU device. + def serialize(self, pool: lib.MemoryPool | None = None) -> lib.Buffer: ... - To create a CudaBuffer instance, use Context.device_buffer(). - The memory allocated in a CudaBuffer is freed when the buffer object - is deleted. - """ +class CudaBuffer(lib.Buffer): @staticmethod - def from_buffer(buf: lib.Buffer) -> CudaBuffer: - """Convert back generic buffer into CudaBuffer - - Parameters - ---------- - buf : Buffer - Specify buffer containing CudaBuffer - - Returns - ------- - dbuf : CudaBuffer - Resulting device buffer. - """ + def from_buffer(buf: lib.Buffer) -> CudaBuffer: ... + @staticmethod - def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: - """Create a CudaBuffer view from numba MemoryPointer instance. - - Parameters - ---------- - mem : numba.cuda.cudadrv.driver.MemoryPointer - - Returns - ------- - cbuf : CudaBuffer - Device buffer as a view of numba MemoryPointer. 
- """ - def to_numba(self) -> _numba_driver.MemoryPointer: - """Return numba memory pointer of CudaBuffer instance.""" + def from_numba(mem: _numba_driver.MemoryPointer) -> CudaBuffer: ... + + def to_numba(self) -> _numba_driver.MemoryPointer: ... + def copy_to_host( self, position: int = 0, @@ -303,243 +98,62 @@ class CudaBuffer(lib.Buffer): buf: lib.Buffer | None = None, memory_pool: lib.MemoryPool | None = None, resizable: bool = False, - ) -> lib.Buffer: - """Copy memory from GPU device to CPU host - - Caller is responsible for ensuring that all tasks affecting - the memory are finished. Use - - `.context.synchronize()` - - when needed. - - Parameters - ---------- - position : int - Specify the starting position of the source data in GPU - device buffer. Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - the position until host buffer is full). - buf : Buffer - Specify a pre-allocated output buffer in host. Default: None - (allocate new output buffer). - memory_pool : MemoryPool - resizable : bool - Specify extra arguments to allocate_buffer. Used only when - buf is None. - - Returns - ------- - buf : Buffer - Output buffer in host. - - """ + ) -> lib.Buffer: ... + def copy_from_host( self, data: lib.Buffer | ArrayLike, position: int = 0, nbytes: int = -1 - ) -> int: - """Copy data from host to device. - - The device buffer must be pre-allocated. - - Parameters - ---------- - data : {Buffer, array-like} - Specify data in host. It can be array-like that is valid - argument to py_buffer - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - """ - def copy_from_device(self, buf: CudaBuffer, position: int = 0, nbytes: int = -1) -> int: - """Copy data from device to device. - - Parameters - ---------- - buf : CudaBuffer - Specify source device buffer. - position : int - Specify the starting position of the copy in device buffer. - Default: 0. - nbytes : int - Specify the number of bytes to copy. Default: -1 (all from - source until device buffer, starting from position, is full) - - Returns - ------- - nbytes : int - Number of bytes copied. - - """ - def export_for_ipc(self) -> IpcMemHandle: - """ - Expose this device buffer as IPC memory which can be used in other - processes. - - After calling this function, this device memory will not be - freed when the CudaBuffer is destructed. - - Returns - ------- - ipc_handle : IpcMemHandle - The exported IPC handle - - """ + ) -> int: ... + + def copy_from_device(self, buf: CudaBuffer, position: int = 0, + nbytes: int = -1) -> int: ... + + def export_for_ipc(self) -> IpcMemHandle: ... + @property - def context(self) -> Context: - """Returns the CUDA driver context of this buffer.""" - def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: - """Return slice of device buffer - - Parameters - ---------- - offset : int, default 0 - Specify offset from the start of device buffer to slice - length : int, default None - Specify the length of slice (default is until end of device - buffer starting from offset). If the length is larger than - the data available, the returned slice will have a size of - the available data starting from the offset. - - Returns - ------- - sliced : CudaBuffer - Zero-copy slice of device buffer. 
- - """ - def to_pybytes(self) -> bytes: - """Return device buffer content as Python bytes.""" + def context(self) -> Context: ... -class HostBuffer(lib.Buffer): - """Device-accessible CPU memory created using cudaHostAlloc. + def slice(self, offset: int = 0, length: int | None = None) -> CudaBuffer: ... - To create a HostBuffer instance, use + def to_pybytes(self) -> bytes: ... + + +class HostBuffer(lib.Buffer): - cuda.new_host_buffer() - """ @property def size(self) -> int: ... + class BufferReader(lib.NativeFile): - """File interface for zero-copy read from CUDA buffers. - Note: Read methods return pointers to device memory. This means - you must be careful using this interface with any Arrow code which - may expect to be able to do anything other than pointer arithmetic - on the returned buffers. - """ def __init__(self, obj: CudaBuffer) -> None: ... - def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: - """Return a slice view of the underlying device buffer. - - The slice will start at the current reader position and will - have specified size in bytes. + def read_buffer(self, nbytes: int | None = None) -> CudaBuffer: ... - Parameters - ---------- - nbytes : int, default None - Specify the number of bytes to read. Default: None (read all - remaining bytes). - - Returns - ------- - cbuf : CudaBuffer - New device buffer. - - """ class BufferWriter(lib.NativeFile): - """File interface for writing to CUDA buffers. - By default writes are unbuffered. Use set_buffer_size to enable - buffering. - """ def __init__(self, obj: CudaBuffer) -> None: ... - def writeat(self, position: int, data: ArrayLike) -> None: - """Write data to buffer starting from position. - - Parameters - ---------- - position : int - Specify device buffer position where the data will be - written. - data : array-like - Specify data, the data instance must implement buffer - protocol. - """ + def writeat(self, position: int, data: ArrayLike) -> None: ... + @property - def buffer_size(self) -> int: - """Returns size of host (CPU) buffer, 0 for unbuffered""" + def buffer_size(self) -> int: ... + @buffer_size.setter - def buffer_size(self, buffer_size: int): - """Set CPU buffer size to limit calls to cudaMemcpy - - Parameters - ---------- - buffer_size : int - Specify the size of CPU buffer to allocate in bytes. - """ + def buffer_size(self, buffer_size: int): ... + @property - def num_bytes_buffered(self) -> int: - """Returns number of bytes buffered on host""" - -def new_host_buffer(size: int, device: int = 0) -> HostBuffer: - """Return buffer with CUDA-accessible memory on CPU host - - Parameters - ---------- - size : int - Specify the number of bytes to be allocated. - device : int - Specify GPU device number. - - Returns - ------- - dbuf : HostBuffer - Allocated host buffer - """ - -def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: - """Write record batch message to GPU device memory - - Parameters - ---------- - batch : RecordBatch - Record batch to write - ctx : Context - CUDA Context to allocate device memory from - - Returns - ------- - dbuf : CudaBuffer - device buffer which contains the record batch message - """ + def num_bytes_buffered(self) -> int: ... + + +def new_host_buffer(size: int, device: int = 0) -> HostBuffer: ... + + +def serialize_record_batch(batch: lib.RecordBatch, ctx: Context) -> CudaBuffer: ... 
+ def read_message( source: CudaBuffer | cuda.BufferReader, pool: lib.MemoryManager | None = None -) -> lib.Message: - """Read Arrow IPC message located on GPU device - - Parameters - ---------- - source : {CudaBuffer, cuda.BufferReader} - Device buffer or reader of device buffer. - pool : MemoryPool (optional) - Pool to allocate CPU memory for the metadata - - Returns - ------- - message : Message - The deserialized message, body still on device - """ +) -> lib.Message: ... + def read_record_batch( buffer: lib.Buffer, @@ -547,27 +161,4 @@ def read_record_batch( *, dictionary_memo: lib.DictionaryMemo | None = None, pool: lib.MemoryPool | None = None, -) -> lib.RecordBatch: - """Construct RecordBatch referencing IPC message located on CUDA device. - - While the metadata is copied to host memory for deserialization, - the record batch data remains on the device. - - Parameters - ---------- - buffer : - Device buffer containing the complete IPC message - schema : Schema - The schema for the record batch - dictionary_memo : DictionaryMemo, optional - If message contains dictionaries, must pass a populated - DictionaryMemo - pool : MemoryPool (optional) - Pool to allocate metadata from - - Returns - ------- - batch : RecordBatch - Reconstructed record batch, with device pointers - - """ +) -> lib.RecordBatch: ... diff --git a/python/pyarrow-stubs/_dataset.pyi b/python/pyarrow-stubs/_dataset.pyi index 3665bdba00b..c3b3c4d9bec 100644 --- a/python/pyarrow-stubs/_dataset.pyi +++ b/python/pyarrow-stubs/_dataset.pyi @@ -42,48 +42,16 @@ from .acero import ExecNodeOptions from .compute import Expression from .ipc import IpcWriteOptions, RecordBatchReader -class Dataset(lib._Weakrefable): - """ - Collection of data fragments and potentially child datasets. - Arrow Datasets allow you to query against data that has been split across - multiple files. This sharding of data may indicate partitioning, which - can accelerate queries that only touch some partitions (files). - """ +class Dataset(lib._Weakrefable): @property - def partition_expression(self) -> Expression: - """ - An Expression which evaluates to true for all data viewed by this - Dataset. - """ - def replace_schema(self, schema: lib.Schema) -> None: - """ - Return a copy of this Dataset with a different schema. - - The copy will view the same Fragments. If the new schema is not - compatible with the original dataset's schema then an error will - be raised. - - Parameters - ---------- - schema : Schema - The new dataset schema. - """ - def get_fragments(self, filter: Expression | None = None): - """Returns an iterator over the fragments in this dataset. - - Parameters - ---------- - filter : Expression, default None - Return fragments matching the optional filter, either using the - partition_expression or internal information like Parquet's - statistics. - - Returns - ------- - fragments : iterator of Fragment - """ + def partition_expression(self) -> Expression: ... + + def replace_schema(self, schema: lib.Schema) -> None: ... + + def get_fragments(self, filter: Expression | None = None): ... + def scanner( self, columns: list[str] | None = None, @@ -95,122 +63,8 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Build a scan operation against the dataset. - - Data is not loaded immediately. Instead, this produces a Scanner, - which exposes further operations (e.g. loading all data as a - table, counting rows). 
- - See the :meth:`Scanner.from_dataset` method for further information. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - scanner : Scanner - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, "dataset_scanner.parquet") - - >>> import pyarrow.dataset as ds - >>> dataset = ds.dataset("dataset_scanner.parquet") - - Selecting a subset of the columns: - - >>> dataset.scanner(columns=["year", "n_legs"]).to_table() - pyarrow.Table - year: int64 - n_legs: int64 - ---- - year: [[2020,2022,2021,2022,2019,2021]] - n_legs: [[2,2,4,4,5,100]] - - Projecting selected columns using an expression: - - >>> dataset.scanner( - ... columns={ - ... "n_legs_uint": ds.field("n_legs").cast("uint8"), - ... } - ... 
).to_table() - pyarrow.Table - n_legs_uint: uint8 - ---- - n_legs_uint: [[2,2,4,4,5,100]] - - Filtering rows while scanning: - - >>> dataset.scanner(filter=ds.field("year") > 2020).to_table() - pyarrow.Table - year: int64 - n_legs: int64 - animal: string - ---- - year: [[2022,2021,2022,2021]] - n_legs: [[2,4,4,100]] - animal: [["Parrot","Dog","Horse","Centipede"]] - """ + ) -> Scanner: ... + def to_batches( self, columns: list[str] | None = None, @@ -222,65 +76,8 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Iterator[lib.RecordBatch]: - """ - Read the dataset as materialized record batches. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - record_batches : iterator of RecordBatch - """ + ) -> Iterator[lib.RecordBatch]: ... + def to_table( self, columns: list[str] | dict[str, Expression] | None = None, @@ -292,68 +89,8 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Read the dataset to an Arrow table. 
- - Note that this method reads all the selected data from the dataset - into memory. - - Parameters - ---------- - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ + ) -> lib.Table: ... + def take( self, indices: Indices, @@ -366,67 +103,8 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Select rows of data by index. - - Parameters - ---------- - indices : Array or array-like - indices of rows to select in the dataset. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). 
- - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ + ) -> lib.Table: ... + def head( self, num_rows: int, @@ -439,67 +117,8 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Load the first N rows of the dataset. - - Parameters - ---------- - num_rows : int - The number of rows to load. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. 
- batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ + ) -> lib.Table: ... + def count_rows( self, filter: Expression | None = None, @@ -510,82 +129,16 @@ class Dataset(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> int: - """ - Count rows matching the scanner filter. - - Parameters - ---------- - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - count : int - """ + ) -> int: ... + @property - def schema(self) -> lib.Schema: - """The common schema of the full Dataset""" - def filter(self, expression: Expression) -> Self: - """ - Apply a row filter to the dataset. - - Parameters - ---------- - expression : Expression - The filter that should be applied to the dataset. - - Returns - ------- - Dataset - """ - def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> InMemoryDataset: - """ - Sort the Dataset by one or multiple columns. 
- - Parameters - ---------- - sorting : str or list[tuple(name, order)] - Name of the column to use to sort (ascending), or - a list of multiple sorting conditions where - each entry is a tuple with column name - and sorting order ("ascending" or "descending") - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - InMemoryDataset - A new dataset sorted according to the sort keys. - """ + def schema(self) -> lib.Schema: ... + + def filter(self, expression: Expression) -> Self: ... + + def sort_by(self, sorting: str | + list[tuple[str, Order]], **kwargs) -> InMemoryDataset: ... + def join( self, right_dataset: Dataset, @@ -596,45 +149,8 @@ class Dataset(lib._Weakrefable): right_suffix: str | None = None, coalesce_keys: bool = True, use_threads: bool = True, - ) -> InMemoryDataset: - """ - Perform a join between this dataset and another one. - - Result of the join will be a new dataset, where further - operations can be applied. - - Parameters - ---------- - right_dataset : dataset - The dataset to join to the current one, acting as the right dataset - in the join operation. - keys : str or list[str] - The columns from current dataset that should be used as keys - of the join operation left side. - right_keys : str or list[str], default None - The columns from the right_dataset that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left dataset. - join_type : str, default "left outer" - The kind of join that should be performed, one of - ("left semi", "right semi", "left anti", "right anti", - "inner", "left outer", "right outer", "full outer") - left_suffix : str, default None - Which suffix to add to right column names. This prevents confusion - when the columns in left and right datasets have colliding names. - right_suffix : str, default None - Which suffix to add to the left column names. This prevents confusion - when the columns in left and right datasets have colliding names. - coalesce_keys : bool, default True - If the duplicated keys should be omitted from one of the sides - in the join result. - use_threads : bool, default True - Whenever to use multithreading or not. - - Returns - ------- - InMemoryDataset - """ + ) -> InMemoryDataset: ... + def join_asof( self, right_dataset: Dataset, @@ -643,114 +159,20 @@ class Dataset(lib._Weakrefable): tolerance: int, right_on: str | list[str] | None = None, right_by: str | list[str] | None = None, - ) -> InMemoryDataset: - """ - Perform an asof join between this dataset and another one. - - This is similar to a left-join except that we match on nearest key rather - than equal keys. Both datasets must be sorted by the key. This type of join - is most useful for time series data that are not perfectly aligned. - - Optionally match on equivalent keys with "by" before searching with "on". - - Result of the join will be a new Dataset, where further - operations can be applied. - - Parameters - ---------- - right_dataset : dataset - The dataset to join to the current one, acting as the right dataset - in the join operation. - on : str - The column from current dataset that should be used as the "on" key - of the join operation left side. - - An inexact match is used on the "on" key, i.e. a row is considered a - match if and only if left_on - tolerance <= right_on <= left_on. - - The input table must be sorted by the "on" key. Must be a single - field of a common type. 
- - Currently, the "on" key must be an integer, date, or timestamp type. - by : str or list[str] - The columns from current dataset that should be used as the keys - of the join operation left side. The join operation is then done - only for the matches in these columns. - tolerance : int - The tolerance for inexact "on" key matching. A right row is considered - a match with the left row `right.on - left.on <= tolerance`. The - `tolerance` may be: - - - negative, in which case a past-as-of-join occurs; - - or positive, in which case a future-as-of-join occurs; - - or zero, in which case an exact-as-of-join occurs. - - The tolerance is interpreted in the same units as the "on" key. - right_on : str or list[str], default None - The columns from the right_dataset that should be used as the on key - on the join operation right side. - When ``None`` use the same key name as the left dataset. - right_by : str or list[str], default None - The columns from the right_dataset that should be used as by keys - on the join operation right side. - When ``None`` use the same key names as the left dataset. - - Returns - ------- - InMemoryDataset - """ + ) -> InMemoryDataset: ... -class InMemoryDataset(Dataset): - """ - A Dataset wrapping in-memory data. - - Parameters - ---------- - source : RecordBatch, Table, list, tuple - The data for this dataset. Can be a RecordBatch, Table, list of - RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader - If an iterable is provided, the schema must also be provided. - schema : Schema, optional - Only required if passing an iterable as the source - """ -class UnionDataset(Dataset): - """ - A Dataset wrapping child datasets. +class InMemoryDataset(Dataset): + ... - Children's schemas must agree with the provided schema. - Parameters - ---------- - schema : Schema - A known schema to conform to. - children : list of Dataset - One or more input children - """ +class UnionDataset(Dataset): @property def children(self) -> list[Dataset]: ... + class FileSystemDataset(Dataset): - """ - A Dataset of file fragments. - - A FileSystemDataset is composed of one or more FileFragment. - - Parameters - ---------- - fragments : list[Fragments] - List of fragments to consume. - schema : Schema - The top-level schema of the Dataset. - format : FileFormat - File format of the fragments, currently only ParquetFileFormat, - IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. - filesystem : FileSystem - FileSystem of the fragments. - root_partition : Expression, optional - The top-level partition of the DataDataset. - """ def __init__( self, @@ -760,6 +182,7 @@ class FileSystemDataset(Dataset): filesystem: SupportedFileSystem | None = None, root_partition: Expression | None = None, ) -> None: ... + @classmethod def from_paths( cls, @@ -769,69 +192,30 @@ class FileSystemDataset(Dataset): filesystem: SupportedFileSystem | None = None, partitions: list[Expression] | None = None, root_partition: Expression | None = None, - ) -> FileSystemDataset: - """ - A Dataset created from a list of paths on a particular filesystem. - - Parameters - ---------- - paths : list of str - List of file paths to create the fragments from. - schema : Schema - The top-level schema of the DataDataset. - format : FileFormat - File format to create fragments from, currently only - ParquetFileFormat, IpcFileFormat, CsvFileFormat, and JsonFileFormat are supported. - filesystem : FileSystem - The filesystem which files are from. 
- partitions : list[Expression], optional - Attach additional partition information for the file paths. - root_partition : Expression, optional - The top-level partition of the DataDataset. - """ + ) -> FileSystemDataset: ... + @property def filesystem(self) -> FileSystem: ... @property - def partitioning(self) -> Partitioning | None: - """ - The partitioning of the Dataset source, if discovered. - - If the FileSystemDataset is created using the ``dataset()`` factory - function with a partitioning specified, this will return the - finalized Partitioning object from the dataset discovery. In all - other cases, this returns None. - """ + def partitioning(self) -> Partitioning | None: ... + @property - def files(self) -> list[str]: - """List of the files""" + def files(self) -> list[str]: ... + @property - def format(self) -> FileFormat: - """The FileFormat of this source.""" + def format(self) -> FileFormat: ... + class FileWriteOptions(lib._Weakrefable): @property def format(self) -> FileFormat: ... + class FileFormat(lib._Weakrefable): def inspect( self, file: StrPath | IO, filesystem: SupportedFileSystem | None = None - ) -> lib.Schema: - """ - Infer the schema of a file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to infer a schema from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - - Returns - ------- - schema : Schema - The schema inferred from the file - """ + ) -> lib.Schema: ... + def make_fragment( self, file: StrPath | IO, @@ -839,29 +223,8 @@ class FileFormat(lib._Weakrefable): partition_expression: Expression | None = None, *, file_size: int | None = None, - ) -> Fragment: - """ - Make a FileFragment from a given file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to make a fragment from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - partition_expression : Expression, optional - An expression that is guaranteed true for all rows in the fragment. Allows - fragment to be potentially skipped while scanning with a filter. - file_size : int, optional - The size of the file in bytes. Can improve performance with high-latency filesystems - when file size needs to be known before reading. - - Returns - ------- - fragment : Fragment - The file fragment - """ + ) -> Fragment: ... + def make_write_options(self) -> FileWriteOptions: ... @property def default_extname(self) -> str: ... @@ -870,17 +233,15 @@ class FileFormat(lib._Weakrefable): @default_fragment_scan_options.setter def default_fragment_scan_options(self, options: FragmentScanOptions) -> None: ... + class Fragment(lib._Weakrefable): - """Fragment of data from a Dataset.""" + @property - def physical_schema(self) -> lib.Schema: - """Return the physical schema of this Fragment. This schema can be - different from the dataset read schema.""" + def physical_schema(self) -> lib.Schema: ... + @property - def partition_expression(self) -> Expression: - """An Expression which evaluates to true for all data viewed by this - Fragment. - """ + def partition_expression(self) -> Expression: ... 
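A minimal sketch of FileFormat.inspect()/make_fragment() together with the Fragment properties stubbed here; the path "animals.csv" is a placeholder (it matches the file written in the csv.write_csv docstring example earlier):

import pyarrow.dataset as ds

fmt = ds.CsvFileFormat()
schema = fmt.inspect("animals.csv")        # infer the schema without a full read
frag = fmt.make_fragment("animals.csv")    # wrap the file as a scannable Fragment
print(frag.physical_schema)                # may differ from a dataset's unified schema
print(frag.partition_expression)           # trivially true for a standalone fragment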
+ def scanner( self, schema: lib.Schema | None = None, @@ -893,73 +254,8 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Build a scan operation against the fragment. - - Data is not loaded immediately. Instead, this produces a Scanner, - which exposes further operations (e.g. loading all data as a - table, counting rows). - - Parameters - ---------- - schema : Schema - Schema to use for scanning. This is used to unify a Fragment to - its Dataset's schema. If not specified this will use the - Fragment's physical schema which might differ for each Fragment. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - scanner : Scanner - """ + ) -> Scanner: ... + def to_batches( self, schema: lib.Schema | None = None, @@ -972,67 +268,8 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Iterator[lib.RecordBatch]: - """ - Read the fragment as materialized record batches. - - Parameters - ---------- - schema : Schema, optional - Concrete schema to use for scanning. 
- columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - record_batches : iterator of RecordBatch - """ + ) -> Iterator[lib.RecordBatch]: ... + def to_table( self, schema: lib.Schema | None = None, @@ -1045,70 +282,8 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Convert this Fragment into a Table. - - Use this convenience utility with care. This will serially materialize - the Scan result in memory before creating the Table. - - Parameters - ---------- - schema : Schema, optional - Concrete schema to use for scanning. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. 
- - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - table : Table - """ + ) -> lib.Table: ... + def take( self, indices: Indices, @@ -1121,67 +296,8 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Select rows of data by index. - - Parameters - ---------- - indices : Array or array-like - The indices of row to select in the dataset. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. 
- filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - Table - """ + ) -> lib.Table: ... + def head( self, num_rows: int, @@ -1194,67 +310,8 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> lib.Table: - """ - Load the first N rows of the fragment. - - Parameters - ---------- - num_rows : int - The number of rows to load. - columns : list of str, default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. 
- fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - Table - """ + ) -> lib.Table: ... + def count_rows( self, columns: list[str] | None = None, @@ -1266,113 +323,52 @@ class Fragment(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> int: - """ - Count rows matching the scanner filter. - - Parameters - ---------- - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - - Returns - ------- - count : int - """ + ) -> int: ... + class FileFragment(Fragment): - """A Fragment representing a data file.""" - def open(self) -> lib.NativeFile: - """ - Open a NativeFile of the buffer or file viewed by this fragment. - """ + def open(self) -> lib.NativeFile: ... + @property - def path(self) -> str: - """ - The path of the data file viewed by this fragment, if it views a - file. If instead it views a buffer, this will be "". - """ + def path(self) -> str: ... + @property - def filesystem(self) -> FileSystem: - """ - The FileSystem containing the data file viewed by this fragment, if - it views a file. If instead it views a buffer, this will be None. - """ + def filesystem(self) -> FileSystem: ... + @property - def buffer(self) -> lib.Buffer: - """ - The buffer viewed by this fragment, if it views a buffer. If - instead it views a file, this will be None. - """ + def buffer(self) -> lib.Buffer: ... 
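A minimal sketch of enumerating a dataset's FileFragments and reading the per-file properties stubbed here; "my_dataset/" is a hypothetical directory of CSV files:

import pyarrow.dataset as ds

dataset = ds.dataset("my_dataset/", format="csv")
for frag in dataset.get_fragments():       # one FileFragment per input file
    print(frag.path, frag.count_rows())    # source path and row count for each fragment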
+ @property - def format(self) -> FileFormat: - """ - The format of the data file viewed by this fragment. - """ + def format(self) -> FileFormat: ... + class FragmentScanOptions(lib._Weakrefable): - """Scan options specific to a particular fragment and scan operation.""" @property def type_name(self) -> str: ... + class IpcFileWriteOptions(FileWriteOptions): @property def write_options(self) -> IpcWriteOptions: ... @write_options.setter def write_options(self, write_options: IpcWriteOptions) -> None: ... + class IpcFileFormat(FileFormat): def equals(self, other: IpcFileFormat) -> bool: ... def make_write_options(self, **kwargs) -> IpcFileWriteOptions: ... @property def default_extname(self) -> str: ... -class FeatherFileFormat(IpcFileFormat): ... + +class FeatherFileFormat(IpcFileFormat): + ... + class CsvFileFormat(FileFormat): - """ - FileFormat for CSV files. - - Parameters - ---------- - parse_options : pyarrow.csv.ParseOptions - Options regarding CSV parsing. - default_fragment_scan_options : CsvFragmentScanOptions - Default options for fragments scan. - convert_options : pyarrow.csv.ConvertOptions - Options regarding value conversion. - read_options : pyarrow.csv.ReadOptions - General read options. - """ + def __init__( self, parse_options: csv.ParseOptions | None = None, @@ -1387,17 +383,8 @@ class CsvFileFormat(FileFormat): def parse_options(self, parse_options: csv.ParseOptions) -> None: ... def equals(self, other: CsvFileFormat) -> bool: ... -class CsvFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for CSV fragments. - Parameters - ---------- - convert_options : pyarrow.csv.ConvertOptions - Options regarding value conversion. - read_options : pyarrow.csv.ReadOptions - General read options. - """ +class CsvFragmentScanOptions(FragmentScanOptions): convert_options: csv.ConvertOptions read_options: csv.ReadOptions @@ -1407,22 +394,13 @@ class CsvFragmentScanOptions(FragmentScanOptions): ) -> None: ... def equals(self, other: CsvFragmentScanOptions) -> bool: ... + class CsvFileWriteOptions(FileWriteOptions): write_options: csv.WriteOptions + class JsonFileFormat(FileFormat): - """ - FileFormat for JSON files. - - Parameters - ---------- - default_fragment_scan_options : JsonFragmentScanOptions - Default options for fragments scan. - parse_options : pyarrow.json.ParseOptions - Options regarding json parsing. - read_options : pyarrow.json.ReadOptions - General read options. - """ + def __init__( self, default_fragment_scan_options: JsonFragmentScanOptions | None = None, @@ -1431,118 +409,38 @@ class JsonFileFormat(FileFormat): ) -> None: ... def equals(self, other: JsonFileFormat) -> bool: ... -class JsonFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for JSON fragments. - Parameters - ---------- - parse_options : pyarrow.json.ParseOptions - Options regarding JSON parsing. - read_options : pyarrow.json.ReadOptions - General read options. - """ +class JsonFragmentScanOptions(FragmentScanOptions): parse_options: _json.ParseOptions read_options: _json.ReadOptions + def __init__( self, parse_options: _json.ParseOptions, read_options: _json.ReadOptions ) -> None: ... def equals(self, other: JsonFragmentScanOptions) -> bool: ... + class Partitioning(lib._Weakrefable): - def parse(self, path: str) -> Expression: - """ - Parse a path into a partition expression. 
- - Parameters - ---------- - path : str - - Returns - ------- - pyarrow.dataset.Expression - """ - def format(self, expr: Expression) -> tuple[str, str]: - """ - Convert a filter expression into a tuple of (directory, filename) using - the current partitioning scheme - - Parameters - ---------- - expr : pyarrow.dataset.Expression - - Returns - ------- - tuple[str, str] - - Examples - -------- - - Specify the Schema for paths like "/2009/June": - - >>> import pyarrow as pa - >>> import pyarrow.dataset as ds - >>> import pyarrow.compute as pc - >>> part = ds.partitioning(pa.schema([("year", pa.int16()), ("month", pa.string())])) - >>> part.format((pc.field("year") == 1862) & (pc.field("month") == "Jan")) - ('1862/Jan', '') - """ + def parse(self, path: str) -> Expression: ... + + def format(self, expr: Expression) -> tuple[str, str]: ... + @property - def schema(self) -> lib.Schema: - """The arrow Schema attached to the partitioning.""" + def schema(self) -> lib.Schema: ... + class PartitioningFactory(lib._Weakrefable): @property def type_name(self) -> str: ... + class KeyValuePartitioning(Partitioning): @property - def dictionaries(self) -> list[lib.Array | None]: - """ - The unique values for each partition field, if available. + def dictionaries(self) -> list[lib.Array | None]: ... - Those values are only available if the Partitioning object was - created through dataset discovery from a PartitioningFactory, or - if the dictionaries were manually specified in the constructor. - If no dictionary field is available, this returns an empty list. - """ class DirectoryPartitioning(KeyValuePartitioning): - """ - A Partitioning based on a specified Schema. - - The DirectoryPartitioning expects one segment in the file path for each - field in the schema (all fields are required to be present). - For example given schema the path "/2009/11" would - be parsed to ("year"_ == 2009 and "month"_ == 11). - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. - dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - DirectoryPartitioning - - Examples - -------- - >>> from pyarrow.dataset import DirectoryPartitioning - >>> partitioning = DirectoryPartitioning( - ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) - ... ) - >>> print(partitioning.parse("/2009/11/")) - ((year == 2009) and (month == 11)) - """ @staticmethod def discover( @@ -1551,38 +449,8 @@ class DirectoryPartitioning(KeyValuePartitioning): max_partition_dictionary_size: int = 0, schema: lib.Schema | None = None, segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a DirectoryPartitioning. - - Parameters - ---------- - field_names : list of str - The names to associate with the values from the subdirectory names. - If schema is given, will be populated from the schema. - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain types. 
This can be more efficient - when materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - max_partition_dictionary_size : int, default 0 - Synonymous with infer_dictionary for backwards compatibility with - 1.0: setting this to -1 or None is equivalent to passing - infer_dictionary=True. - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ + ) -> PartitioningFactory: ... + def __init__( self, schema: lib.Schema, @@ -1590,47 +458,9 @@ class DirectoryPartitioning(KeyValuePartitioning): segment_encoding: Literal["uri", "none"] = "uri", ) -> None: ... + class HivePartitioning(KeyValuePartitioning): - """ - A Partitioning for "/$key=$value/" nested directories as found in - Apache Hive. - - Multi-level, directory based partitioning scheme originating from - Apache Hive with all data files stored in the leaf directories. Data is - partitioned by static values of a particular column in the schema. - Partition keys are represented in the form $key=$value in directory names. - Field order is ignored, as are missing or unrecognized field names. - - For example, given schema, a possible - path would be "/year=2009/month=11/day=15". - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. - dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. - null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" - If any field is None then this fallback will be used as a label - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - HivePartitioning - - Examples - -------- - >>> from pyarrow.dataset import HivePartitioning - >>> partitioning = HivePartitioning(pa.schema([("year", pa.int16()), ("month", pa.int8())])) - >>> print(partitioning.parse("/year=2009/month=11/")) - ((year == 2009) and (month == 11)) - - """ + def __init__( self, schema: lib.Schema, @@ -1638,6 +468,7 @@ class HivePartitioning(KeyValuePartitioning): null_fallback: str = "__HIVE_DEFAULT_PARTITION__", segment_encoding: Literal["uri", "none"] = "uri", ) -> None: ... + @staticmethod def discover( infer_dictionary: bool = False, @@ -1645,75 +476,10 @@ class HivePartitioning(KeyValuePartitioning): null_fallback="__HIVE_DEFAULT_PARTITION__", schema: lib.Schema | None = None, segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a HivePartitioning. - - Parameters - ---------- - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain. 
This can be more efficient when - materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - max_partition_dictionary_size : int, default 0 - Synonymous with infer_dictionary for backwards compatibility with - 1.0: setting this to -1 or None is equivalent to passing - infer_dictionary=True. - null_fallback : str, default "__HIVE_DEFAULT_PARTITION__" - When inferring a schema for partition fields this value will be - replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__ - for compatibility with Spark - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ + ) -> PartitioningFactory: ... + class FilenamePartitioning(KeyValuePartitioning): - """ - A Partitioning based on a specified Schema. - - The FilenamePartitioning expects one segment in the file name for each - field in the schema (all fields are required to be present) separated - by '_'. For example given schema the name - ``"2009_11_"`` would be parsed to ("year" == 2009 and "month" == 11). - - Parameters - ---------- - schema : Schema - The schema that describes the partitions present in the file path. - dictionaries : dict[str, Array] - If the type of any field of `schema` is a dictionary type, the - corresponding entry of `dictionaries` must be an array containing - every value which may be taken by the corresponding column or an - error will be raised in parsing. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - FilenamePartitioning - - Examples - -------- - >>> from pyarrow.dataset import FilenamePartitioning - >>> partitioning = FilenamePartitioning( - ... pa.schema([("year", pa.int16()), ("month", pa.int8())]) - ... ) - >>> print(partitioning.parse("2009_11_data.parquet")) - ((year == 2009) and (month == 11)) - """ def __init__( self, @@ -1721,99 +487,27 @@ class FilenamePartitioning(KeyValuePartitioning): dictionaries: dict[str, lib.Array] | None = None, segment_encoding: Literal["uri", "none"] = "uri", ) -> None: ... + @staticmethod def discover( field_names: list[str] | None = None, infer_dictionary: bool = False, schema: lib.Schema | None = None, segment_encoding: Literal["uri", "none"] = "uri", - ) -> PartitioningFactory: - """ - Discover a FilenamePartitioning. - - Parameters - ---------- - field_names : list of str - The names to associate with the values from the subdirectory names. - If schema is given, will be populated from the schema. - infer_dictionary : bool, default False - When inferring a schema for partition fields, yield dictionary - encoded types instead of plain types. This can be more efficient - when materializing virtual columns, and Expressions parsed by the - finished Partitioning will include dictionaries of all unique - inspected values for each field. - schema : Schema, default None - Use this schema instead of inferring a schema from partition - values. 
Partition values will be validated against this schema - before accumulation into the Partitioning's dictionary. - segment_encoding : str, default "uri" - After splitting paths into segments, decode the segments. Valid - values are "uri" (URI-decode segments) and "none" (leave as-is). - - Returns - ------- - PartitioningFactory - To be used in the FileSystemFactoryOptions. - """ + ) -> PartitioningFactory: ... + class DatasetFactory(lib._Weakrefable): - """ - DatasetFactory is used to create a Dataset, inspect the Schema - of the fragments contained in it, and declare a partitioning. - """ root_partition: Expression - def finish(self, schema: lib.Schema | None = None) -> Dataset: - """ - Create a Dataset using the inspected schema or an explicit schema - (if given). - - Parameters - ---------- - schema : Schema, default None - The schema to conform the source to. If None, the inspected - schema is used. - - Returns - ------- - Dataset - """ - def inspect(self) -> lib.Schema: - """ - Inspect all data fragments and return a common Schema. - - Returns - ------- - Schema - """ + def finish(self, schema: lib.Schema | None = None) -> Dataset: ... + + def inspect(self) -> lib.Schema: ... + def inspect_schemas(self) -> list[lib.Schema]: ... + class FileSystemFactoryOptions(lib._Weakrefable): - """ - Influences the discovery of filesystem paths. - - Parameters - ---------- - partition_base_dir : str, optional - For the purposes of applying the partitioning, paths will be - stripped of the partition_base_dir. Files not matching the - partition_base_dir prefix will be skipped for partitioning discovery. - The ignored files will still be part of the Dataset, but will not - have partition information. - partitioning : Partitioning/PartitioningFactory, optional - Apply the Partitioning to every discovered Fragment. See Partitioning or - PartitioningFactory documentation. - exclude_invalid_files : bool, optional (default True) - If True, invalid files will be excluded (file format specific check). - This will incur IO for each files in a serial and single threaded - fashion. Disabling this feature will skip the IO, but unsupported - files may be present in the Dataset (resulting in an error at scan - time). - selector_ignore_prefixes : list, optional - When discovering from a Selector (and not from an explicit file list), - ignore files and directories matching any of these prefixes. - By default this is ['.', '_']. - """ partitioning: Partitioning partitioning_factory: PartitioningFactory @@ -1829,21 +523,8 @@ class FileSystemFactoryOptions(lib._Weakrefable): selector_ignore_prefixes: list[str] | None = None, ) -> None: ... + class FileSystemDatasetFactory(DatasetFactory): - """ - Create a DatasetFactory from a list of paths with schema inspection. - - Parameters - ---------- - filesystem : pyarrow.fs.FileSystem - Filesystem to discover. - paths_or_selector : pyarrow.fs.FileSelector or list of path-likes - Either a Selector object or a list of path-like objects. - format : FileFormat - Currently only ParquetFileFormat and IpcFileFormat are supported. - options : FileSystemFactoryOptions, optional - Various flags influencing the discovery of filesystem paths. - """ def __init__( self, @@ -1853,50 +534,35 @@ class FileSystemDatasetFactory(DatasetFactory): options: FileSystemFactoryOptions | None = None, ) -> None: ... + class UnionDatasetFactory(DatasetFactory): - """ - Provides a way to inspect/discover a Dataset's expected schema before - materialization. 
- - Parameters - ---------- - factories : list of DatasetFactory - """ + def __init__(self, factories: list[DatasetFactory]) -> None: ... + _RecordBatchT = TypeVar("_RecordBatchT", bound=lib.RecordBatch) + class RecordBatchIterator(lib._Weakrefable, Generic[_RecordBatchT]): - """An iterator over a sequence of record batches.""" + def __iter__(self) -> Self: ... def __next__(self) -> _RecordBatchT: ... -class TaggedRecordBatch(NamedTuple): - """ - A combination of a record batch and the fragment it came from. - Parameters - ---------- - record_batch : RecordBatch - The record batch. - fragment : Fragment - Fragment of the record batch. - """ +class TaggedRecordBatch(NamedTuple): record_batch: lib.RecordBatch fragment: Fragment + class TaggedRecordBatchIterator(lib._Weakrefable): - """An iterator over a sequence of record batches with fragments.""" + def __iter__(self) -> Self: ... def __next__(self) -> TaggedRecordBatch: ... + class Scanner(lib._Weakrefable): - """A materialized scan operation with context and options bound. - A scanner is the class that glues the scan tasks, data fragments and data - sources together. - """ @staticmethod def from_dataset( dataset: Dataset, @@ -1910,63 +576,8 @@ class Scanner(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Create Scanner from Dataset, - - Parameters - ---------- - dataset : Dataset - Dataset to scan. - columns : list[str] or dict[str, Expression], default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. 
- use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ + ) -> Scanner: ... + @staticmethod def from_fragment( fragment: Fragment, @@ -1981,65 +592,8 @@ class Scanner(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Create Scanner from Fragment, - - Parameters - ---------- - fragment : Fragment - fragment to scan. - schema : Schema, optional - The schema of the fragment. - columns : list[str] or dict[str, Expression], default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ + ) -> Scanner: ... 
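As a usage reference for the Scanner.from_dataset options stubbed above (columns, filter, batch_size, use_threads), a small sketch; the dataset path, hive partitioning layout and column names are assumptions made for the example.

import pyarrow.dataset as ds
import pyarrow.compute as pc

# Hypothetical hive-partitioned Parquet dataset ("/year=2009/month=11/...").
dataset = ds.dataset("data/", format="parquet", partitioning="hive")
scanner = ds.Scanner.from_dataset(
    dataset,
    columns={"month": pc.field("month"), "source": pc.field("__filename")},
    filter=pc.field("year") == 2009,  # may be pushed down to partition info / Parquet statistics
    batch_size=64_000,
    use_threads=True,
)
table = scanner.to_table()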
+ @staticmethod def from_batches( source: Iterator[lib.RecordBatch] | RecordBatchReader, @@ -2054,196 +608,37 @@ class Scanner(lib._Weakrefable): use_threads: bool = True, cache_metadata: bool = True, memory_pool: lib.MemoryPool | None = None, - ) -> Scanner: - """ - Create a Scanner from an iterator of batches. - - This creates a scanner which can be used only once. It is - intended to support writing a dataset (which takes a scanner) - from a source which can be read only once (e.g. a - RecordBatchReader or generator). - - Parameters - ---------- - source : Iterator or Arrow-compatible stream object - The iterator of Batches. This can be a pyarrow RecordBatchReader, - any object that implements the Arrow PyCapsule Protocol for - streams, or an actual Python iterator of RecordBatches. - schema : Schema - The schema of the batches (required when passing a Python - iterator). - columns : list[str] or dict[str, Expression], default None - The columns to project. This can be a list of column names to - include (order and duplicates will be preserved), or a dictionary - with {new_column_name: expression} values for more advanced - projections. - - The list of columns or expressions may use the special fields - `__batch_index` (the index of the batch within the fragment), - `__fragment_index` (the index of the fragment within the dataset), - `__last_in_fragment` (whether the batch is last in fragment), and - `__filename` (the name of the source file or a description of the - source fragment). - - The columns will be passed down to Datasets and corresponding data - fragments to avoid loading, copying, and deserializing columns - that will not be required further down the compute chain. - By default all of the available columns are projected. Raises - an exception if any of the referenced column names does not exist - in the dataset's Schema. - filter : Expression, default None - Scan will return only the rows matching the filter. - If possible the predicate will be pushed down to exploit the - partition information or internal metadata found in the data - source, e.g. Parquet statistics. Otherwise filters the loaded - RecordBatches before yielding them. - batch_size : int, default 131_072 - The maximum row count for scanned record batches. If scanned - record batches are overflowing memory then this method can be - called to reduce their size. - batch_readahead : int, default 16 - The number of batches to read ahead in a file. This might not work - for all file formats. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_readahead : int, default 4 - The number of files to read ahead. Increasing this number will increase - RAM usage but could also improve IO utilization. - fragment_scan_options : FragmentScanOptions, default None - Options specific to a particular scan and fragment type, which - can change between different scans of the same dataset. - use_threads : bool, default True - If enabled, then maximum parallelism will be used determined by - the number of available CPU cores. - cache_metadata : bool, default True - If enabled, metadata may be cached when scanning to speed up - repeated scans. - memory_pool : MemoryPool, default None - For memory allocations, if required. If not specified, uses the - default pool. - """ + ) -> Scanner: ... + @property - def dataset_schema(self) -> lib.Schema: - """The schema with which batches will be read from fragments.""" + def dataset_schema(self) -> lib.Schema: ... 
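A one-shot Scanner built from in-memory batches, matching the from_batches signature above; the schema and values are invented for illustration.

import pyarrow as pa
import pyarrow.dataset as ds

schema = pa.schema([("x", pa.int64())])
batches = [pa.record_batch([pa.array([1, 2, 3])], schema=schema)]

# schema= is required because a plain Python iterator carries no schema of its own.
scanner = ds.Scanner.from_batches(iter(batches), schema=schema)
print(scanner.to_table())  # the scanner can only be consumed once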
+ @property - def projected_schema(self) -> lib.Schema: - """ - The materialized schema of the data, accounting for projections. - - This is the schema of any data returned from the scanner. - """ - def to_batches(self) -> Iterator[lib.RecordBatch]: - """ - Consume a Scanner in record batches. - - Returns - ------- - record_batches : iterator of RecordBatch - """ - def scan_batches(self) -> TaggedRecordBatchIterator: - """ - Consume a Scanner in record batches with corresponding fragments. - - Returns - ------- - record_batches : iterator of TaggedRecordBatch - """ - def to_table(self) -> lib.Table: - """ - Convert a Scanner into a Table. - - Use this convenience utility with care. This will serially materialize - the Scan result in memory before creating the Table. - - Returns - ------- - Table - """ - def take(self, indices: Indices) -> lib.Table: - """ - Select rows of data by index. - - Will only consume as many batches of the underlying dataset as - needed. Otherwise, this is equivalent to - ``to_table().take(indices)``. - - Parameters - ---------- - indices : Array or array-like - indices of rows to select in the dataset. - - Returns - ------- - Table - """ - def head(self, num_rows: int) -> lib.Table: - """ - Load the first N rows of the dataset. - - Parameters - ---------- - num_rows : int - The number of rows to load. - - Returns - ------- - Table - """ - def count_rows(self) -> int: - """ - Count rows matching the scanner filter. - - Returns - ------- - count : int - """ - def to_reader(self) -> RecordBatchReader: - """Consume this scanner as a RecordBatchReader. - - Returns - ------- - RecordBatchReader - """ - -def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: - """ - Extract partition keys (equality constraints between a field and a scalar) - from an expression as a dict mapping the field's name to its value. - - NB: All expressions yielded by a HivePartitioning or DirectoryPartitioning - will be conjunctions of equality conditions and are accessible through this - function. Other subexpressions will be ignored. - - Parameters - ---------- - partition_expression : pyarrow.dataset.Expression - - Returns - ------- - dict - - Examples - -------- - - For example, an expression of - - is converted to {'part': 'A', 'year': 2016} - """ + def projected_schema(self) -> lib.Schema: ... + + def to_batches(self) -> Iterator[lib.RecordBatch]: ... + + def scan_batches(self) -> TaggedRecordBatchIterator: ... + + def to_table(self) -> lib.Table: ... + + def take(self, indices: Indices) -> lib.Table: ... + + def head(self, num_rows: int) -> lib.Table: ... + + def count_rows(self) -> int: ... + + def to_reader(self) -> RecordBatchReader: ... + + +def get_partition_keys(partition_expression: Expression) -> dict[str, Any]: ... + class WrittenFile(lib._Weakrefable): - """ - Metadata information about files written as - part of a dataset write operation - - Parameters - ---------- - path : str - Path to the file. - metadata : pyarrow.parquet.FileMetaData, optional - For Parquet files, the Parquet file metadata. - size : int - The size of the file in bytes. - """ - def __init__(self, path: str, metadata: _parquet.FileMetaData | None, size: int) -> None: ... + + def __init__(self, path: str, metadata: _parquet.FileMetaData | + None, size: int) -> None: ... + def _filesystemdataset_write( data: Scanner, @@ -2262,37 +657,12 @@ def _filesystemdataset_write( create_dir: bool, ): ... 
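The get_partition_keys stub above can be exercised with a hand-built expression; the field names and values below are just examples of the equality constraints it extracts.

import pyarrow.dataset as ds
import pyarrow.compute as pc

expr = (pc.field("part") == "A") & (pc.field("year") == 2016)
print(ds.get_partition_keys(expr))  # {'part': 'A', 'year': 2016}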
+ class _ScanNodeOptions(ExecNodeOptions): def _set_options(self, dataset: Dataset, scan_options: dict) -> None: ... + class ScanNodeOptions(_ScanNodeOptions): - """ - A Source node which yields batches from a Dataset scan. - - This is the option class for the "scan" node factory. - - This node is capable of applying pushdown projections or filters - to the file readers which reduce the amount of data that needs to - be read (if supported by the file format). But note that this does not - construct associated filter or project nodes to perform the final - filtering or projection. Rather, you may supply the same filter - expression or projection to the scan node that you also supply - to the filter or project node. - - Yielded batches will be augmented with fragment/batch indices when - implicit_ordering=True to enable stable ordering for simple ExecPlans. - - Parameters - ---------- - dataset : pyarrow.dataset.Dataset - The table which acts as the data source. - **kwargs : dict, optional - Scan options. See `Scanner.from_dataset` for possible arguments. - require_sequenced_output : bool, default False - Batches are yielded sequentially, like single-threaded - implicit_ordering : bool, default False - Preserve implicit ordering of data. - """ def __init__( self, dataset: Dataset, require_sequenced_output: bool = False, **kwargs diff --git a/python/pyarrow-stubs/_dataset_orc.pyi b/python/pyarrow-stubs/_dataset_orc.pyi index d4e5784750f..62f49bf5d30 100644 --- a/python/pyarrow-stubs/_dataset_orc.pyi +++ b/python/pyarrow-stubs/_dataset_orc.pyi @@ -17,6 +17,7 @@ from ._dataset import FileFormat + class OrcFileFormat(FileFormat): def equals(self, other: OrcFileFormat) -> bool: ... @property diff --git a/python/pyarrow-stubs/_dataset_parquet.pyi b/python/pyarrow-stubs/_dataset_parquet.pyi index 007d3404a18..df9536ef725 100644 --- a/python/pyarrow-stubs/_dataset_parquet.pyi +++ b/python/pyarrow-stubs/_dataset_parquet.pyi @@ -38,19 +38,9 @@ from .lib import CacheOptions, Schema, _Weakrefable parquet_encryption_enabled: bool + class ParquetFileFormat(FileFormat): - """ - FileFormat for Parquet - - Parameters - ---------- - read_options : ParquetReadOptions - Read options for the file. - default_fragment_scan_options : ParquetFragmentScanOptions - Scan Options for the file. - **kwargs : dict - Additional options for read option or scan option - """ + def __init__( self, read_options: ParquetReadOptions | None = None, @@ -59,10 +49,13 @@ class ParquetFileFormat(FileFormat): ) -> None: ... @property def read_options(self) -> ParquetReadOptions: ... - def make_write_options(self) -> ParquetFileWriteOptions: ... # type: ignore[override] + def make_write_options( + self) -> ParquetFileWriteOptions: ... # type: ignore[override] + def equals(self, other: ParquetFileFormat) -> bool: ... @property def default_extname(self) -> str: ... + def make_fragment( self, file: StrPath | IO, @@ -71,49 +64,15 @@ class ParquetFileFormat(FileFormat): row_groups: Iterable[int] | None = None, *, file_size: int | None = None, - ) -> Fragment: - """ - Make a FileFragment from a given file. - - Parameters - ---------- - file : file-like object, path-like or str - The file or file path to make a fragment from. - filesystem : Filesystem, optional - If `filesystem` is given, `file` must be a string and specifies - the path of the file to read from the filesystem. - partition_expression : Expression, optional - An expression that is guaranteed true for all rows in the fragment. 
Allows - fragment to be potentially skipped while scanning with a filter. - row_groups : Iterable, optional - The indices of the row groups to include - file_size : int, optional - The size of the file in bytes. Can improve performance with high-latency filesystems - when file size needs to be known before reading. - - Returns - ------- - fragment : Fragment - The file fragment - """ + ) -> Fragment: ... + class _NameStats(TypedDict): min: Any max: Any + class RowGroupInfo: - """ - A wrapper class for RowGroup information - - Parameters - ---------- - id : integer - The group ID. - metadata : FileMetaData - The rowgroup metadata. - schema : Schema - Schema of the rows. - """ id: int metadata: FileMetaData @@ -127,8 +86,8 @@ class RowGroupInfo: @property def statistics(self) -> dict[str, _NameStats]: ... + class ParquetFileFragment(FileFragment): - """A Fragment representing a parquet file.""" def ensure_complete_metadata(self) -> None: ... @property @@ -136,79 +95,22 @@ class ParquetFileFragment(FileFragment): @property def metadata(self) -> FileMetaData: ... @property - def num_row_groups(self) -> int: - """ - Return the number of row groups viewed by this fragment (not the - number of row groups in the origin file). - """ + def num_row_groups(self) -> int: ... + def split_by_row_group( self, filter: Expression | None = None, schema: Schema | None = None - ) -> list[Fragment]: - """ - Split the fragment into multiple fragments. - - Yield a Fragment wrapping each row group in this ParquetFileFragment. - Row groups will be excluded whose metadata contradicts the optional - filter. - - Parameters - ---------- - filter : Expression, default None - Only include the row groups which satisfy this predicate (using - the Parquet RowGroup statistics). - schema : Schema, default None - Schema to use when filtering row groups. Defaults to the - Fragment's physical schema - - Returns - ------- - A list of Fragments - """ + ) -> list[Fragment]: ... + def subset( self, filter: Expression | None = None, schema: Schema | None = None, row_group_ids: list[int] | None = None, - ) -> ParquetFileFormat: - """ - Create a subset of the fragment (viewing a subset of the row groups). - - Subset can be specified by either a filter predicate (with optional - schema) or by a list of row group IDs. Note that when using a filter, - the resulting fragment can be empty (viewing no row groups). - - Parameters - ---------- - filter : Expression, default None - Only include the row groups which satisfy this predicate (using - the Parquet RowGroup statistics). - schema : Schema, default None - Schema to use when filtering row groups. Defaults to the - Fragment's physical schema - row_group_ids : list of ints - The row group IDs to include in the subset. Can only be specified - if `filter` is None. - - Returns - ------- - ParquetFileFragment - """ + ) -> ParquetFileFormat: ... + class ParquetReadOptions(_Weakrefable): - """ - Parquet format specific options for reading. - - Parameters - ---------- - dictionary_columns : list of string, default None - Names of columns which should be dictionary encoded as - they are read - coerce_int96_timestamp_unit : str, default None - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds - """ + def __init__( self, dictionary_columns: list[str] | None, coerce_int96_timestamp_unit: str | None = None ) -> None: ... 
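To illustrate the ParquetFileFormat entry points stubbed above (make_fragment with row_groups, make_write_options), a sketch against a placeholder file path; nothing here is prescribed by the stubs themselves.

import pyarrow.dataset as ds
from pyarrow import fs

local = fs.LocalFileSystem()
fmt = ds.ParquetFileFormat()

# Wrap a single Parquet file as a fragment restricted to its first row group.
fragment = fmt.make_fragment("example.parquet", filesystem=local, row_groups=[0])
write_options = fmt.make_write_options()  # ParquetFileWriteOptions for use with write_dataset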
@@ -218,53 +120,16 @@ class ParquetReadOptions(_Weakrefable): def coerce_int96_timestamp_unit(self, unit: str) -> None: ... def equals(self, other: ParquetReadOptions) -> bool: ... + class ParquetFileWriteOptions(FileWriteOptions): def update(self, **kwargs) -> None: ... def _set_properties(self) -> None: ... def _set_arrow_properties(self) -> None: ... def _set_encryption_config(self) -> None: ... + @dataclass(kw_only=True) class ParquetFragmentScanOptions(FragmentScanOptions): - """ - Scan-specific options for Parquet fragments. - - Parameters - ---------- - use_buffered_stream : bool, default False - Read files through buffered input streams rather than loading entire - row groups at once. This may be enabled to reduce memory overhead. - Disabled by default. - buffer_size : int, default 8192 - Size of buffered stream, if enabled. Default is 8KB. - pre_buffer : bool, default True - If enabled, pre-buffer the raw Parquet data instead of issuing one - read per column chunk. This can improve performance on high-latency - filesystems (e.g. S3, GCS) by coalescing and issuing file reads in - parallel using a background I/O thread pool. - Set to False if you want to prioritize minimal memory usage - over maximum speed. - cache_options : pyarrow.CacheOptions, default None - Cache options used when pre_buffer is enabled. The default values should - be good for most use cases. You may want to adjust these for example if - you have exceptionally high latency to the file system. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None - If not None, use the provided ParquetDecryptionConfig to decrypt the - Parquet file. - decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None - If not None, use the provided FileDecryptionProperties to decrypt encrypted - Parquet file. - page_checksum_verification : bool, default False - If True, verify the page checksum for each page read from the file. - """ use_buffered_stream: bool = False buffer_size: int = 8192 @@ -278,50 +143,17 @@ class ParquetFragmentScanOptions(FragmentScanOptions): def equals(self, other: ParquetFragmentScanOptions) -> bool: ... + @dataclass class ParquetFactoryOptions(_Weakrefable): - """ - Influences the discovery of parquet dataset. - - Parameters - ---------- - partition_base_dir : str, optional - For the purposes of applying the partitioning, paths will be - stripped of the partition_base_dir. Files not matching the - partition_base_dir prefix will be skipped for partitioning discovery. - The ignored files will still be part of the Dataset, but will not - have partition information. - partitioning : Partitioning, PartitioningFactory, optional - The partitioning scheme applied to fragments, see ``Partitioning``. - validate_column_chunk_paths : bool, default False - Assert that all ColumnChunk paths are consistent. The parquet spec - allows for ColumnChunk data to be stored in multiple files, but - ParquetDatasetFactory supports only a single file with all ColumnChunk - data. 
If this flag is set construction of a ParquetDatasetFactory will - raise an error if ColumnChunk data is not resident in a single file. - """ partition_base_dir: str | None = None partitioning: Partitioning | PartitioningFactory | None = None validate_column_chunk_paths: bool = False + class ParquetDatasetFactory(DatasetFactory): - """ - Create a ParquetDatasetFactory from a Parquet `_metadata` file. - - Parameters - ---------- - metadata_path : str - Path to the `_metadata` parquet metadata-only file generated with - `pyarrow.parquet.write_metadata`. - filesystem : pyarrow.fs.FileSystem - Filesystem to read the metadata_path from, and subsequent parquet - files. - format : ParquetFileFormat - Parquet format options. - options : ParquetFactoryOptions, optional - Various flags influencing the discovery of filesystem paths. - """ + def __init__( self, metadata_path: str, diff --git a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi index be40c0b39b3..d8338776481 100644 --- a/python/pyarrow-stubs/_dataset_parquet_encryption.pyi +++ b/python/pyarrow-stubs/_dataset_parquet_encryption.pyi @@ -20,33 +20,9 @@ from ._parquet import FileDecryptionProperties from ._parquet_encryption import CryptoFactory, EncryptionConfiguration, KmsConnectionConfig from .lib import _Weakrefable -class ParquetEncryptionConfig(_Weakrefable): - """ - Core configuration class encapsulating parameters for high-level encryption - within the Parquet framework. - - The ParquetEncryptionConfig class serves as a bridge for passing encryption-related - parameters to the appropriate components within the Parquet library. It maintains references - to objects that define the encryption strategy, Key Management Service (KMS) configuration, - and specific encryption configurations for Parquet data. - Parameters - ---------- - crypto_factory : pyarrow.parquet.encryption.CryptoFactory - Shared pointer to a `CryptoFactory` object. The `CryptoFactory` is responsible for - creating cryptographic components, such as encryptors and decryptors. - kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig - Shared pointer to a `KmsConnectionConfig` object. This object holds the configuration - parameters necessary for connecting to a Key Management Service (KMS). - encryption_config : pyarrow.parquet.encryption.EncryptionConfiguration - Shared pointer to an `EncryptionConfiguration` object. This object defines specific - encryption settings for Parquet data, including the keys assigned to different columns. +class ParquetEncryptionConfig(_Weakrefable): - Raises - ------ - ValueError - Raised if `encryption_config` is None. - """ def __init__( self, crypto_factory: CryptoFactory, @@ -54,33 +30,9 @@ class ParquetEncryptionConfig(_Weakrefable): encryption_config: EncryptionConfiguration, ) -> None: ... -class ParquetDecryptionConfig(_Weakrefable): - """ - Core configuration class encapsulating parameters for high-level decryption - within the Parquet framework. - - ParquetDecryptionConfig is designed to pass decryption-related parameters to - the appropriate decryption components within the Parquet library. It holds references to - objects that define the decryption strategy, Key Management Service (KMS) configuration, - and specific decryption configurations for reading encrypted Parquet data. 
- Parameters - ---------- - crypto_factory : pyarrow.parquet.encryption.CryptoFactory - Shared pointer to a `CryptoFactory` object, pivotal in creating cryptographic - components for the decryption process. - kms_connection_config : pyarrow.parquet.encryption.KmsConnectionConfig - Shared pointer to a `KmsConnectionConfig` object, containing parameters necessary - for connecting to a Key Management Service (KMS) during decryption. - decryption_config : pyarrow.parquet.encryption.DecryptionConfiguration - Shared pointer to a `DecryptionConfiguration` object, specifying decryption settings - for reading encrypted Parquet data. +class ParquetDecryptionConfig(_Weakrefable): - Raises - ------ - ValueError - Raised if `decryption_config` is None. - """ def __init__( self, crypto_factory: CryptoFactory, @@ -88,14 +40,19 @@ class ParquetDecryptionConfig(_Weakrefable): encryption_config: EncryptionConfiguration, ) -> None: ... + def set_encryption_config( opts: ParquetFileWriteOptions, config: ParquetEncryptionConfig, ) -> None: ... + + def set_decryption_properties( opts: ParquetFragmentScanOptions, config: FileDecryptionProperties, ): ... + + def set_decryption_config( opts: ParquetFragmentScanOptions, config: ParquetDecryptionConfig, diff --git a/python/pyarrow-stubs/_feather.pyi b/python/pyarrow-stubs/_feather.pyi index 373fe38cdce..edd3a089f82 100644 --- a/python/pyarrow-stubs/_feather.pyi +++ b/python/pyarrow-stubs/_feather.pyi @@ -21,7 +21,10 @@ from _typeshed import StrPath from .lib import Buffer, NativeFile, Table, _Weakrefable -class FeatherError(Exception): ... + +class FeatherError(Exception): + ... + def write_feather( table: Table, @@ -32,6 +35,7 @@ def write_feather( version: int = 2, ): ... + class FeatherReader(_Weakrefable): def __init__( self, diff --git a/python/pyarrow-stubs/_flight.pyi b/python/pyarrow-stubs/_flight.pyi index a79475a8796..e4d226a9f60 100644 --- a/python/pyarrow-stubs/_flight.pyi +++ b/python/pyarrow-stubs/_flight.pyi @@ -47,7 +47,7 @@ from .lib import ( _T = TypeVar("_T") class FlightCallOptions(_Weakrefable): - """RPC-layer options for a Flight call.""" + def __init__( self, @@ -55,168 +55,92 @@ class FlightCallOptions(_Weakrefable): write_options: IpcWriteOptions | None = None, headers: list[tuple[str, str]] | None = None, read_options: IpcReadOptions | None = None, - ) -> None: - """Create call options. - - Parameters - ---------- - timeout : float, None - A timeout for the call, in seconds. None means that the - timeout defaults to an implementation-specific value. - write_options : pyarrow.ipc.IpcWriteOptions, optional - IPC write options. The default options can be controlled - by environment variables (see pyarrow.ipc). - headers : List[Tuple[str, str]], optional - A list of arbitrary headers as key, value tuples - read_options : pyarrow.ipc.IpcReadOptions, optional - Serialization options for reading IPC format. - """ + ) -> None: ... + class CertKeyPair(NamedTuple): - """A TLS certificate and key for use in Flight.""" + cert: str key: str class FlightError(Exception): - """ - The base class for Flight-specific errors. - - A server may raise this class or one of its subclasses to provide - a more detailed error to clients. - - Parameters - ---------- - message : str, optional - The error message. - extra_info : bytes, optional - Extra binary error details that were provided by the - server/will be sent to the client. 
- - Attributes - ---------- - extra_info : bytes - Extra binary error details that were provided by the - server/will be sent to the client. - """ + extra_info: bytes -class FlightInternalError(FlightError, ArrowException): - """An error internal to the Flight server occurred.""" +class FlightInternalError(FlightError, ArrowException): ... + + +class FlightTimedOutError(FlightError, ArrowException): ... + + +class FlightCancelledError(FlightError, ArrowCancelled): ... + + +class FlightServerError(FlightError, ArrowException): ... -class FlightTimedOutError(FlightError, ArrowException): - """The Flight RPC call timed out.""" -class FlightCancelledError(FlightError, ArrowCancelled): - """The operation was cancelled.""" +class FlightUnauthenticatedError(FlightError, ArrowException): ... -class FlightServerError(FlightError, ArrowException): - """A server error occurred.""" -class FlightUnauthenticatedError(FlightError, ArrowException): - """The client is not authenticated.""" +class FlightUnauthorizedError(FlightError, ArrowException): ... -class FlightUnauthorizedError(FlightError, ArrowException): - """The client is not authorized to perform the given operation.""" -class FlightUnavailableError(FlightError, ArrowException): - """The server is not reachable or available.""" +class FlightUnavailableError(FlightError, ArrowException): ... + class FlightWriteSizeExceededError(ArrowInvalid): - """A write operation exceeded the client-configured limit.""" + limit: int actual: int class Action(_Weakrefable): - """An action executable on a Flight service.""" - def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: - """Create an action from a type and a buffer. - Parameters - ---------- - action_type : bytes or str - buf : Buffer or bytes-like object - """ + def __init__(self, action_type: bytes | str, buf: Buffer | bytes) -> None: ... + @property - def type(self) -> str: - """The action type.""" + def type(self) -> str: ... + @property - def body(self) -> Buffer: - """The action body (arguments for the action).""" - def serialize(self) -> bytes: - """Get the wire-format representation of this type. + def body(self) -> Buffer: ... - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. + def serialize(self) -> bytes: ... - """ @classmethod - def deserialize(cls, serialized: bytes) -> Self: - """Parse the wire-format representation of this type. - - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. + def deserialize(cls, serialized: bytes) -> Self: ... - """ class ActionType(NamedTuple): - """A type of action that is executable on a Flight service.""" + type: str description: str - def make_action(self, buf: Buffer | bytes) -> Action: - """Create an Action with this type. + def make_action(self, buf: Buffer | bytes) -> Action: ... - Parameters - ---------- - buf : obj - An Arrow buffer or Python bytes or bytes-like object. - """ class Result(_Weakrefable): - """A result from executing an Action.""" - def __init__(self, buf: Buffer | bytes) -> None: - """Create a new result. - - Parameters - ---------- - buf : Buffer or bytes-like object - """ + + def __init__(self, buf: Buffer | bytes) -> None: ... + @property - def body(self) -> Buffer: - """Get the Buffer containing the result.""" - def serialize(self) -> bytes: - """Get the wire-format representation of this type. + def body(self) -> Buffer: ... - Useful when interoperating with non-Flight systems (e.g. 
REST - services) that may want to return Flight types. + def serialize(self) -> bytes: ... - """ @classmethod - def deserialize(cls, serialized: bytes) -> Self: - """Parse the wire-format representation of this type. - - Useful when interoperating with non-Flight systems (e.g. REST - services) that may want to return Flight types. + def deserialize(cls, serialized: bytes) -> Self: ... - """ class BasicAuth(_Weakrefable): - """A container for basic auth.""" + def __init__( self, username: str | bytes | None = None, password: str | bytes | None = None - ) -> None: - """Create a new basic auth object. - - Parameters - ---------- - username : string - password : string - """ + ) -> None: ... + @property def username(self) -> bytes: ... @property @@ -226,29 +150,14 @@ class BasicAuth(_Weakrefable): def deserialize(serialized: str | bytes) -> BasicAuth: ... class DescriptorType(enum.Enum): - """ - The type of a FlightDescriptor. - Attributes - ---------- - - UNKNOWN - An unknown descriptor type. - - PATH - A Flight stream represented by a path. - - CMD - A Flight stream represented by an application-defined command. - - """ UNKNOWN = 0 PATH = 1 CMD = 2 class FlightMethod(enum.Enum): - """The implemented methods in Flight.""" + INVALID = 0 HANDSHAKE = 1 @@ -262,29 +171,29 @@ class FlightMethod(enum.Enum): DO_EXCHANGE = 9 class FlightDescriptor(_Weakrefable): - """A description of a data stream available from a Flight service.""" + @staticmethod - def for_path(*path: str | bytes) -> FlightDescriptor: - """Create a FlightDescriptor for a resource path.""" + def for_path(*path: str | bytes) -> FlightDescriptor: ... + @staticmethod - def for_command(command: str | bytes) -> FlightDescriptor: - """Create a FlightDescriptor for an opaque command.""" + def for_command(command: str | bytes) -> FlightDescriptor: ... + @property - def descriptor_type(self) -> DescriptorType: - """Get the type of this descriptor.""" + def descriptor_type(self) -> DescriptorType: ... + @property - def path(self) -> list[bytes] | None: - """Get the path for this descriptor.""" + def path(self) -> list[bytes] | None: ... + @property - def command(self) -> bytes | None: - """Get the command for this descriptor.""" + def command(self) -> bytes | None: ... + def serialize(self) -> bytes: ... @classmethod def deserialize(cls, serialized: bytes) -> Self: ... class Ticket(_Weakrefable): - """A ticket for requesting a Flight stream.""" + def __init__(self, ticket: str | bytes) -> None: ... @property def ticket(self) -> bytes: ... @@ -293,90 +202,60 @@ class Ticket(_Weakrefable): def deserialize(cls, serialized: bytes) -> Self: ... class Location(_Weakrefable): - """The location of a Flight service.""" + def __init__(self, uri: str | bytes) -> None: ... @property def uri(self) -> bytes: ... def equals(self, other: Location) -> bool: ... @staticmethod - def for_grpc_tcp(host: str | bytes, port: int) -> Location: - """Create a Location for a TCP-based gRPC service.""" + def for_grpc_tcp(host: str | bytes, port: int) -> Location: ... + @staticmethod - def for_grpc_tls(host: str | bytes, port: int) -> Location: - """Create a Location for a TLS-based gRPC service.""" + def for_grpc_tls(host: str | bytes, port: int) -> Location: ... + @staticmethod - def for_grpc_unix(path: str | bytes) -> Location: - """Create a Location for a domain socket-based gRPC service.""" + def for_grpc_unix(path: str | bytes) -> Location: ... 
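A few of the small Flight value types stubbed above, constructed directly; the host, port and payloads are arbitrary examples.

import pyarrow.flight as flight

location = flight.Location.for_grpc_tcp("localhost", 8815)
descriptor = flight.FlightDescriptor.for_path("datasets", "example")
action = flight.Action("refresh", b"")

# Each of these types round-trips through its wire format.
ticket = flight.Ticket(b"example-ticket")
restored = flight.Ticket.deserialize(ticket.serialize())
print(restored.ticket)  # b'example-ticket'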
+ class FlightEndpoint(_Weakrefable): - """A Flight stream, along with the ticket and locations to access it.""" + def __init__( self, ticket: Ticket | str | bytes, locations: list[str | Location], expiration_time: TimestampScalar | None = ..., app_metadata: bytes | str = ..., - ): - """Create a FlightEndpoint from a ticket and list of locations. - - Parameters - ---------- - ticket : Ticket or bytes - the ticket needed to access this flight - locations : list of string URIs - locations where this flight is available - expiration_time : TimestampScalar, default None - Expiration time of this stream. If present, clients may assume - they can retry DoGet requests. Otherwise, clients should avoid - retrying DoGet requests. - app_metadata : bytes or str, default "" - Application-defined opaque metadata. - - Raises - ------ - ArrowException - If one of the location URIs is not a valid URI. - """ + ): ... + @property - def ticket(self) -> Ticket: - """Get the ticket in this endpoint.""" + def ticket(self) -> Ticket: ... + @property - def locations(self) -> list[Location]: - """Get locations where this flight is available.""" + def locations(self) -> list[Location]: ... + def serialize(self) -> bytes: ... @property - def expiration_time(self) -> TimestampScalar | None: - """Get the expiration time of this stream. - - If present, clients may assume they can retry DoGet requests. - Otherwise, clients should avoid retrying DoGet requests. + def expiration_time(self) -> TimestampScalar | None: ... - """ @property - def app_metadata(self) -> bytes | str: - """Get application-defined opaque metadata.""" + def app_metadata(self) -> bytes | str: ... + @classmethod def deserialize(cls, serialized: bytes) -> Self: ... class SchemaResult(_Weakrefable): - """The serialized schema returned from a GetSchema request.""" - def __init__(self, schema: Schema) -> None: - """Create a SchemaResult from a schema. - - Parameters - ---------- - schema: Schema - the schema of the data in this flight. - """ + + def __init__(self, schema: Schema) -> None: ... + @property - def schema(self) -> Schema: - """The schema of the data in this flight.""" + def schema(self) -> Schema: ... + def serialize(self) -> bytes: ... @classmethod def deserialize(cls, serialized: bytes) -> Self: ... class FlightInfo(_Weakrefable): - """A description of a Flight stream.""" + def __init__( self, schema: Schema, @@ -386,62 +265,35 @@ class FlightInfo(_Weakrefable): total_bytes: int = ..., ordered: bool = ..., app_metadata: bytes | str = ..., - ) -> None: - """Create a FlightInfo object from a schema, descriptor, and endpoints. - - Parameters - ---------- - schema : Schema - the schema of the data in this flight. - descriptor : FlightDescriptor - the descriptor for this flight. - endpoints : list of FlightEndpoint - a list of endpoints where this flight is available. - total_records : int, default None - the total records in this flight, -1 or None if unknown. - total_bytes : int, default None - the total bytes in this flight, -1 or None if unknown. - ordered : boolean, default False - Whether endpoints are in the same order as the data. - app_metadata : bytes or str, default "" - Application-defined opaque metadata. - """ - @property - def schema(self) -> Schema: - """The schema of the data in this flight.""" + ) -> None: ... + @property - def descriptor(self) -> FlightDescriptor: - """The descriptor of the data in this flight.""" + def schema(self) -> Schema: ... 
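Assembling a FlightInfo by hand, as a get_flight_info implementation might, following the constructor stubbed above; the schema, command and gRPC URI are placeholders.

import pyarrow as pa
import pyarrow.flight as flight

schema = pa.schema([("x", pa.int64())])
descriptor = flight.FlightDescriptor.for_command(b"SELECT 1")
endpoint = flight.FlightEndpoint(b"ticket-1", ["grpc://localhost:8815"])
info = flight.FlightInfo(schema, descriptor, [endpoint],
                         total_records=-1, total_bytes=-1)
print(info.endpoints[0].locations)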
+ @property - def endpoints(self) -> list[FlightEndpoint]: - """The endpoints where this flight is available.""" + def descriptor(self) -> FlightDescriptor: ... + @property - def total_records(self) -> int: - """The total record count of this flight, or -1 if unknown.""" + def endpoints(self) -> list[FlightEndpoint]: ... + @property - def total_bytes(self) -> int: - """The size in bytes of the data in this flight, or -1 if unknown.""" + def total_records(self) -> int: ... + @property - def ordered(self) -> bool: - """Whether endpoints are in the same order as the data.""" + def total_bytes(self) -> int: ... + @property - def app_metadata(self) -> bytes | str: - """ - Application-defined opaque metadata. + def ordered(self) -> bool: ... - There is no inherent or required relationship between this and the - app_metadata fields in the FlightEndpoints or resulting FlightData - messages. Since this metadata is application-defined, a given - application could define there to be a relationship, but there is - none required by the spec. + @property + def app_metadata(self) -> bytes | str: ... - """ def serialize(self) -> bytes: ... @classmethod def deserialize(cls, serialized: bytes) -> Self: ... class FlightStreamChunk(_Weakrefable): - """A RecordBatch with application metadata on the side.""" + @property def data(self) -> RecordBatch | None: ... @property @@ -449,7 +301,7 @@ class FlightStreamChunk(_Weakrefable): def __iter__(self): ... class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): - """A reader for Flight streams.""" + # Needs to be separate class so the "real" class can subclass the # pure-Python mixin class @@ -457,115 +309,58 @@ class _MetadataRecordBatchReader(_Weakrefable, _ReadPandasMixin): def __iter__(self) -> Self: ... def __next__(self) -> FlightStreamChunk: ... @property - def schema(self) -> Schema: - """Get the schema for this reader.""" - def read_all(self) -> Table: - """Read the entire contents of the stream as a Table.""" - def read_chunk(self) -> FlightStreamChunk: - """Read the next FlightStreamChunk along with any metadata. - - Returns - ------- - chunk : FlightStreamChunk - The next FlightStreamChunk in the stream. - - Raises - ------ - StopIteration - when the stream is finished - """ - def to_reader(self) -> RecordBatchReader: - """Convert this reader into a regular RecordBatchReader. - - This may fail if the schema cannot be read from the remote end. - - Returns - ------- - RecordBatchReader - """ - -class MetadataRecordBatchReader(_MetadataRecordBatchReader): - """The base class for readers for Flight streams. - - See Also - -------- - FlightStreamReader - """ + def schema(self) -> Schema: ... + + def read_all(self) -> Table: ... + + def read_chunk(self) -> FlightStreamChunk: ... + + def to_reader(self) -> RecordBatchReader: ... + + +class MetadataRecordBatchReader(_MetadataRecordBatchReader): ... + class FlightStreamReader(MetadataRecordBatchReader): - """A reader that can also be canceled.""" - def cancel(self) -> None: - """Cancel the read operation.""" - def read_all(self) -> Table: - """Read the entire contents of the stream as a Table.""" + + def cancel(self) -> None: ... + + def read_all(self) -> Table: ... + class MetadataRecordBatchWriter(_CRecordBatchWriter): - """A RecordBatchWriter that also allows writing application metadata. - - This class is a context manager; on exit, close() will be called. 
- """ - - def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: - """Prepare to write data to this stream with the given schema.""" - def write_metadata(self, buf: Buffer) -> None: - """Write Flight metadata by itself.""" - def write_batch(self, batch: RecordBatch) -> None: # type: ignore[override] - """ - Write RecordBatch to stream. - - Parameters - ---------- - batch : RecordBatch - """ - def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: - """ - Write Table to stream in (contiguous) RecordBatch objects. - - Parameters - ---------- - table : Table - max_chunksize : int, default None - Maximum number of rows for RecordBatch chunks. Individual chunks may - be smaller depending on the chunk layout of individual columns. - """ - def close(self) -> None: - """ - Close stream and write end-of-stream 0 marker. - """ - def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: - """Write a RecordBatch along with Flight metadata. - - Parameters - ---------- - batch : RecordBatch - The next RecordBatch in the stream. - buf : Buffer - Application-specific metadata for the batch as defined by - Flight. - """ + + + def begin(self, schema: Schema, options: IpcWriteOptions | None = None) -> None: ... + + def write_metadata(self, buf: Buffer) -> None: ... + + def write_batch(self, batch: RecordBatch) -> None: ... # type: ignore[override] + + def write_table(self, table: Table, max_chunksize: int | None = None, **kwargs) -> None: ... + + def close(self) -> None: ... + + def write_with_metadata(self, batch: RecordBatch, buf: Buffer) -> None: ... + class FlightStreamWriter(MetadataRecordBatchWriter): - """A writer that also allows closing the write side of a stream.""" - def done_writing(self) -> None: - """Indicate that the client is done writing, but not done reading.""" + + def done_writing(self) -> None: ... + class FlightMetadataReader(_Weakrefable): - """A reader for Flight metadata messages sent during a DoPut.""" - def read(self) -> Buffer | None: - """Read the next metadata message.""" + + def read(self) -> Buffer | None: ... + class FlightMetadataWriter(_Weakrefable): - """A sender for Flight metadata messages during a DoPut.""" - def write(self, message: Buffer) -> None: - """Write the next metadata message. - Parameters - ---------- - message : Buffer - """ + def write(self, message: Buffer) -> None: ... + class AsyncioCall(Generic[_T]): - """State for an async RPC using asyncio.""" + _future: asyncio.Future[_T] @@ -573,11 +368,7 @@ class AsyncioCall(Generic[_T]): def wakeup(self, result_or_exception: BaseException | _T) -> None: ... class AsyncioFlightClient: - """ - A FlightClient with an asyncio-based async interface. - This interface is EXPERIMENTAL. - """ def __init__(self, client: FlightClient) -> None: ... async def get_flight_info( @@ -588,40 +379,7 @@ class AsyncioFlightClient: ): ... class FlightClient(_Weakrefable): - """A client to a Flight service. - - Connect to a Flight service on the given host and port. - - Parameters - ---------- - location : str, tuple or Location - Location to connect to. Either a gRPC URI like `grpc://localhost:port`, - a tuple of (host, port) pair, or a Location instance. - tls_root_certs : bytes or None - PEM-encoded - cert_chain: bytes or None - Client certificate if using mutual TLS - private_key: bytes or None - Client private key for cert_chain is using mutual TLS - override_hostname : str or None - Override the hostname checked by TLS. Insecure, use with caution. 
- middleware : list optional, default None - A list of ClientMiddlewareFactory instances. - write_size_limit_bytes : int optional, default None - A soft limit on the size of a data payload sent to the - server. Enabled if positive. If enabled, writing a record - batch that (when serialized) exceeds this limit will raise an - exception; the client can retry the write with a smaller - batch. - disable_server_verification : boolean optional, default False - A flag that indicates that, if the client is connecting - with TLS, that it skips server verification. If this is - enabled, all other TLS settings are overridden. - generic_options : list optional, default None - A list of generic (string, int or string) option tuples passed - to the underlying transport. Effect is implementation - dependent. - """ + def __init__( self, location: str | tuple[str, int] | Location, @@ -638,14 +396,8 @@ class FlightClient(_Weakrefable): @property def supports_async(self) -> bool: ... def as_async(self) -> AsyncioFlightClient: ... - def wait_for_available(self, timeout: int = 5) -> None: - """Block until the server can be contacted. - - Parameters - ---------- - timeout : int, default 5 - The maximum seconds to wait. - """ + def wait_for_available(self, timeout: int = 5) -> None: ... + @deprecated( "Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead." ) @@ -658,439 +410,167 @@ class FlightClient(_Weakrefable): private_key: str | None = None, override_hostname: str | None = None, disable_server_verification: bool = False, - ) -> FlightClient: - """Connect to a Flight server. + ) -> FlightClient: ... - .. deprecated:: 0.15.0 - Use the ``FlightClient`` constructor or ``pyarrow.flight.connect`` function instead. - """ def authenticate( self, auth_handler: ClientAuthHandler, options: FlightCallOptions | None = None - ) -> None: - """Authenticate to the server. - - Parameters - ---------- - auth_handler : ClientAuthHandler - The authentication mechanism to use. - options : FlightCallOptions - Options for this call. - """ + ) -> None: ... + def authenticate_basic_token( self, username: str, password: str, options: FlightCallOptions | None = None - ) -> tuple[str, str]: - """Authenticate to the server with HTTP basic authentication. - - Parameters - ---------- - username : string - Username to authenticate with - password : string - Password to authenticate with - options : FlightCallOptions - Options for this call - - Returns - ------- - tuple : Tuple[str, str] - A tuple representing the FlightCallOptions authorization - header entry of a bearer token. - """ - def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: - """List the actions available on a service.""" + ) -> tuple[str, str]: ... + + def list_actions(self, options: FlightCallOptions | None = None) -> list[Action]: ... + def do_action( self, action: Action, options: FlightCallOptions | None = None - ) -> Iterator[Result]: - """ - Execute an action on a service. - - Parameters - ---------- - action : str, tuple, or Action - Can be action type name (no body), type and body, or any Action - object - options : FlightCallOptions - RPC options - - Returns - ------- - results : iterator of Result values - """ + ) -> Iterator[Result]: ... + def list_flights( self, criteria: str | None = None, options: FlightCallOptions | None = None - ) -> Generator[FlightInfo, None, None]: - """List the flights available on a service.""" + ) -> Generator[FlightInfo, None, None]: ... 
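The FlightClient surface stubbed above (construction, basic-token auth, list_actions/do_action, list_flights) is easier to review next to a usage sketch. The sketch below is hedged: the server address, the credentials and the "healthcheck" action name are assumptions for illustration only.

    import pyarrow.flight as flight

    # Connect to an assumed local Flight endpoint.
    client = flight.FlightClient("grpc://localhost:8815")

    # authenticate_basic_token returns a (header-name, token) pair that can
    # be attached to later calls via FlightCallOptions headers.
    token_pair = client.authenticate_basic_token("user", "password")
    options = flight.FlightCallOptions(headers=[token_pair])

    # Enumerate available flights and server-defined actions.
    for info in client.list_flights(options=options):
        print(info.descriptor, info.total_records)

    for action_type in client.list_actions(options=options):
        print(action_type)

    # "healthcheck" is a hypothetical action; a given server may not define it.
    for result in client.do_action(flight.Action("healthcheck", b""), options=options):
        print(result.body.to_pybytes())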
+ def get_flight_info( self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> FlightInfo: - """Request information about an available flight.""" + ) -> FlightInfo: ... + def get_schema( self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> Schema: - """Request schema for an available flight.""" + ) -> Schema: ... + def do_get( self, ticket: Ticket, options: FlightCallOptions | None = None - ) -> FlightStreamReader: - """Request the data for a flight. + ) -> FlightStreamReader: ... - Returns - ------- - reader : FlightStreamReader - """ def do_put( self, descriptor: FlightDescriptor, schema: Schema, options: FlightCallOptions | None = None, - ) -> tuple[FlightStreamWriter, FlightStreamReader]: - """Upload data to a flight. - - Returns - ------- - writer : FlightStreamWriter - reader : FlightMetadataReader - """ + ) -> tuple[FlightStreamWriter, FlightStreamReader]: ... + def do_exchange( self, descriptor: FlightDescriptor, options: FlightCallOptions | None = None - ) -> tuple[FlightStreamWriter, FlightStreamReader]: - """Start a bidirectional data exchange with a server. - - Parameters - ---------- - descriptor : FlightDescriptor - A descriptor for the flight. - options : FlightCallOptions - RPC options. - - Returns - ------- - writer : FlightStreamWriter - reader : FlightStreamReader - """ - def close(self) -> None: - """Close the client and disconnect.""" + ) -> tuple[FlightStreamWriter, FlightStreamReader]: ... + + def close(self) -> None: ... + def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, traceback) -> None: ... -class FlightDataStream(_Weakrefable): - """ - Abstract base class for Flight data streams. +class FlightDataStream(_Weakrefable): ... - See Also - -------- - RecordBatchStream - GeneratorStream - """ class RecordBatchStream(FlightDataStream): - """A Flight data stream backed by RecordBatches. - - The remainder of this DoGet request will be handled in C++, - without having to acquire the GIL. - """ def __init__( self, data_source: RecordBatchReader | Table, options: IpcWriteOptions | None = None - ) -> None: - """Create a RecordBatchStream from a data source. + ) -> None: ... - Parameters - ---------- - data_source : RecordBatchReader or Table - The data to stream to the client. - options : pyarrow.ipc.IpcWriteOptions, optional - Optional IPC options to control how to write the data. - """ class GeneratorStream(FlightDataStream): - """A Flight data stream backed by a Python generator.""" + def __init__( self, schema: Schema, generator: Iterable[FlightDataStream | Table | RecordBatch | RecordBatchReader], options: IpcWriteOptions | None = None, - ) -> None: - """Create a GeneratorStream from a Python generator. + ) -> None: ... - Parameters - ---------- - schema : Schema - The schema for the data to be returned. - generator : iterator or iterable - The generator should yield other FlightDataStream objects, - Tables, RecordBatches, or RecordBatchReaders. +class ServerCallContext(_Weakrefable): - options : pyarrow.ipc.IpcWriteOptions, optional - """ + def peer_identity(self) -> bytes: ... + + def peer(self) -> str: ... -class ServerCallContext(_Weakrefable): - """Per-call state/context.""" - def peer_identity(self) -> bytes: - """Get the identity of the authenticated peer. - - May be the empty string. 
- """ - def peer(self) -> str: - """Get the address of the peer.""" # Set safe=True as gRPC on Windows sometimes gives garbage bytes - def is_cancelled(self) -> bool: - """Check if the current RPC call has been canceled by the client.""" - def add_header(self, key: str, value: str) -> None: - """Add a response header.""" - def add_trailer(self, key: str, value: str) -> None: - """Add a response trailer.""" - def get_middleware(self, key: str) -> ServerMiddleware | None: - """ - Get a middleware instance by key. - - Returns None if the middleware was not found. - """ + def is_cancelled(self) -> bool: ... + + def add_header(self, key: str, value: str) -> None: ... + + def add_trailer(self, key: str, value: str) -> None: ... + + def get_middleware(self, key: str) -> ServerMiddleware | None: ... + class ServerAuthReader(_Weakrefable): - """A reader for messages from the client during an auth handshake.""" + def read(self) -> str: ... class ServerAuthSender(_Weakrefable): - """A writer for messages to the client during an auth handshake.""" + def write(self, message: str) -> None: ... class ClientAuthReader(_Weakrefable): - """A reader for messages from the server during an auth handshake.""" + def read(self) -> str: ... class ClientAuthSender(_Weakrefable): - """A writer for messages to the server during an auth handshake.""" + def write(self, message: str) -> None: ... class ServerAuthHandler(_Weakrefable): - """Authentication middleware for a server. - - To implement an authentication mechanism, subclass this class and - override its methods. - """ - def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): - """Conduct the handshake with the client. + def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader): ... - May raise an error if the client cannot authenticate. + def is_valid(self, token: str) -> bool: ... - Parameters - ---------- - outgoing : ServerAuthSender - A channel to send messages to the client. - incoming : ServerAuthReader - A channel to read messages from the client. - """ - def is_valid(self, token: str) -> bool: - """Validate a client token, returning their identity. - May return an empty string (if the auth mechanism does not - name the peer) or raise an exception (if the token is - invalid). +class ClientAuthHandler(_Weakrefable): - Parameters - ---------- - token : bytes - The authentication token from the client. + def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): ... - """ + def get_token(self) -> str: ... -class ClientAuthHandler(_Weakrefable): - """Authentication plugin for a client.""" - def authenticate(self, outgoing: ClientAuthSender, incoming: ClientAuthReader): - """Conduct the handshake with the server. - - Parameters - ---------- - outgoing : ClientAuthSender - A channel to send messages to the server. - incoming : ClientAuthReader - A channel to read messages from the server. - """ - def get_token(self) -> str: - """Get the auth token for a call.""" class CallInfo(NamedTuple): - """Information about a particular RPC for Flight middleware.""" + method: FlightMethod class ClientMiddlewareFactory(_Weakrefable): - """A factory for new middleware instances. - - All middleware methods will be called from the same thread as the - RPC method implementation. That is, thread-locals set in the - client are accessible from the middleware itself. - """ - def start_call(self, info: CallInfo) -> ClientMiddleware | None: - """Called at the start of an RPC. 
+ def start_call(self, info: CallInfo) -> ClientMiddleware | None: ... - This must be thread-safe and must not raise exceptions. - - Parameters - ---------- - info : CallInfo - Information about the call. - - Returns - ------- - instance : ClientMiddleware - An instance of ClientMiddleware (the instance to use for - the call), or None if this call is not intercepted. - - """ class ClientMiddleware(_Weakrefable): - """Client-side middleware for a call, instantiated per RPC. - - Methods here should be fast and must be infallible: they should - not raise exceptions or stall indefinitely. - - """ - - def sending_headers(self) -> dict[str, list[str] | list[bytes]]: - """A callback before headers are sent. - - Returns - ------- - headers : dict - A dictionary of header values to add to the request, or - None if no headers are to be added. The dictionary should - have string keys and string or list-of-string values. - - Bytes values are allowed, but the underlying transport may - not support them or may restrict them. For gRPC, binary - values are only allowed on headers ending in "-bin". - - Header names must be lowercase ASCII. - - """ - def received_headers(self, headers: dict[str, list[str] | list[bytes]]): - """A callback when headers are received. - The default implementation does nothing. + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ... - Parameters - ---------- - headers : dict - A dictionary of headers from the server. Keys are strings - and values are lists of strings (for text headers) or - bytes (for binary headers). - """ + def received_headers(self, headers: dict[str, list[str] | list[bytes]]): ... - def call_completed(self, exception: ArrowException): - """A callback when the call finishes. - The default implementation does nothing. + def call_completed(self, exception: ArrowException): ... - Parameters - ---------- - exception : ArrowException - If the call errored, this is the equivalent - exception. Will be None if the call succeeded. - - """ class ServerMiddlewareFactory(_Weakrefable): - """A factory for new middleware instances. - - All middleware methods will be called from the same thread as the - RPC method implementation. That is, thread-locals set in the - middleware are accessible from the method itself. - """ def start_call( self, info: CallInfo, headers: dict[str, list[str] | list[bytes]] - ) -> ServerMiddleware | None: - """Called at the start of an RPC. - - This must be thread-safe. - - Parameters - ---------- - info : CallInfo - Information about the call. - headers : dict - A dictionary of headers from the client. Keys are strings - and values are lists of strings (for text headers) or - bytes (for binary headers). - - Returns - ------- - instance : ServerMiddleware - An instance of ServerMiddleware (the instance to use for - the call), or None if this call is not intercepted. + ) -> ServerMiddleware | None: ... - Raises - ------ - exception : pyarrow.ArrowException - If an exception is raised, the call will be rejected with - the given error. - """ +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): ... -class TracingServerMiddlewareFactory(ServerMiddlewareFactory): - """A factory for tracing middleware instances. - - This enables OpenTelemetry support in Arrow (if Arrow was compiled - with OpenTelemetry support enabled). A new span will be started on - each RPC call. 
The TracingServerMiddleware instance can then be - retrieved within an RPC handler to get the propagated context, - which can be used to start a new span on the Python side. - - Because the Python/C++ OpenTelemetry libraries do not - interoperate, spans on the C++ side are not directly visible to - the Python side and vice versa. - - """ class ServerMiddleware(_Weakrefable): - """Server-side middleware for a call, instantiated per RPC. - - Methods here should be fast and must be infallible: they should - not raise exceptions or stall indefinitely. - """ - def sending_headers(self) -> dict[str, list[str] | list[bytes]]: - """A callback before headers are sent. + def sending_headers(self) -> dict[str, list[str] | list[bytes]]: ... - Returns - ------- - headers : dict - A dictionary of header values to add to the response, or - None if no headers are to be added. The dictionary should - have string keys and string or list-of-string values. + def call_completed(self, exception: ArrowException): ... - Bytes values are allowed, but the underlying transport may - not support them or may restrict them. For gRPC, binary - values are only allowed on headers ending in "-bin". - - Header names must be lowercase ASCII. - - """ - def call_completed(self, exception: ArrowException): - """A callback when the call finishes. - - Parameters - ---------- - exception : pyarrow.ArrowException - If the call errored, this is the equivalent - exception. Will be None if the call succeeded. - - """ class TracingServerMiddleware(ServerMiddleware): trace_context: dict def __init__(self, trace_context: dict) -> None: ... class _ServerMiddlewareFactoryWrapper(ServerMiddlewareFactory): - """Wrapper to bundle server middleware into a single C++ one.""" + def __init__(self, factories: dict[str, ServerMiddlewareFactory]) -> None: ... def start_call( # type: ignore[override] @@ -1103,47 +583,12 @@ class _ServerMiddlewareWrapper(ServerMiddleware): def call_completed(self, exception: ArrowException) -> None: ... class _FlightServerFinalizer(_Weakrefable): - """ - A finalizer that shuts down the server on destruction. - See ARROW-16597. If the server is still active at interpreter - exit, the process may segfault. - """ def finalize(self) -> None: ... class FlightServerBase(_Weakrefable): - """A Flight service definition. - - To start the server, create an instance of this class with an - appropriate location. The server will be running as soon as the - instance is created; it is not required to call :meth:`serve`. - - Override methods to define your Flight service. - - Parameters - ---------- - location : str, tuple or Location optional, default None - Location to serve on. Either a gRPC URI like `grpc://localhost:port`, - a tuple of (host, port) pair, or a Location instance. - If None is passed then the server will be started on localhost with a - system provided random port. - auth_handler : ServerAuthHandler optional, default None - An authentication mechanism to use. May be None. - tls_certificates : list optional, default None - A list of (certificate, key) pairs. - verify_client : boolean optional, default False - If True, then enable mutual TLS: require the client to present - a client certificate, and validate the certificate. - root_certificates : bytes optional, default None - If enabling mutual TLS, this specifies the PEM-encoded root - certificate used to validate client certificates. - middleware : dict optional, default None - A dictionary of :class:`ServerMiddlewareFactory` instances. 
The - string keys can be used to retrieve the middleware instance within - RPC handlers (see :meth:`ServerCallContext.get_middleware`). - - """ + def __init__( self, location: str | tuple[str, int] | Location | None = None, @@ -1154,197 +599,46 @@ class FlightServerBase(_Weakrefable): middleware: dict[str, ServerMiddlewareFactory] | None = None, ): ... @property - def port(self) -> int: - """ - Get the port that this server is listening on. - - Returns a non-positive value if the operation is invalid - (e.g. init() was not called or server is listening on a domain - socket). - """ - def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: - """List flights available on this service. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - criteria : bytes - Filter criteria provided by the client. - - Returns - ------- - iterator of FlightInfo - - """ + def port(self) -> int: ... + + def list_flights(self, context: ServerCallContext, criteria: str) -> Iterator[FlightInfo]: ... + def get_flight_info( self, context: ServerCallContext, descriptor: FlightDescriptor - ) -> FlightInfo: - """Get information about a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - - Returns - ------- - FlightInfo - - """ - def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: - """Get the schema of a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - - Returns - ------- - Schema - - """ + ) -> FlightInfo: ... + + def get_schema(self, context: ServerCallContext, descriptor: FlightDescriptor) -> Schema: ... + def do_put( self, context: ServerCallContext, descriptor: FlightDescriptor, reader: MetadataRecordBatchReader, writer: FlightMetadataWriter, - ) -> None: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - reader : MetadataRecordBatchReader - A reader for data uploaded by the client. - writer : FlightMetadataWriter - A writer to send responses to the client. - - """ - def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - ticket : Ticket - The ticket for the flight. - - Returns - ------- - FlightDataStream - A stream of data to send back to the client. - - """ + ) -> None: ... + + def do_get(self, context: ServerCallContext, ticket: Ticket) -> FlightDataStream: ... 
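Since the handler docstrings (do_get, do_put, list_flights, ...) are removed above, here is a hedged end-to-end sketch of subclassing FlightServerBase. The class name, bind address and in-memory storage scheme are assumptions for illustration, not a recommended server implementation.

    import pyarrow.flight as flight

    class InMemoryFlightServer(flight.FlightServerBase):
        # Toy service: do_put stores an uploaded table keyed by the
        # descriptor command; do_get streams it back for a matching ticket.

        def __init__(self, location="grpc://0.0.0.0:8815"):
            super().__init__(location)
            self._tables = {}

        def do_put(self, context, descriptor, reader, writer):
            self._tables[descriptor.command] = reader.read_all()

        def do_get(self, context, ticket):
            return flight.RecordBatchStream(self._tables[ticket.ticket])

    if __name__ == "__main__":
        InMemoryFlightServer().serve()  # blocks until shutdown() is called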
+ def do_exchange( self, context: ServerCallContext, descriptor: FlightDescriptor, reader: MetadataRecordBatchReader, writer: MetadataRecordBatchWriter, - ) -> None: - """Write data to a flight. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - descriptor : FlightDescriptor - The descriptor for the flight provided by the client. - reader : MetadataRecordBatchReader - A reader for data uploaded by the client. - writer : MetadataRecordBatchWriter - A writer to send responses to the client. - - """ - def list_actions(self, context: ServerCallContext) -> Iterable[Action]: - """List custom actions available on this server. - - Applications should override this method to implement their - own behavior. The default method raises a NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - - Returns - ------- - iterator of ActionType or tuple - - """ - def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: - """Execute a custom action. - - This method should return an iterator, or it should be a - generator. Applications should override this method to - implement their own behavior. The default method raises a - NotImplementedError. - - Parameters - ---------- - context : ServerCallContext - Common contextual information. - action : Action - The action to execute. - - Returns - ------- - iterator of bytes - - """ - def serve(self) -> None: - """Block until the server shuts down. - - This method only returns if shutdown() is called or a signal is - received. - """ - def run(self) -> None: - """Block until the server shuts down. - - .. deprecated:: 0.15.0 - Use the ``FlightServer.serve`` method instead - """ - def shutdown(self) -> None: - """Shut down the server, blocking until current requests finish. - - Do not call this directly from the implementation of a Flight - method, as then the server will block forever waiting for that - request to finish. Instead, call this method from a background - thread. - - This method should only be called once. - """ - def wait(self) -> None: - """Block until server is terminated with shutdown.""" + ) -> None: ... + + def list_actions(self, context: ServerCallContext) -> Iterable[Action]: ... + + def do_action(self, context: ServerCallContext, action: Action) -> Iterable[bytes]: ... + + def serve(self) -> None: ... + + def run(self) -> None: ... + + def shutdown(self) -> None: ... + + def wait(self) -> None: ... + def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, traceback): ... @@ -1359,39 +653,4 @@ def connect( write_size_limit_bytes: int | None = None, disable_server_verification: bool = False, generic_options: list[tuple[str, int | str]] | None = None, -) -> FlightClient: - """ - Connect to a Flight server. - - Parameters - ---------- - location : str, tuple, or Location - Location to connect to. Either a URI like "grpc://localhost:port", - a tuple of (host, port), or a Location instance. - tls_root_certs : bytes or None - PEM-encoded. - cert_chain: str or None - If provided, enables TLS mutual authentication. - private_key: str or None - If provided, enables TLS mutual authentication. - override_hostname : str or None - Override the hostname checked by TLS. Insecure, use with caution. - middleware : list or None - A list of ClientMiddlewareFactory instances to apply. 
- write_size_limit_bytes : int or None - A soft limit on the size of a data payload sent to the - server. Enabled if positive. If enabled, writing a record - batch that (when serialized) exceeds this limit will raise an - exception; the client can retry the write with a smaller - batch. - disable_server_verification : boolean or None - Disable verifying the server when using TLS. - Insecure, use with caution. - generic_options : list or None - A list of generic (string, int or string) options to pass to - the underlying transport. - - Returns - ------- - client : FlightClient - """ +) -> FlightClient: ... diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi index 1f3667ef413..42ea8543738 100644 --- a/python/pyarrow-stubs/_fs.pyi +++ b/python/pyarrow-stubs/_fs.pyi @@ -36,77 +36,15 @@ from fsspec import AbstractFileSystem # type: ignore[import-untyped] from .lib import NativeFile, _Weakrefable + class FileType(enum.IntFlag): NotFound = enum.auto() Unknown = enum.auto() File = enum.auto() Directory = enum.auto() + class FileInfo(_Weakrefable): - """ - FileSystem entry info. - - Parameters - ---------- - path : str - The full path to the filesystem entry. - type : FileType - The type of the filesystem entry. - mtime : datetime or float, default None - If given, the modification time of the filesystem entry. - If a float is given, it is the number of seconds since the - Unix epoch. - mtime_ns : int, default None - If given, the modification time of the filesystem entry, - in nanoseconds since the Unix epoch. - `mtime` and `mtime_ns` are mutually exclusive. - size : int, default None - If given, the filesystem entry size in bytes. This should only - be given if `type` is `FileType.File`. - - Examples - -------- - Generate a file: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> path_fs = local_path + "/pyarrow-fs-example.dat" - >>> with local.open_output_stream(path_fs) as stream: - ... stream.write(b"data") - 4 - - Get FileInfo object using ``get_file_info()``: - - >>> file_info = local.get_file_info(path_fs) - >>> file_info - - - Inspect FileInfo attributes: - - >>> file_info.type - - - >>> file_info.is_file - True - - >>> file_info.path - '/.../pyarrow-fs-example.dat' - - >>> file_info.base_name - 'pyarrow-fs-example.dat' - - >>> file_info.size - 4 - - >>> file_info.extension - 'dat' - - >>> file_info.mtime # doctest: +SKIP - datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) - - >>> file_info.mtime_ns # doctest: +SKIP - 1656489370873922073 - """ def __init__( self, @@ -118,901 +56,161 @@ class FileInfo(_Weakrefable): size: int | None = None, ): ... @property - def type(self) -> FileType: - """ - Type of the file. - - The returned enum values can be the following: - - - FileType.NotFound: target does not exist - - FileType.Unknown: target exists but its type is unknown (could be a - special file such as a Unix socket or character device, or - Windows NUL / CON / ...) - - FileType.File: target is a regular file - - FileType.Directory: target is a regular directory - - Returns - ------- - type : FileType - """ + def type(self) -> FileType: ... + @property def is_file(self) -> bool: ... @property - def path(self) -> str: - """ - The full file path in the filesystem. - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.path - '/.../pyarrow-fs-example.dat' - """ - @property - def base_name(self) -> str: - """ - The file base name. - - Component after the last directory separator. 
- - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.base_name - 'pyarrow-fs-example.dat' - """ + def path(self) -> str: ... + @property - def size(self) -> int: - """ - The size in bytes, if available. + def base_name(self) -> str: ... - Only regular files are guaranteed to have a size. + @property + def size(self) -> int: ... - Returns - ------- - size : int or None - """ @property - def extension(self) -> str: - """ - The file extension. - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.extension - 'dat' - """ + def extension(self) -> str: ... + @property - def mtime(self) -> dt.datetime | None: - """ - The time of last modification, if available. - - Returns - ------- - mtime : datetime.datetime or None - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.mtime # doctest: +SKIP - datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc) - """ + def mtime(self) -> dt.datetime | None: ... + @property - def mtime_ns(self) -> int | None: - """ - The time of last modification, if available, expressed in nanoseconds - since the Unix epoch. - - Returns - ------- - mtime_ns : int or None - - Examples - -------- - >>> file_info = local.get_file_info(path) - >>> file_info.mtime_ns # doctest: +SKIP - 1656489370873922073 - """ + def mtime_ns(self) -> int | None: ... + class FileSelector(_Weakrefable): - """ - File and directory selector. - - It contains a set of options that describes how to search for files and - directories. - - Parameters - ---------- - base_dir : str - The directory in which to select files. Relative paths also work, use - '.' for the current directory and '..' for the parent. - allow_not_found : bool, default False - The behavior if `base_dir` doesn't exist in the filesystem. - If false, an error is returned. - If true, an empty selection is returned. - recursive : bool, default False - Whether to recurse into subdirectories. - - Examples - -------- - List the contents of a directory and subdirectories: - - >>> selector_1 = fs.FileSelector(local_path, recursive=True) - >>> local.get_file_info(selector_1) # doctest: +SKIP - [, - , - ] - - List only the contents of the base directory: - - >>> selector_2 = fs.FileSelector(local_path) - >>> local.get_file_info(selector_2) # doctest: +SKIP - [, - ] - - Return empty selection if the directory doesn't exist: - - >>> selector_not_found = fs.FileSelector( - ... local_path + "/missing", recursive=True, allow_not_found=True - ... ) - >>> local.get_file_info(selector_not_found) - [] - """ base_dir: str allow_not_found: bool recursive: bool - def __init__(self, base_dir: str, allow_not_found: bool = False, recursive: bool = False): ... + def __init__(self, base_dir: str, allow_not_found: bool = False, + recursive: bool = False): ... + class FileSystem(_Weakrefable): - """ - Abstract file system API. - """ @classmethod - def from_uri(cls, uri: str) -> tuple[Self, str]: - """ - Create a new FileSystem from URI or Path. - - Recognized URI schemes are "file", "mock", "s3fs", "gs", "gcs", "hdfs" and "viewfs". - In addition, the argument can be a pathlib.Path object, or a string - describing an absolute local path. - - Parameters - ---------- - uri : string - URI-based path, for example: file:///some/local/path. - - Returns - ------- - tuple of (FileSystem, str path) - With (filesystem, path) tuple where path is the abstract path - inside the FileSystem instance. 
- - Examples - -------- - Create a new FileSystem subclass from a URI: - - >>> uri = "file:///{}/pyarrow-fs-example.dat".format(local_path) - >>> local_new, path_new = fs.FileSystem.from_uri(uri) - >>> local_new - >> path_new - '/.../pyarrow-fs-example.dat' - - Or from a s3 bucket: - - >>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/") - (, 'usgs-landsat/collection02') - """ - def equals(self, other: FileSystem) -> bool: - """ - Parameters - ---------- - other : pyarrow.fs.FileSystem - - Returns - ------- - bool - """ + def from_uri(cls, uri: str) -> tuple[Self, str]: ... + + def equals(self, other: FileSystem) -> bool: ... + @property - def type_name(self) -> str: - """ - The filesystem's type name. - """ - def get_file_info(self, paths_or_selector: str | FileSelector | list[str]) -> FileInfo | list[FileInfo]: - """ - Get info for the given files. - - Any symlink is automatically dereferenced, recursively. A non-existing - or unreachable file returns a FileStat object and has a FileType of - value NotFound. An exception indicates a truly exceptional condition - (low-level I/O error, etc.). - - Parameters - ---------- - paths_or_selector : FileSelector, path-like or list of path-likes - Either a selector object, a path-like object or a list of - path-like objects. The selector's base directory will not be - part of the results, even if it exists. If it doesn't exist, - use `allow_not_found`. - - Returns - ------- - FileInfo or list of FileInfo - Single FileInfo object is returned for a single path, otherwise - a list of FileInfo objects is returned. - - Examples - -------- - >>> local - - >>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path)) - - """ - def create_dir(self, path: str, *, recursive: bool = True) -> None: - """ - Create a directory and subdirectories. - - This function succeeds if the directory already exists. - - Parameters - ---------- - path : str - The path of the new directory. - recursive : bool, default True - Create nested directories as well. - """ - def delete_dir(self, path: str) -> None: - """ - Delete a directory and its contents, recursively. - - Parameters - ---------- - path : str - The path of the directory to be deleted. - """ + def type_name(self) -> str: ... + + def get_file_info(self, paths_or_selector: str | FileSelector | + list[str]) -> FileInfo | list[FileInfo]: ... + + def create_dir(self, path: str, *, recursive: bool = True) -> None: ... + + def delete_dir(self, path: str) -> None: ... + def delete_dir_contents( self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False - ) -> None: - """ - Delete a directory's contents, recursively. - - Like delete_dir, but doesn't delete the directory itself. - - Parameters - ---------- - path : str - The path of the directory to be deleted. - accept_root_dir : boolean, default False - Allow deleting the root directory's contents - (if path is empty or "/") - missing_dir_ok : boolean, default False - If False then an error is raised if path does - not exist - """ - def move(self, src: str, dest: str) -> None: - """ - Move / rename a file or directory. - - If the destination exists: - - if it is a non-empty directory, an error is returned - - otherwise, if it has the same type as the source, it is replaced - - otherwise, behavior is unspecified (implementation-dependent). - - Parameters - ---------- - src : str - The path of the file or the directory to be moved. - dest : str - The destination path where the file or directory is moved to. 
- - Examples - -------- - Create a new folder with a file: - - >>> local.create_dir("/tmp/other_dir") - >>> local.copy_file(path, "/tmp/move_example.dat") - - Move the file: - - >>> local.move("/tmp/move_example.dat", "/tmp/other_dir/move_example_2.dat") - - Inspect the file info: - - >>> local.get_file_info("/tmp/other_dir/move_example_2.dat") - - >>> local.get_file_info("/tmp/move_example.dat") - - - Delete the folder: - >>> local.delete_dir("/tmp/other_dir") - """ - def copy_file(self, src: str, dest: str) -> None: - """ - Copy a file. - - If the destination exists and is a directory, an error is returned. - Otherwise, it is replaced. - - Parameters - ---------- - src : str - The path of the file to be copied from. - dest : str - The destination path where the file is copied to. - - Examples - -------- - >>> local.copy_file(path, local_path + "/pyarrow-fs-example_copy.dat") - - Inspect the file info: - - >>> local.get_file_info(local_path + "/pyarrow-fs-example_copy.dat") - - >>> local.get_file_info(path) - - """ - def delete_file(self, path: str) -> None: - """ - Delete a file. - - Parameters - ---------- - path : str - The path of the file to be deleted. - """ - def open_input_file(self, path: str) -> NativeFile: - """ - Open an input file for random access reading. - - Parameters - ---------- - path : str - The source to open for reading. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Print the data from the file with `open_input_file()`: - - >>> with local.open_input_file(path) as f: - ... print(f.readall()) - b'data' - """ + ) -> None: ... + + def move(self, src: str, dest: str) -> None: ... + + def copy_file(self, src: str, dest: str) -> None: ... + + def delete_file(self, path: str) -> None: ... + + def open_input_file(self, path: str) -> NativeFile: ... + def open_input_stream( self, path: str, compression: str | None = "detect", buffer_size: int | None = None - ) -> NativeFile: - """ - Open an input stream for sequential reading. - - Parameters - ---------- - path : str - The source to open for reading. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly decompression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). - buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary read buffer. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Print the data from the file with `open_input_stream()`: - - >>> with local.open_input_stream(path) as f: - ... print(f.readall()) - b'data' - """ + ) -> NativeFile: ... + def open_output_stream( self, path: str, compression: str | None = "detect", buffer_size: int | None = None, metadata: dict[str, str] | None = None, - ) -> NativeFile: - """ - Open an output stream for sequential writing. - - If the target already exists, existing data is truncated. - - Parameters - ---------- - path : str - The source to open for writing. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly compression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). 
- buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary write buffer. - metadata : dict optional, default None - If not None, a mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - Unsupported metadata keys will be ignored. - - Returns - ------- - stream : NativeFile - - Examples - -------- - >>> local = fs.LocalFileSystem() - >>> with local.open_output_stream(path) as stream: - ... stream.write(b"data") - 4 - """ + ) -> NativeFile: ... + def open_append_stream( self, path: str, compression: str | None = "detect", buffer_size: int | None = None, metadata: dict[str, str] | None = None, - ): - """ - Open an output stream for appending. - - If the target doesn't exist, a new empty file is created. - - .. note:: - Some filesystem implementations do not support efficient - appending to an existing file, in which case this method will - raise NotImplementedError. - Consider writing to multiple files (using e.g. the dataset layer) - instead. - - Parameters - ---------- - path : str - The source to open for writing. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly compression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. Otherwise, a well-known - algorithm name must be supplied (e.g. "gzip"). - buffer_size : int optional, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary write buffer. - metadata : dict optional, default None - If not None, a mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - Unsupported metadata keys will be ignored. - - Returns - ------- - stream : NativeFile - - Examples - -------- - Append new data to a FileSystem subclass with nonempty file: - - >>> with local.open_append_stream(path) as f: - ... f.write(b"+newly added") - 12 - - Print out the content to the file: - - >>> with local.open_input_file(path) as f: - ... print(f.readall()) - b'data+newly added' - """ - def normalize_path(self, path: str) -> str: - """ - Normalize filesystem path. - - Parameters - ---------- - path : str - The path to normalize - - Returns - ------- - normalized_path : str - The normalized path - """ + ): ... -class LocalFileSystem(FileSystem): - """ - A FileSystem implementation accessing files on the local machine. + def normalize_path(self, path: str) -> str: ... - Details such as symlinks are abstracted away (symlinks are always followed, - except when deleting an entry). - - Parameters - ---------- - use_mmap : bool, default False - Whether open_input_stream and open_input_file should return - a mmap'ed file or a regular file. - - Examples - -------- - Create a FileSystem object with LocalFileSystem constructor: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> local - - - and write data on to the file: - - >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: - ... stream.write(b"data") - 4 - >>> with local.open_input_stream("/tmp/local_fs.dat") as stream: - ... 
print(stream.readall()) - b'data' - - Create a FileSystem object inferred from a URI of the saved file: - - >>> local_new, path = fs.LocalFileSystem().from_uri("/tmp/local_fs.dat") - >>> local_new - >> path - '/tmp/local_fs.dat' - - Check if FileSystems `local` and `local_new` are equal: - - >>> local.equals(local_new) - True - - Compare two different FileSystems: - - >>> local2 = fs.LocalFileSystem(use_mmap=True) - >>> local.equals(local2) - False - - Copy a file and print out the data: - - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/local_fs-copy.dat") - >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as stream: - ... print(stream.readall()) - b'data' - - Open an output stream for appending, add text and print the new data: - - >>> with local.open_append_stream("/tmp/local_fs-copy.dat") as f: - ... f.write(b"+newly added") - 12 - - >>> with local.open_input_stream("/tmp/local_fs-copy.dat") as f: - ... print(f.readall()) - b'data+newly added' - - Create a directory, copy a file into it and then delete the whole directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder") - - >>> local.delete_dir("/tmp/new_folder") - >>> local.get_file_info("/tmp/new_folder") - - - Create a directory, copy a file into it and then delete - the content of the directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - >>> local.delete_dir_contents("/tmp/new_folder") - >>> local.get_file_info("/tmp/new_folder") - - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - - Create a directory, copy a file into it and then delete - the file from the directory: - - >>> local.create_dir("/tmp/new_folder") - >>> local.copy_file("/tmp/local_fs.dat", "/tmp/new_folder/local_fs.dat") - >>> local.delete_file("/tmp/new_folder/local_fs.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs.dat") - - >>> local.get_file_info("/tmp/new_folder") - - - Move the file: - >>> local.move("/tmp/local_fs-copy.dat", "/tmp/new_folder/local_fs-copy.dat") - >>> local.get_file_info("/tmp/new_folder/local_fs-copy.dat") - - >>> local.get_file_info("/tmp/local_fs-copy.dat") - - - To finish delete the file left: - >>> local.delete_file("/tmp/local_fs.dat") - """ +class LocalFileSystem(FileSystem): def __init__(self, *, use_mmap: bool = False) -> None: ... -class SubTreeFileSystem(FileSystem): - """ - Delegates to another implementation after prepending a fixed base path. - - This is useful to expose a logical view of a subtree of a filesystem, - for example a directory in a LocalFileSystem. - - Note, that this makes no security guarantee. For example, symlinks may - allow to "escape" the subtree and access other parts of the underlying - filesystem. - - Parameters - ---------- - base_path : str - The root of the subtree. - base_fs : FileSystem - FileSystem object the operations delegated to. - - Examples - -------- - Create a LocalFileSystem instance: - - >>> from pyarrow import fs - >>> local = fs.LocalFileSystem() - >>> with local.open_output_stream("/tmp/local_fs.dat") as stream: - ... stream.write(b"data") - 4 - Create a directory and a SubTreeFileSystem instance: - - >>> local.create_dir("/tmp/sub_tree") - >>> subtree = fs.SubTreeFileSystem("/tmp/sub_tree", local) - - Write data into the existing file: - - >>> with subtree.open_append_stream("sub_tree_fs.dat") as f: - ... 
f.write(b"+newly added") - 12 - - Print out the attributes: - - >>> subtree.base_fs - - >>> subtree.base_path - '/tmp/sub_tree/' - - Get info for the given directory or given file: - - >>> subtree.get_file_info("") - - >>> subtree.get_file_info("sub_tree_fs.dat") - - - Delete the file and directory: - - >>> subtree.delete_file("sub_tree_fs.dat") - >>> local.delete_dir("/tmp/sub_tree") - >>> local.delete_file("/tmp/local_fs.dat") +class SubTreeFileSystem(FileSystem): - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. - """ def __init__(self, base_path: str, base_fs: FileSystem): ... @property def base_path(self) -> str: ... @property def base_fs(self) -> FileSystem: ... + class _MockFileSystem(FileSystem): def __init__(self, current_time: dt.datetime | None = None) -> None: ... -class PyFileSystem(FileSystem): - """ - A FileSystem with behavior implemented in Python. - - Parameters - ---------- - handler : FileSystemHandler - The handler object implementing custom filesystem behavior. - - Examples - -------- - Create an fsspec-based filesystem object for GitHub: - - >>> from fsspec.implementations import github - >>> gfs = github.GithubFileSystem("apache", "arrow") # doctest: +SKIP - - Get a PyArrow FileSystem object: - >>> from pyarrow.fs import PyFileSystem, FSSpecHandler - >>> pa_fs = PyFileSystem(FSSpecHandler(gfs)) # doctest: +SKIP - - Use :func:`~pyarrow.fs.FileSystem` functionality ``get_file_info()``: +class PyFileSystem(FileSystem): - >>> pa_fs.get_file_info("README.md") # doctest: +SKIP - - """ def __init__(self, handler: FileSystemHandler) -> None: ... @property - def handler(self) -> FileSystemHandler: - """ - The filesystem's underlying handler. + def handler(self) -> FileSystemHandler: ... - Returns - ------- - handler : FileSystemHandler - """ class FileSystemHandler(ABC): - """ - An abstract class exposing methods to implement PyFileSystem's behavior. - """ - @abstractmethod - def get_type_name(self) -> str: - """ - Implement PyFileSystem.type_name. - """ + @abstractmethod - def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: - """ - Implement PyFileSystem.get_file_info(paths). - - Parameters - ---------- - paths : list of str - paths for which we want to retrieve the info. - """ + def get_type_name(self) -> str: ... + @abstractmethod - def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: - """ - Implement PyFileSystem.get_file_info(selector). + def get_file_info(self, paths: str | list[str]) -> FileInfo | list[FileInfo]: ... - Parameters - ---------- - selector : FileSelector - selector for which we want to retrieve the info. - """ + @abstractmethod + def get_file_info_selector(self, selector: FileSelector) -> list[FileInfo]: ... @abstractmethod - def create_dir(self, path: str, recursive: bool) -> None: - """ - Implement PyFileSystem.create_dir(...). - - Parameters - ---------- - path : str - path of the directory. - recursive : bool - if the parent directories should be created too. - """ + def create_dir(self, path: str, recursive: bool) -> None: ... + @abstractmethod - def delete_dir(self, path: str) -> None: - """ - Implement PyFileSystem.delete_dir(...). - - Parameters - ---------- - path : str - path of the directory. - """ + def delete_dir(self, path: str) -> None: ... + @abstractmethod - def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: - """ - Implement PyFileSystem.delete_dir_contents(...). - - Parameters - ---------- - path : str - path of the directory. 
- missing_dir_ok : bool - if False an error should be raised if path does not exist - """ + def delete_dir_contents(self, path: str, missing_dir_ok: bool = False) -> None: ... + @abstractmethod - def delete_root_dir_contents(self) -> None: - """ - Implement PyFileSystem.delete_dir_contents("/", accept_root_dir=True). - """ + def delete_root_dir_contents(self) -> None: ... + @abstractmethod - def delete_file(self, path: str) -> None: - """ - Implement PyFileSystem.delete_file(...). - - Parameters - ---------- - path : str - path of the file. - """ + def delete_file(self, path: str) -> None: ... + @abstractmethod - def move(self, src: str, dest: str) -> None: - """ - Implement PyFileSystem.move(...). - - Parameters - ---------- - src : str - path of what should be moved. - dest : str - path of where it should be moved to. - """ + def move(self, src: str, dest: str) -> None: ... @abstractmethod - def copy_file(self, src: str, dest: str) -> None: - """ - Implement PyFileSystem.copy_file(...). - - Parameters - ---------- - src : str - path of what should be copied. - dest : str - path of where it should be copied to. - """ + def copy_file(self, src: str, dest: str) -> None: ... + @abstractmethod - def open_input_stream(self, path: str) -> NativeFile: - """ - Implement PyFileSystem.open_input_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - """ + def open_input_stream(self, path: str) -> NativeFile: ... + @abstractmethod - def open_input_file(self, path: str) -> NativeFile: - """ - Implement PyFileSystem.open_input_file(...). - - Parameters - ---------- - path : str - path of what should be opened. - """ + def open_input_file(self, path: str) -> NativeFile: ... + @abstractmethod - def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: - """ - Implement PyFileSystem.open_output_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - metadata : mapping - Mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - """ + def open_output_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ... @abstractmethod - def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: - """ - Implement PyFileSystem.open_append_stream(...). - - Parameters - ---------- - path : str - path of what should be opened. - metadata : mapping - Mapping of string keys to string values. - Some filesystems support storing metadata along the file - (such as "Content-Type"). - """ + def open_append_stream(self, path: str, metadata: dict[str, str]) -> NativeFile: ... + @abstractmethod - def normalize_path(self, path: str) -> str: - """ - Implement PyFileSystem.normalize_path(...). - - Parameters - ---------- - path : str - path of what should be normalized. - """ + def normalize_path(self, path: str) -> str: ... + SupportedFileSystem: TypeAlias = Union[AbstractFileSystem, FileSystem] diff --git a/python/pyarrow-stubs/_gcsfs.pyi b/python/pyarrow-stubs/_gcsfs.pyi index 0ced106615a..631c7ae4932 100644 --- a/python/pyarrow-stubs/_gcsfs.pyi +++ b/python/pyarrow-stubs/_gcsfs.pyi @@ -20,59 +20,8 @@ import datetime as dt from ._fs import FileSystem from .lib import KeyValueMetadata -class GcsFileSystem(FileSystem): - """ - Google Cloud Storage (GCS) backed FileSystem implementation - - By default uses the process described in https://google.aip.dev/auth/4110 - to resolve credentials. 
If not running on Google Cloud Platform (GCP), - this generally requires the environment variable - GOOGLE_APPLICATION_CREDENTIALS to point to a JSON file - containing credentials. - - Note: GCS buckets are special and the operations available on them may be - limited or more expensive than expected compared to local file systems. - Note: When pickling a GcsFileSystem that uses default credentials, resolution - credentials are not stored in the serialized data. Therefore, when unpickling - it is assumed that the necessary credentials are in place for the target - process. - - Parameters - ---------- - anonymous : boolean, default False - Whether to connect anonymously. - If true, will not attempt to look up credentials using standard GCP - configuration methods. - access_token : str, default None - GCP access token. If provided, temporary credentials will be fetched by - assuming this role; also, a `credential_token_expiration` must be - specified as well. - target_service_account : str, default None - An optional service account to try to impersonate when accessing GCS. This - requires the specified credential user or service account to have the necessary - permissions. - credential_token_expiration : datetime, default None - Expiration for credential generated with an access token. Must be specified - if `access_token` is specified. - default_bucket_location : str, default 'US' - GCP region to create buckets in. - scheme : str, default 'https' - GCS connection transport scheme. - endpoint_override : str, default None - Override endpoint with a connect string such as "localhost:9000" - default_metadata : mapping or pyarrow.KeyValueMetadata, default None - Default metadata for `open_output_stream`. This will be ignored if - non-empty metadata is passed to `open_output_stream`. - retry_time_limit : timedelta, default None - Set the maximum amount of time the GCS client will attempt to retry - transient errors. Subsecond granularity is ignored. - project_id : str, default None - The GCP project identifier to use for creating buckets. - If not set, the library uses the GOOGLE_CLOUD_PROJECT environment - variable. Most I/O operations do not need a project id, only applications - that create new buckets need a project id. - """ +class GcsFileSystem(FileSystem): def __init__( self, @@ -89,12 +38,7 @@ class GcsFileSystem(FileSystem): project_id: str | None = None, ): ... @property - def default_bucket_location(self) -> str: - """ - The GCP location this filesystem will write to. - """ + def default_bucket_location(self) -> str: ... + @property - def project_id(self) -> str: - """ - The GCP project id this filesystem will use. - """ + def project_id(self) -> str: ... diff --git a/python/pyarrow-stubs/_hdfs.pyi b/python/pyarrow-stubs/_hdfs.pyi index ed367379171..ee1253d64b6 100644 --- a/python/pyarrow-stubs/_hdfs.pyi +++ b/python/pyarrow-stubs/_hdfs.pyi @@ -19,42 +19,9 @@ from _typeshed import StrPath from ._fs import FileSystem -class HadoopFileSystem(FileSystem): - """ - HDFS backed FileSystem implementation - - Parameters - ---------- - host : str - HDFS host to connect to. Set to "default" for fs.defaultFS from - core-site.xml. - port : int, default 8020 - HDFS port to connect to. Set to 0 for default or logical (HA) nodes. - user : str, default None - Username when connecting to HDFS; None implies login user. - replication : int, default 3 - Number of copies each block will have. 
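A construction sketch for GcsFileSystem, condensed from the parameters documented in the removed docstring; anonymous access avoids credential lookup, and the object path in the comment is a placeholder.

    from pyarrow import fs

    gcs = fs.GcsFileSystem(anonymous=True, default_bucket_location="US")
    print(gcs.default_bucket_location)   # "US"
    # gcs.get_file_info("some-bucket/some/object")   # placeholder path, needs network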
- buffer_size : int, default 0 - If 0, no buffering will happen otherwise the size of the temporary read - and write buffer. - default_block_size : int, default None - None means the default configuration for HDFS, a typical block size is - 128 MB. - kerb_ticket : string or path, default None - If not None, the path to the Kerberos ticket cache. - extra_conf : dict, default None - Extra key/value pairs for configuration; will override any - hdfs-site.xml properties. - Examples - -------- - >>> from pyarrow import fs - >>> hdfs = fs.HadoopFileSystem( - ... host, port, user=user, kerb_ticket=ticket_cache_path - ... ) # doctest: +SKIP +class HadoopFileSystem(FileSystem): - For usage of the methods see examples for :func:`~pyarrow.fs.LocalFileSystem`. - """ def __init__( self, host: str, @@ -68,25 +35,4 @@ class HadoopFileSystem(FileSystem): extra_conf: dict | None = None, ): ... @staticmethod - def from_uri(uri: str) -> HadoopFileSystem: # type: ignore[override] - """ - Instantiate HadoopFileSystem object from an URI string. - - The following two calls are equivalent - - * ``HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test\ -&replication=1')`` - * ``HadoopFileSystem('localhost', port=8020, user='test', \ -replication=1)`` - - Parameters - ---------- - uri : str - A string URI describing the connection to HDFS. - In order to change the user, replication, buffer_size or - default_block_size pass the values as query parts. - - Returns - ------- - HadoopFileSystem - """ + def from_uri(uri: str) -> HadoopFileSystem: ... # type: ignore[override] diff --git a/python/pyarrow-stubs/_ipc.pyi b/python/pyarrow-stubs/_ipc.pyi index 1676e49e962..23d770070e7 100644 --- a/python/pyarrow-stubs/_ipc.pyi +++ b/python/pyarrow-stubs/_ipc.pyi @@ -34,6 +34,7 @@ from pyarrow.lib import MemoryPool, RecordBatch, Schema, Table, Tensor, _Weakref from .io import Buffer, Codec, NativeFile from ._types import DictionaryMemo, KeyValueMetadata + class MetadataVersion(enum.IntEnum): V1 = enum.auto() V2 = enum.auto() @@ -41,22 +42,8 @@ class MetadataVersion(enum.IntEnum): V4 = enum.auto() V5 = enum.auto() + class WriteStats(NamedTuple): - """IPC write statistics - - Parameters - ---------- - num_messages : int - Number of messages. - num_record_batches : int - Number of record batches. - num_dictionary_batches : int - Number of dictionary batches. - num_dictionary_deltas : int - Delta of dictionaries. - num_replaced_dictionaries : int - Number of replaced dictionaries. - """ num_messages: int num_record_batches: int @@ -64,22 +51,8 @@ class WriteStats(NamedTuple): num_dictionary_deltas: int num_replaced_dictionaries: int + class ReadStats(NamedTuple): - """IPC read statistics - - Parameters - ---------- - num_messages : int - Number of messages. - num_record_batches : int - Number of record batches. - num_dictionary_batches : int - Number of dictionary batches. - num_dictionary_deltas : int - Delta of dictionaries. - num_replaced_dictionaries : int - Number of replaced dictionaries. - """ num_messages: int num_record_batches: int @@ -87,26 +60,13 @@ class ReadStats(NamedTuple): num_dictionary_deltas: int num_replaced_dictionaries: int + class IpcReadOptions(_Weakrefable): - """ - Serialization options for reading IPC format. - - Parameters - ---------- - ensure_native_endian : bool, default True - Whether to convert incoming data to platform-native endianness. 
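The two equivalent constructions from the removed HadoopFileSystem.from_uri docstring, restated as a sketch; host, port and user are placeholders, and both calls require libhdfs plus a reachable cluster.

    from pyarrow import fs

    hdfs = fs.HadoopFileSystem.from_uri("hdfs://localhost:8020/?user=test&replication=1")
    # ...is equivalent to:
    hdfs = fs.HadoopFileSystem("localhost", port=8020, user="test", replication=1)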
- use_threads : bool - Whether to use the global CPU thread pool to parallelize any - computational tasks like decompression - included_fields : list - If empty (the default), return all deserialized fields. - If non-empty, the values are the indices of fields to read on - the top-level schema - """ ensure_native_endian: bool use_threads: bool included_fields: list[int] + def __init__( self, *, @@ -115,40 +75,8 @@ class IpcReadOptions(_Weakrefable): included_fields: list[int] | None = None, ) -> None: ... + class IpcWriteOptions(_Weakrefable): - """ - Serialization options for the IPC format. - - Parameters - ---------- - metadata_version : MetadataVersion, default MetadataVersion.V5 - The metadata version to write. V5 is the current and latest, - V4 is the pre-1.0 metadata version (with incompatible Union layout). - allow_64bit : bool, default False - If true, allow field lengths that don't fit in a signed 32-bit int. - use_legacy_format : bool, default False - Whether to use the pre-Arrow 0.15 IPC format. - compression : str, Codec, or None - compression codec to use for record batch buffers. - If None then batch buffers will be uncompressed. - Must be "lz4", "zstd" or None. - To specify a compression_level use `pyarrow.Codec` - use_threads : bool - Whether to use the global CPU thread pool to parallelize any - computational tasks like compression. - emit_dictionary_deltas : bool - Whether to emit dictionary deltas. Default is false for maximum - stream compatibility. - unify_dictionaries : bool - If true then calls to write_table will attempt to unify dictionaries - across all batches in the table. This can help avoid the need for - replacement dictionaries (which the file format does not support) - but requires computing the unified dictionary and then remapping - the indices arrays. - - This parameter is ignored when writing to the IPC stream format as - the IPC stream format can support replacement dictionaries. - """ metadata_version: MetadataVersion allow_64bit: bool @@ -157,6 +85,7 @@ class IpcWriteOptions(_Weakrefable): use_threads: bool emit_dictionary_deltas: bool unify_dictionaries: bool + def __init__( self, *, @@ -169,10 +98,8 @@ class IpcWriteOptions(_Weakrefable): unify_dictionaries: bool = False, ) -> None: ... + class Message(_Weakrefable): - """ - Container for an Arrow IPC message with metadata and optional body - """ @property def type(self) -> str: ... @@ -183,416 +110,134 @@ class Message(_Weakrefable): @property def body(self) -> Buffer | None: ... def equals(self, other: Message) -> bool: ... + def serialize_to( self, sink: NativeFile, alignment: int = 8, memory_pool: MemoryPool | None = None - ): - """ - Write message to generic OutputStream - - Parameters - ---------- - sink : NativeFile - alignment : int, default 8 - Byte alignment for metadata and body - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - """ - def serialize(self, alignment: int = 8, memory_pool: MemoryPool | None = None) -> Buffer: - """ - Write message as encapsulated IPC message - - Parameters - ---------- - alignment : int, default 8 - Byte alignment for metadata and body - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - - Returns - ------- - serialized : Buffer - """ + ): ... + + def serialize(self, alignment: int = 8, memory_pool: MemoryPool | + None = None) -> Buffer: ... 
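A self-contained sketch of how IpcWriteOptions feeds the high-level writer API; pa.ipc.new_stream and the in-memory BufferOutputStream are standard pyarrow, and "zstd" assumes the codec was built in (true for the official wheels).

    import pyarrow as pa

    table = pa.table({"x": [1, 2, 3]})
    opts = pa.ipc.IpcWriteOptions(compression="zstd", emit_dictionary_deltas=True)
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, table.schema, options=opts) as writer:
        writer.write_table(table)
    print(writer.stats)           # WriteStats(num_messages=..., num_record_batches=..., ...)
    ipc_bytes = sink.getvalue()   # Buffer holding the serialized stream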
+ class MessageReader(_Weakrefable): - """ - Interface for reading Message objects from some source (like an - InputStream) - """ + @classmethod - def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: - """ - Open stream from source, if you want to use memory map use - MemoryMappedFile as source. - - Parameters - ---------- - source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object - A readable source, like an InputStream - """ + def open_stream(cls, source: bytes | NativeFile | + IOBase | SupportPyBuffer) -> Self: ... + def __iter__(self) -> Self: ... - def read_next_message(self) -> Message: - """ - Read next Message from the stream. - - Raises - ------ - StopIteration - At end of stream - """ + def read_next_message(self) -> Message: ... + __next__ = read_next_message # ---------------------------------------------------------------------- # File and stream readers and writers + class _CRecordBatchWriter(_Weakrefable): - """The base RecordBatchWriter wrapper. - - Provides common implementations of convenience methods. Should not - be instantiated directly by user code. - """ - def write(self, table_or_batch: Table | RecordBatch): - """ - Write RecordBatch or Table to stream. - - Parameters - ---------- - table_or_batch : {RecordBatch, Table} - """ + + def write(self, table_or_batch: Table | RecordBatch): ... + def write_batch( self, batch: RecordBatch, custom_metadata: Mapping[bytes, bytes] | KeyValueMetadata | None = None, - ): - """ - Write RecordBatch to stream. - - Parameters - ---------- - batch : RecordBatch - custom_metadata : mapping or KeyValueMetadata - Keys and values must be string-like / coercible to bytes - """ - def write_table(self, table: Table, max_chunksize: int | None = None) -> None: - """ - Write Table to stream in (contiguous) RecordBatch objects. - - Parameters - ---------- - table : Table - max_chunksize : int, default None - Maximum number of rows for RecordBatch chunks. Individual chunks may - be smaller depending on the chunk layout of individual columns. - """ - def close(self) -> None: - """ - Close stream and write end-of-stream 0 marker. - """ + ): ... + + def write_table(self, table: Table, max_chunksize: int | None = None) -> None: ... + + def close(self) -> None: ... + def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_val, exc_tb): ... @property - def stats(self) -> WriteStats: - """ - Current IPC write statistics. - """ + def stats(self) -> WriteStats: ... + class _RecordBatchStreamWriter(_CRecordBatchWriter): @property def _use_legacy_format(self) -> bool: ... @property def _metadata_version(self) -> MetadataVersion: ... - def _open(self, sink, schema: Schema, options: IpcWriteOptions = IpcWriteOptions()): ... + def _open(self, sink, schema: Schema, + options: IpcWriteOptions = IpcWriteOptions()): ... class _ReadPandasMixin: - def read_pandas(self, **options) -> pd.DataFrame: - """ - Read contents of stream to a pandas.DataFrame. - - Read all record batches as a pyarrow.Table then convert it to a - pandas.DataFrame using Table.to_pandas. - - Parameters - ---------- - **options - Arguments to forward to :meth:`Table.to_pandas`. + def read_pandas(self, **options) -> pd.DataFrame: ... - Returns - ------- - df : pandas.DataFrame - """ class RecordBatchReader(_Weakrefable): - """Base class for reading stream of record batches. - - Record batch readers function as iterators of record batches that also - provide the schema (without the need to get any batches). 
- - Warnings - -------- - Do not call this class's constructor directly, use one of the - ``RecordBatchReader.from_*`` functions instead. - - Notes - ----- - To import and export using the Arrow C stream interface, use the - ``_import_from_c`` and ``_export_to_c`` methods. However, keep in mind this - interface is intended for expert users. - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([("x", pa.int64())]) - >>> def iter_record_batches(): - ... for i in range(2): - ... yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema) - >>> reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches()) - >>> print(reader.schema) - x: int64 - >>> for batch in reader: - ... print(batch) - pyarrow.RecordBatch - x: int64 - ---- - x: [1,2,3] - pyarrow.RecordBatch - x: int64 - ---- - x: [1,2,3] - """ def __iter__(self) -> Self: ... - def read_next_batch(self) -> RecordBatch: - """ - Read next RecordBatch from the stream. - - Raises - ------ - StopIteration: - At end of stream. - - Returns - ------- - RecordBatch - """ + def read_next_batch(self) -> RecordBatch: ... + __next__ = read_next_batch @property - def schema(self) -> Schema: - """ - Shared schema of the record batches in the stream. - - Returns - ------- - Schema - """ - def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: - """ - Read next RecordBatch from the stream along with its custom metadata. - - Raises - ------ - StopIteration: - At end of stream. - - Returns - ------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ + def schema(self) -> Schema: ... + + def read_next_batch_with_custom_metadata(self) -> RecordBatchWithMetadata: ... + def iter_batches_with_custom_metadata( self, - ) -> Iterator[RecordBatchWithMetadata]: - """ - Iterate over record batches from the stream along with their custom - metadata. - - Yields - ------ - RecordBatchWithMetadata - """ - def read_all(self) -> Table: - """ - Read all record batches as a pyarrow.Table. - - Returns - ------- - Table - """ - read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] - def close(self) -> None: - """ - Release any resources associated with the reader. - """ + ) -> Iterator[RecordBatchWithMetadata]: ... + + def read_all(self) -> Table: ... + + # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + read_pandas = _ReadPandasMixin.read_pandas + def close(self) -> None: ... + def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_val, exc_tb): ... - def cast(self, target_schema: Schema) -> Self: - """ - Wrap this reader with one that casts each batch lazily as it is pulled. - Currently only a safe cast to target_schema is implemented. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - - Returns - ------- - RecordBatchReader - """ - def _export_to_c(self, out_ptr: int) -> None: - """ - Export to a C ArrowArrayStream struct, given its pointer. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowArrayStream struct. - - Be careful: if you don't pass the ArrowArrayStream struct to a - consumer, array memory will leak. This is a low-level function - intended for expert users. - """ + def cast(self, target_schema: Schema) -> Self: ... + + def _export_to_c(self, out_ptr: int) -> None: ... 
+ @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: - """ - Import RecordBatchReader from a C ArrowArrayStream struct, - given its pointer. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowArrayStream struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export to a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - - Returns - ------- - PyCapsule - A capsule containing a C ArrowArrayStream struct. - """ + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_stream__(self, requested_schema=None): ... + @classmethod - def _import_from_c_capsule(cls, stream) -> Self: - """ - Import RecordBatchReader from a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - stream: PyCapsule - A capsule containing a C ArrowArrayStream PyCapsule. - - Returns - ------- - RecordBatchReader - """ + def _import_from_c_capsule(cls, stream) -> Self: ... + @classmethod - def from_stream(cls, data: SupportArrowStream, schema: Schema | None = None) -> Self: - """ - Create RecordBatchReader from a Arrow-compatible stream object. - - This accepts objects implementing the Arrow PyCapsule Protocol for - streams, i.e. objects that have a ``__arrow_c_stream__`` method. - - Parameters - ---------- - data : Arrow-compatible stream object - Any object that implements the Arrow PyCapsule Protocol for - streams. - schema : Schema, default None - The schema to which the stream should be casted, if supported - by the stream object. - - Returns - ------- - RecordBatchReader - """ + def from_stream(cls, data: SupportArrowStream, + schema: Schema | None = None) -> Self: ... + @classmethod - def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: - """ - Create RecordBatchReader from an iterable of batches. - - Parameters - ---------- - schema : Schema - The shared schema of the record batches - batches : Iterable[RecordBatch] - The batches that this reader will return. - - Returns - ------- - reader : RecordBatchReader - """ + def from_batches(cls, schema: Schema, batches: Iterable[RecordBatch]) -> Self: ... + class _RecordBatchStreamReader(RecordBatchReader): @property - def stats(self) -> ReadStats: - """ - Current IPC read statistics. - """ + def stats(self) -> ReadStats: ... -class _RecordBatchFileWriter(_RecordBatchStreamWriter): ... -class RecordBatchWithMetadata(NamedTuple): - """RecordBatch with its custom metadata +class _RecordBatchFileWriter(_RecordBatchStreamWriter): + ... + - Parameters - ---------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ +class RecordBatchWithMetadata(NamedTuple): batch: RecordBatch custom_metadata: KeyValueMetadata + class _RecordBatchFileReader(_Weakrefable): @property - def num_record_batches(self) -> int: - """ - The number of record batches in the IPC file. - """ - def get_batch(self, i: int) -> RecordBatch: - """ - Read the record batch with the given index. - - Parameters - ---------- - i : int - The index of the record batch in the IPC file. - - Returns - ------- - batch : RecordBatch - """ + def num_record_batches(self) -> int: ... + + def get_batch(self, i: int) -> RecordBatch: ... 
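The iterator example removed from the RecordBatchReader docstring, kept here in sketch form, since from_batches is the main constructor these stubs describe.

    import pyarrow as pa

    schema = pa.schema([("x", pa.int64())])

    def iter_record_batches():
        for _ in range(2):
            yield pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], schema=schema)

    reader = pa.RecordBatchReader.from_batches(schema, iter_record_batches())
    print(reader.schema)      # x: int64
    for batch in reader:      # or reader.read_all() for a single Table
        print(batch.num_rows)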
+ get_record_batch = get_batch - def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: - """ - Read the record batch with the given index along with - its custom metadata - - Parameters - ---------- - i : int - The index of the record batch in the IPC file. - - Returns - ------- - batch : RecordBatch - custom_metadata : KeyValueMetadata - """ - def read_all(self) -> Table: - """ - Read all record batches as a pyarrow.Table - """ - read_pandas = _ReadPandasMixin.read_pandas # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + def get_batch_with_custom_metadata(self, i: int) -> RecordBatchWithMetadata: ... + + def read_all(self) -> Table: ... + + # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType] + read_pandas = _ReadPandasMixin.read_pandas def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_val, exc_tb): ... @property @@ -600,105 +245,30 @@ class _RecordBatchFileReader(_Weakrefable): @property def stats(self) -> ReadStats: ... -def get_tensor_size(tensor: Tensor) -> int: - """ - Return total size of serialized Tensor including metadata and padding. - - Parameters - ---------- - tensor : Tensor - The tensor for which we want to known the size. - """ - -def get_record_batch_size(batch: RecordBatch) -> int: - """ - Return total size of serialized RecordBatch including metadata and padding. - - Parameters - ---------- - batch : RecordBatch - The recordbatch for which we want to know the size. - """ - -def write_tensor(tensor: Tensor, dest: NativeFile) -> int: - """ - Write pyarrow.Tensor to pyarrow.NativeFile object its current position. - - Parameters - ---------- - tensor : pyarrow.Tensor - dest : pyarrow.NativeFile - - Returns - ------- - bytes_written : int - Total number of bytes written to the file - """ - -def read_tensor(source: NativeFile) -> Tensor: - """Read pyarrow.Tensor from pyarrow.NativeFile object from current - position. If the file source supports zero copy (e.g. a memory map), then - this operation does not allocate any memory. This function not assume that - the stream is aligned - - Parameters - ---------- - source : pyarrow.NativeFile - - Returns - ------- - tensor : Tensor - - """ - -def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: - """ - Read length-prefixed message from file or buffer-like object - - Parameters - ---------- - source : pyarrow.NativeFile, file-like object, or buffer-like object - - Returns - ------- - message : Message - """ - -def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | None = None) -> Schema: - """ - Read Schema from message or buffer - - Parameters - ---------- - obj : buffer or Message - dictionary_memo : DictionaryMemo, optional - Needed to be able to reconstruct dictionary-encoded fields - with read_record_batch - - Returns - ------- - schema : Schema - """ + +def get_tensor_size(tensor: Tensor) -> int: ... + + +def get_record_batch_size(batch: RecordBatch) -> int: ... + + +def write_tensor(tensor: Tensor, dest: NativeFile) -> int: ... + + +def read_tensor(source: NativeFile) -> Tensor: ... + + +def read_message(source: NativeFile | IOBase | SupportPyBuffer) -> Message: ... + + +def read_schema(obj: Buffer | Message, dictionary_memo: DictionaryMemo | + None = None) -> Schema: ... + def read_record_batch( obj: Message | SupportPyBuffer, schema: Schema, dictionary_memo: DictionaryMemo | None = None -) -> RecordBatch: - """ - Read RecordBatch from message, given a known schema. 
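A round-trip sketch for the message-level helpers declared above; RecordBatch.serialize() (part of the core API, not shown in this diff) produces an encapsulated IPC message that read_record_batch can decode against a known schema.

    import pyarrow as pa

    batch = pa.record_batch([pa.array([1, 2, 3])], names=["x"])
    print(pa.ipc.get_record_batch_size(batch))   # serialized size incl. metadata and padding
    buf = batch.serialize()                      # IPC message as a Buffer
    restored = pa.ipc.read_record_batch(buf, batch.schema)
    assert restored.equals(batch)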
If reading data from a - complete IPC stream, use ipc.open_stream instead - - Parameters - ---------- - obj : Message or Buffer-like - schema : Schema - dictionary_memo : DictionaryMemo, optional - If message contains dictionaries, must pass a populated - DictionaryMemo - - Returns - ------- - batch : RecordBatch - """ +) -> RecordBatch: ... + __all__ = [ "MetadataVersion", diff --git a/python/pyarrow-stubs/_json.pyi b/python/pyarrow-stubs/_json.pyi index f416b4b29c6..b52be2bf028 100644 --- a/python/pyarrow-stubs/_json.pyi +++ b/python/pyarrow-stubs/_json.pyi @@ -22,165 +22,49 @@ from _typeshed import StrPath from .lib import MemoryPool, RecordBatchReader, Schema, Table, _Weakrefable class ReadOptions(_Weakrefable): - """ - Options for reading JSON files. - - Parameters - ---------- - use_threads : bool, optional (default True) - Whether to use multiple threads to accelerate reading - block_size : int, optional - How much bytes to process at a time from the input stream. - This will determine multi-threading granularity as well as - the size of individual chunks in the Table. - """ + use_threads: bool - """ - Whether to use multiple threads to accelerate reading. - """ + block_size: int - """ - How much bytes to process at a time from the input stream. - This will determine multi-threading granularity as well as the size of - individual chunks in the Table. - """ def __init__(self, use_threads: bool | None = None, block_size: int | None = None): ... - def equals(self, other: ReadOptions) -> bool: - """ - Parameters - ---------- - other : pyarrow.json.ReadOptions + def equals(self, other: ReadOptions) -> bool: ... - Returns - ------- - bool - """ class ParseOptions(_Weakrefable): - """ - Options for parsing JSON files. - - Parameters - ---------- - explicit_schema : Schema, optional (default None) - Optional explicit schema (no type inference, ignores other fields). - newlines_in_values : bool, optional (default False) - Whether objects may be printed across multiple lines (for example - pretty printed). If false, input must end with an empty line. - unexpected_field_behavior : str, default "infer" - How JSON fields outside of explicit_schema (if given) are treated. - - Possible behaviors: - - - "ignore": unexpected JSON fields are ignored - - "error": error out on unexpected JSON fields - - "infer": unexpected JSON fields are type-inferred and included in - the output - """ + explicit_schema: Schema - """ - Optional explicit schema (no type inference, ignores other fields) - """ - newlines_in_values: bool - """ - Whether newline characters are allowed in JSON values. - Setting this to True reduces the performance of multi-threaded - JSON reading. - """ - unexpected_field_behavior: Literal["ignore", "error", "infer"] - """ - How JSON fields outside of explicit_schema (if given) are treated. - Possible behaviors: + newlines_in_values: bool - - "ignore": unexpected JSON fields are ignored - - "error": error out on unexpected JSON fields - - "infer": unexpected JSON fields are type-inferred and included in - the output + unexpected_field_behavior: Literal["ignore", "error", "infer"] - Set to "infer" by default. - """ def __init__( self, explicit_schema: Schema | None = None, newlines_in_values: bool | None = None, unexpected_field_behavior: Literal["ignore", "error", "infer"] = "infer", ): ... - def equals(self, other: ParseOptions) -> bool: - """ - Parameters - ---------- - other : pyarrow.json.ParseOptions + def equals(self, other: ParseOptions) -> bool: ... 
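A small sketch of how ReadOptions and ParseOptions feed read_json (declared just below); "data.jsonl" is a placeholder for a line-delimited JSON file.

    from pyarrow import json as pa_json

    parse_opts = pa_json.ParseOptions(unexpected_field_behavior="ignore")
    read_opts = pa_json.ReadOptions(use_threads=True, block_size=1 << 20)
    # table = pa_json.read_json("data.jsonl",
    #                           read_options=read_opts, parse_options=parse_opts)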
- Returns - ------- - bool - """ -class JSONStreamingReader(RecordBatchReader): - """An object that reads record batches incrementally from a JSON file. +class JSONStreamingReader(RecordBatchReader): ... - Should not be instantiated directly by user code. - """ def read_json( input_file: StrPath | IO[Any], read_options: ReadOptions | None = None, parse_options: ParseOptions | None = None, memory_pool: MemoryPool | None = None, -) -> Table: - """ - Read a Table from a stream of JSON data. - - Parameters - ---------- - input_file : str, path or file-like object - The location of JSON data. Currently only the line-delimited JSON - format is supported. - read_options : pyarrow.json.ReadOptions, optional - Options for the JSON reader (see ReadOptions constructor for defaults). - parse_options : pyarrow.json.ParseOptions, optional - Options for the JSON parser - (see ParseOptions constructor for defaults). - memory_pool : MemoryPool, optional - Pool to allocate Table memory from. - - Returns - ------- - :class:`pyarrow.Table` - Contents of the JSON file as a in-memory table. - """ +) -> Table: ... + def open_json( input_file: StrPath | IO[Any], read_options: ReadOptions | None = None, parse_options: ParseOptions | None = None, memory_pool: MemoryPool | None = None, -) -> JSONStreamingReader: - """ - Open a streaming reader of JSON data. - - Reading using this function is always single-threaded. - - Parameters - ---------- - input_file : string, path or file-like object - The location of JSON data. If a string or path, and if it ends - with a recognized compressed file extension (e.g. ".gz" or ".bz2"), - the data is automatically decompressed when reading. - read_options : pyarrow.json.ReadOptions, optional - Options for the JSON reader (see pyarrow.json.ReadOptions constructor - for defaults) - parse_options : pyarrow.json.ParseOptions, optional - Options for the JSON parser - (see pyarrow.json.ParseOptions constructor for defaults) - memory_pool : MemoryPool, optional - Pool to allocate RecordBatch memory from - - Returns - ------- - :class:`pyarrow.json.JSONStreamingReader` - """ +) -> JSONStreamingReader: ... + diff --git a/python/pyarrow-stubs/_parquet.pyi b/python/pyarrow-stubs/_parquet.pyi index c75337cbf3b..ce499fd1c16 100644 --- a/python/pyarrow-stubs/_parquet.pyi +++ b/python/pyarrow-stubs/_parquet.pyi @@ -110,6 +110,7 @@ _Compression: TypeAlias = Literal[ "UNKNOWN", ] + class _Statistics(TypedDict): has_min_max: bool min: Any | None @@ -119,6 +120,7 @@ class _Statistics(TypedDict): num_values: int physical_type: _PhysicalType + class Statistics(_Weakrefable): def to_dict(self) -> _Statistics: ... def equals(self, other: Statistics) -> bool: ... @@ -149,11 +151,13 @@ class Statistics(_Weakrefable): @property def converted_type(self) -> _ConvertedType | None: ... + class ParquetLogicalType(_Weakrefable): def to_json(self) -> str: ... @property def type(self) -> _LogicTypeName: ... + class _ColumnChunkMetaData(TypedDict): file_offset: int file_path: str | None @@ -170,6 +174,7 @@ class _ColumnChunkMetaData(TypedDict): total_compressed_size: int total_uncompressed_size: int + class ColumnChunkMetaData(_Weakrefable): def to_dict(self) -> _ColumnChunkMetaData: ... def equals(self, other: ColumnChunkMetaData) -> bool: ... @@ -212,15 +217,18 @@ class ColumnChunkMetaData(_Weakrefable): @property def metadata(self) -> dict[bytes, bytes] | None: ... 
+ class _SortingColumn(TypedDict): column_index: int descending: bool nulls_first: bool + class SortingColumn: def __init__( self, column_index: int, descending: bool = False, nulls_first: bool = False ) -> None: ... + @classmethod def from_ordering( cls, @@ -228,6 +236,7 @@ class SortingColumn: sort_keys: Sequence[tuple[str, Order]], null_placement: Literal["at_start", "at_end"] = "at_end", ) -> tuple[SortingColumn, ...]: ... + @staticmethod def to_ordering( schema: Schema, sorting_columns: tuple[SortingColumn, ...] @@ -241,6 +250,7 @@ class SortingColumn: def nulls_first(self) -> bool: ... def to_dict(self) -> _SortingColumn: ... + class _RowGroupMetaData(TypedDict): num_columns: int num_rows: int @@ -248,6 +258,7 @@ class _RowGroupMetaData(TypedDict): columns: list[ColumnChunkMetaData] sorting_columns: list[SortingColumn] + class RowGroupMetaData(_Weakrefable): def __init__(self, parent: FileMetaData, index: int) -> None: ... def equals(self, other: RowGroupMetaData) -> bool: ... @@ -262,6 +273,7 @@ class RowGroupMetaData(_Weakrefable): @property def sorting_columns(self) -> list[SortingColumn]: ... + class _FileMetaData(TypedDict): created_by: str num_columns: int @@ -270,6 +282,7 @@ class _FileMetaData(TypedDict): format_version: str serialized_size: int + class FileMetaData(_Weakrefable): def __hash__(self) -> int: ... def to_dict(self) -> _FileMetaData: ... @@ -293,7 +306,9 @@ class FileMetaData(_Weakrefable): def row_group(self, i: int) -> RowGroupMetaData: ... def set_file_path(self, path: str) -> None: ... def append_row_groups(self, other: FileMetaData) -> None: ... - def write_metadata_file(self, where: StrPath | Buffer | NativeFile | IO) -> None: ... + def write_metadata_file(self, where: StrPath | Buffer | + NativeFile | IO) -> None: ... + class ParquetSchema(_Weakrefable): def __init__(self, container: FileMetaData) -> None: ... @@ -306,6 +321,7 @@ class ParquetSchema(_Weakrefable): def equals(self, other: ParquetSchema) -> bool: ... def column(self, i: int) -> ColumnSchema: ... + class ColumnSchema(_Weakrefable): def __init__(self, schema: ParquetSchema, index: int) -> None: ... def equals(self, other: ColumnSchema) -> bool: ... @@ -330,8 +346,10 @@ class ColumnSchema(_Weakrefable): @property def scale(self) -> int | None: ... + class ParquetReader(_Weakrefable): def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... + def open( self, source: StrPath | NativeFile | IO, @@ -357,6 +375,7 @@ class ParquetReader(_Weakrefable): def num_row_groups(self) -> int: ... def set_use_threads(self, use_threads: bool) -> None: ... def set_batch_size(self, batch_size: int) -> None: ... + def iter_batches( self, batch_size: int, @@ -364,25 +383,31 @@ class ParquetReader(_Weakrefable): column_indices: list[int] | None = None, use_threads: bool = True, ) -> Iterator[RecordBatch]: ... + def read_row_group( self, i: int, column_indices: list[int] | None = None, use_threads: bool = True ) -> Table: ... + def read_row_groups( self, row_groups: list[int], column_indices: list[int] | None = None, use_threads: bool = True, ) -> Table: ... + def read_all( self, column_indices: list[int] | None = None, use_threads: bool = True ) -> Table: ... - def scan_contents(self, column_indices: list[int] | None = None, batch_size: int = 65536): ... + def scan_contents( + self, column_indices: list[int] | None = None, batch_size: int = 65536): ... + def column_name_idx(self, column_name: str) -> int: ... def read_column(self, column_index: int) -> ChunkedArray: ... def close(self) -> None: ... 
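An inspection sketch tying FileMetaData, RowGroupMetaData, ColumnChunkMetaData and Statistics together; "example.parquet" is a placeholder path, and the file is written in place to keep the example self-contained.

    import pyarrow as pa
    import pyarrow.parquet as pq

    pq.write_table(pa.table({"x": [1, 2, 3]}), "example.parquet")
    md = pq.read_metadata("example.parquet")   # FileMetaData
    rg = md.row_group(0)                       # RowGroupMetaData
    col = rg.column(0)                         # ColumnChunkMetaData
    print(md.num_rows, rg.num_rows)
    print(col.statistics.min, col.statistics.max)   # Statistics for this freshly written column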
@property def closed(self) -> bool: ... + class ParquetWriter(_Weakrefable): def __init__( self, @@ -458,5 +483,10 @@ class ParquetWriter(_Weakrefable): @property def store_decimal_as_integer(self) -> bool: ... -class FileEncryptionProperties: ... -class FileDecryptionProperties: ... + +class FileEncryptionProperties: + ... + + +class FileDecryptionProperties: + ... diff --git a/python/pyarrow-stubs/_parquet_encryption.pyi b/python/pyarrow-stubs/_parquet_encryption.pyi index e1228cbdb5a..cf09b6ee39c 100644 --- a/python/pyarrow-stubs/_parquet_encryption.pyi +++ b/python/pyarrow-stubs/_parquet_encryption.pyi @@ -22,6 +22,7 @@ from typing import Callable from ._parquet import FileDecryptionProperties, FileEncryptionProperties from .lib import _Weakrefable + class EncryptionConfiguration(_Weakrefable): footer_key: str column_keys: dict[str, list[str]] @@ -45,15 +46,18 @@ class EncryptionConfiguration(_Weakrefable): data_key_length_bits: int | None = None, ) -> None: ... + class DecryptionConfiguration(_Weakrefable): cache_lifetime: dt.timedelta def __init__(self, *, cache_lifetime: dt.timedelta | None = None): ... + class KmsConnectionConfig(_Weakrefable): kms_instance_id: str kms_instance_url: str key_access_token: str custom_kms_conf: dict[str, str] + def __init__( self, *, @@ -64,17 +68,22 @@ class KmsConnectionConfig(_Weakrefable): ) -> None: ... def refresh_key_access_token(self, value: str) -> None: ... + class KmsClient(_Weakrefable): def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ... def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ... + class CryptoFactory(_Weakrefable): - def __init__(self, kms_client_factory: Callable[[KmsConnectionConfig], KmsClient]): ... + def __init__(self, kms_client_factory: Callable[[ + KmsConnectionConfig], KmsClient]): ... + def file_encryption_properties( self, kms_connection_config: KmsConnectionConfig, encryption_config: EncryptionConfiguration, ) -> FileEncryptionProperties: ... + def file_decryption_properties( self, kms_connection_config: KmsConnectionConfig, diff --git a/python/pyarrow-stubs/_s3fs.pyi b/python/pyarrow-stubs/_s3fs.pyi index f1399bc4b1e..f065d78f993 100644 --- a/python/pyarrow-stubs/_s3fs.pyi +++ b/python/pyarrow-stubs/_s3fs.pyi @@ -23,6 +23,7 @@ from typing_extensions import Required, NotRequired from ._fs import FileSystem from .lib import KeyValueMetadata + class _ProxyOptions(TypedDict): schema: Required[Literal["http", "https"]] host: Required[str] @@ -30,6 +31,7 @@ class _ProxyOptions(TypedDict): username: NotRequired[str] password: NotRequired[str] + class S3LogLevel(enum.IntEnum): Off = enum.auto() Fatal = enum.auto() @@ -39,6 +41,7 @@ class S3LogLevel(enum.IntEnum): Debug = enum.auto() Trace = enum.auto() + Off = S3LogLevel.Off Fatal = S3LogLevel.Fatal Error = S3LogLevel.Error @@ -47,6 +50,7 @@ Info = S3LogLevel.Info Debug = S3LogLevel.Debug Trace = S3LogLevel.Trace + def initialize_s3( log_level: S3LogLevel = S3LogLevel.Fatal, num_event_loop_threads: int = 1 ) -> None: ... @@ -55,12 +59,19 @@ def finalize_s3() -> None: ... def ensure_s3_finalized() -> None: ... def resolve_s3_region(bucket: str) -> str: ... + class S3RetryStrategy: max_attempts: int def __init__(self, max_attempts=3) -> None: ... -class AwsStandardS3RetryStrategy(S3RetryStrategy): ... -class AwsDefaultS3RetryStrategy(S3RetryStrategy): ... + +class AwsStandardS3RetryStrategy(S3RetryStrategy): + ... + + +class AwsDefaultS3RetryStrategy(S3RetryStrategy): + ... 
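An S3FileSystem construction sketch using resolve_s3_region and the retry strategies declared above; the bucket name is a placeholder, the retry_strategy keyword belongs to S3FileSystem.__init__ (its parameter list is elided in this hunk), and AwsStandardS3RetryStrategy is assumed to be re-exported through pyarrow.fs as in current releases.

    from pyarrow import fs

    # region = fs.resolve_s3_region("my-bucket")   # placeholder bucket, needs network
    s3 = fs.S3FileSystem(
        anonymous=True,
        region="us-east-1",
        retry_strategy=fs.AwsStandardS3RetryStrategy(max_attempts=5),
    )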
+ class S3FileSystem(FileSystem): def __init__( diff --git a/python/pyarrow-stubs/_stubs_typing.pyi b/python/pyarrow-stubs/_stubs_typing.pyi index 98479791103..56aa7fd1123 100644 --- a/python/pyarrow-stubs/_stubs_typing.pyi +++ b/python/pyarrow-stubs/_stubs_typing.pyi @@ -56,21 +56,27 @@ _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T + class SupportEq(Protocol): def __eq__(self, other) -> bool: ... + class SupportLt(Protocol): def __lt__(self, other) -> bool: ... + class SupportGt(Protocol): def __gt__(self, other) -> bool: ... + class SupportLe(Protocol): def __le__(self, other) -> bool: ... + class SupportGe(Protocol): def __ge__(self, other) -> bool: ... + FilterTuple: TypeAlias = ( tuple[str, Literal["=", "==", "!="], SupportEq] | tuple[str, Literal["<"], SupportLt] @@ -80,22 +86,31 @@ FilterTuple: TypeAlias = ( | tuple[str, Literal["in", "not in"], Collection] ) -class Buffer(Protocol): ... -class SupportPyBuffer(Protocol): ... +class Buffer(Protocol): + ... + + +class SupportPyBuffer(Protocol): + ... + class SupportArrowStream(Protocol): def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + class SupportArrowArray(Protocol): def __arrow_c_array__(self, requested_schema=None) -> Any: ... + class SupportArrowDeviceArray(Protocol): def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + class SupportArrowSchema(Protocol): def __arrow_c_schema(self) -> Any: ... + class NullableCollection(Protocol[_V]): # pyright: ignore[reportInvalidTypeVarUse] def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... diff --git a/python/pyarrow-stubs/_substrait.pyi b/python/pyarrow-stubs/_substrait.pyi index ee78e9720fe..12dd437412f 100644 --- a/python/pyarrow-stubs/_substrait.pyi +++ b/python/pyarrow-stubs/_substrait.pyi @@ -20,6 +20,7 @@ from typing import Any, Callable from ._compute import Expression from .lib import Buffer, RecordBatchReader, Schema, Table, _Weakrefable + def run_query( plan: Buffer | int, *, @@ -28,14 +29,18 @@ def run_query( ) -> RecordBatchReader: ... def _parse_json_plan(plan: bytes) -> Buffer: ... + class SubstraitSchema: schema: Schema expression: Expression def __init__(self, schema: Schema, expression: Expression) -> None: ... def to_pysubstrait(self) -> Any: ... + def serialize_schema(schema: Schema) -> SubstraitSchema: ... def deserialize_schema(buf: Buffer | bytes) -> Schema: ... + + def serialize_expressions( exprs: list[Expression], names: list[str], @@ -44,6 +49,7 @@ def serialize_expressions( allow_arrow_extensions: bool = False, ) -> Buffer: ... + class BoundExpressions(_Weakrefable): @property def schema(self) -> Schema: ... @@ -52,5 +58,6 @@ class BoundExpressions(_Weakrefable): @classmethod def from_substrait(cls, message: Buffer | bytes) -> BoundExpressions: ... + def deserialize_expressions(buf: Buffer | bytes) -> BoundExpressions: ... def get_supported_functions() -> list[str]: ... diff --git a/python/pyarrow-stubs/_types.pyi b/python/pyarrow-stubs/_types.pyi index 6596fb3e1d1..0cb4bba6a6f 100644 --- a/python/pyarrow-stubs/_types.pyi +++ b/python/pyarrow-stubs/_types.pyi @@ -45,669 +45,278 @@ from typing_extensions import TypeVar, deprecated from .io import Buffer from .scalar import ExtensionScalar -class _Weakrefable: ... -class _Metadata(_Weakrefable): ... + +class _Weakrefable: + ... + + +class _Metadata(_Weakrefable): + ... + class DataType(_Weakrefable): - """ - Base class of all Arrow data types. - - Each data type is an *instance* of this class. 
- - Examples - -------- - Instance of int64 type: - - >>> import pyarrow as pa - >>> pa.int64() - DataType(int64) - """ - def field(self, i: int) -> Field: - """ - Parameters - ---------- - i : int - - Returns - ------- - pyarrow.Field - """ + + def field(self, i: int) -> Field: ... + @property def id(self) -> int: ... @property - def bit_width(self) -> int: - """ - Bit width for fixed width type. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64() - DataType(int64) - >>> pa.int64().bit_width - 64 - """ + def bit_width(self) -> int: ... + @property - def byte_width(self) -> int: - """ - Byte width for fixed width type. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64() - DataType(int64) - >>> pa.int64().byte_width - 8 - """ + def byte_width(self) -> int: ... + @property - def num_fields(self) -> int: - """ - The number of child fields. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64() - DataType(int64) - >>> pa.int64().num_fields - 0 - >>> pa.list_(pa.string()) - ListType(list) - >>> pa.list_(pa.string()).num_fields - 1 - >>> struct = pa.struct({'x': pa.int32(), 'y': pa.string()}) - >>> struct.num_fields - 2 - """ + def num_fields(self) -> int: ... + @property - def num_buffers(self) -> int: - """ - Number of data buffers required to construct Array type - excluding children. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64().num_buffers - 2 - >>> pa.string().num_buffers - 3 - """ - def __hash__(self) -> int: - """ - Return hash(self). - """ - def equals(self, other: DataType | str, *, check_metadata: bool = False) -> bool: - """ - Return true if type is equivalent to passed value. - - Parameters - ---------- - other : DataType or string convertible to DataType - check_metadata : bool - Whether nested Field metadata equality should be checked as well. - - Returns - ------- - is_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64().equals(pa.string()) - False - >>> pa.int64().equals(pa.int64()) - True - """ - def to_pandas_dtype(self) -> np.generic: - """ - Return the equivalent NumPy / Pandas dtype. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.int64().to_pandas_dtype() - - """ - def _export_to_c(self, out_ptr: int) -> None: - """ - Export to a C ArrowSchema struct, given its pointer. - - Be careful: if you don't pass the ArrowSchema struct to a consumer, - its memory will leak. This is a low-level function intended for - expert users. - """ + def num_buffers(self) -> int: ... + + def __hash__(self) -> int: ... + + def equals(self, other: DataType | str, *, + check_metadata: bool = False) -> bool: ... + + def to_pandas_dtype(self) -> np.generic: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: - """ - Import DataType from a C ArrowSchema struct, given its pointer. - - This is a low-level function intended for expert users. - """ - def __arrow_c_schema__(self) -> Any: - """ - Export to a ArrowSchema PyCapsule - - Unlike _export_to_c, this will not leak memory if the capsule is not used. - """ + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + @classmethod - def _import_from_c_capsule(cls, schema) -> Self: - """ - Import a DataType from a ArrowSchema PyCapsule + def _import_from_c_capsule(cls, schema) -> Self: ... - Parameters - ---------- - schema : PyCapsule - A valid PyCapsule with name 'arrow_schema' containing an - ArrowSchema pointer. 
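The doctest output dropped from the DataType docstrings, condensed into a few lines.

    import pyarrow as pa

    t = pa.int64()
    print(t.bit_width, t.byte_width, t.num_buffers)     # 64 8 2
    print(t.equals(pa.string()), t.equals(pa.int64()))  # False True
    print(pa.list_(pa.string()).num_fields)             # 1
    print(t.to_pandas_dtype())                          # <class 'numpy.int64'>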
- """ _AsPyType = TypeVar("_AsPyType") _DataTypeT = TypeVar("_DataTypeT", bound=DataType) -class _BasicDataType(DataType, Generic[_AsPyType]): ... -class NullType(_BasicDataType[None]): ... -class BoolType(_BasicDataType[bool]): ... -class UInt8Type(_BasicDataType[int]): ... -class Int8Type(_BasicDataType[int]): ... -class UInt16Type(_BasicDataType[int]): ... -class Int16Type(_BasicDataType[int]): ... -class Uint32Type(_BasicDataType[int]): ... -class Int32Type(_BasicDataType[int]): ... -class UInt64Type(_BasicDataType[int]): ... -class Int64Type(_BasicDataType[int]): ... -class Float16Type(_BasicDataType[float]): ... -class Float32Type(_BasicDataType[float]): ... -class Float64Type(_BasicDataType[float]): ... -class Date32Type(_BasicDataType[dt.date]): ... -class Date64Type(_BasicDataType[dt.date]): ... -class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... -class StringType(_BasicDataType[str]): ... -class LargeStringType(_BasicDataType[str]): ... -class StringViewType(_BasicDataType[str]): ... -class BinaryType(_BasicDataType[bytes]): ... -class LargeBinaryType(_BasicDataType[bytes]): ... -class BinaryViewType(_BasicDataType[bytes]): ... -_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"]) -_Tz = TypeVar("_Tz", str, None, default=None) +class _BasicDataType(DataType, Generic[_AsPyType]): + ... + + +class NullType(_BasicDataType[None]): + ... + + +class BoolType(_BasicDataType[bool]): + ... + + +class UInt8Type(_BasicDataType[int]): + ... + + +class Int8Type(_BasicDataType[int]): + ... + + +class UInt16Type(_BasicDataType[int]): + ... + + +class Int16Type(_BasicDataType[int]): + ... + + +class Uint32Type(_BasicDataType[int]): + ... -class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): - """ - Concrete class for timestamp data types. - Examples - -------- - >>> import pyarrow as pa +class Int32Type(_BasicDataType[int]): + ... + + +class UInt64Type(_BasicDataType[int]): + ... + + +class Int64Type(_BasicDataType[int]): + ... + + +class Float16Type(_BasicDataType[float]): + ... + + +class Float32Type(_BasicDataType[float]): + ... + + +class Float64Type(_BasicDataType[float]): + ... - Create an instance of timestamp type: - >>> pa.timestamp('us') - TimestampType(timestamp[us]) +class Date32Type(_BasicDataType[dt.date]): + ... - Create an instance of timestamp type with timezone: - >>> pa.timestamp('s', tz='UTC') - TimestampType(timestamp[s, tz=UTC]) - """ +class Date64Type(_BasicDataType[dt.date]): + ... + + +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): + ... + + +class StringType(_BasicDataType[str]): + ... + + +class LargeStringType(_BasicDataType[str]): + ... + + +class StringViewType(_BasicDataType[str]): + ... + + +class BinaryType(_BasicDataType[bytes]): + ... + + +class LargeBinaryType(_BasicDataType[bytes]): + ... + + +class BinaryViewType(_BasicDataType[bytes]): + ... + + +_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property - def unit(self) -> _Unit: - """ - The timestamp unit ('s', 'ms', 'us' or 'ns'). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.timestamp('us') - >>> t.unit - 'us' - """ + def unit(self) -> _Unit: ... + @property - def tz(self) -> _Tz: - """ - The timestamp time zone, if any, or None. 
- - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.timestamp('s', tz='UTC') - >>> t.tz - 'UTC' - """ + def tz(self) -> _Tz: ... -_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) -class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): - """ - Concrete class for time32 data types. +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) - Supported time unit resolutions are 's' [second] - and 'ms' [millisecond]. - Examples - -------- - Create an instance of time32 type: +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): - >>> import pyarrow as pa - >>> pa.time32('ms') - Time32Type(time32[ms]) - """ @property - def unit(self) -> _Time32Unit: - """ - The time unit ('s' or 'ms'). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.time32('ms') - >>> t.unit - 'ms' - """ + def unit(self) -> _Time32Unit: ... -_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) -class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): - """ - Concrete class for time64 data types. +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) - Supported time unit resolutions are 'us' [microsecond] - and 'ns' [nanosecond]. - Examples - -------- - Create an instance of time64 type: +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): - >>> import pyarrow as pa - >>> pa.time64('us') - Time64Type(time64[us]) - """ @property - def unit(self) -> _Time64Unit: - """ - The time unit ('us' or 'ns'). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.time64('us') - >>> t.unit - 'us' - """ + def unit(self) -> _Time64Unit: ... -class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): - """ - Concrete class for duration data types. - Examples - -------- - Create an instance of duration type: +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): - >>> import pyarrow as pa - >>> pa.duration('s') - DurationType(duration[s]) - """ @property - def unit(self) -> _Unit: - """ - The duration unit ('s', 'ms', 'us' or 'ns'). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.duration('s') - >>> t.unit - 's' - """ + def unit(self) -> _Unit: ... -class FixedSizeBinaryType(_BasicDataType[Decimal]): - """ - Concrete class for fixed-size binary data types. - Examples - -------- - Create an instance of fixed-size binary type: +class FixedSizeBinaryType(_BasicDataType[Decimal]): + ... - >>> import pyarrow as pa - >>> pa.binary(3) - FixedSizeBinaryType(fixed_size_binary[3]) - """ _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) -class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - """ - Concrete class for decimal32 data types. - Examples - -------- - Create an instance of decimal32 type: +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - >>> import pyarrow as pa - >>> pa.decimal32(5, 2) - Decimal32Type(decimal32(5, 2)) - """ @property - def precision(self) -> _Precision: - """ - The decimal precision, in number of decimal digits (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal32(5, 2) - >>> t.precision - 5 - """ + def precision(self) -> _Precision: ... + @property - def scale(self) -> _Scale: - """ - The decimal scale (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal32(5, 2) - >>> t.scale - 2 - """ + def scale(self) -> _Scale: ... 
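The unit/precision/scale accessors above, condensed from the removed doctests; pa.decimal32 assumes a pyarrow version that ships Decimal32Type (which these stubs declare).

    import pyarrow as pa

    ts = pa.timestamp("s", tz="UTC")
    print(ts.unit, ts.tz)                               # s UTC
    print(pa.time32("ms").unit, pa.time64("ns").unit)   # ms ns
    print(pa.duration("s").unit)                        # s
    d = pa.decimal32(5, 2)
    print(d.precision, d.scale)                         # 5 2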
-class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - """ - Concrete class for decimal64 data types. - Examples - -------- - Create an instance of decimal64 type: +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - >>> import pyarrow as pa - >>> pa.decimal64(5, 2) - Decimal64Type(decimal64(5, 2)) - """ @property - def precision(self) -> _Precision: - """ - The decimal precision, in number of decimal digits (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal64(5, 2) - >>> t.precision - 5 - """ + def precision(self) -> _Precision: ... + @property - def scale(self) -> _Scale: - """ - The decimal scale (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal64(5, 2) - >>> t.scale - 2 - """ + def scale(self) -> _Scale: ... -class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - """ - Concrete class for decimal128 data types. - Examples - -------- - Create an instance of decimal128 type: +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - >>> import pyarrow as pa - >>> pa.decimal128(5, 2) - Decimal128Type(decimal128(5, 2)) - """ @property - def precision(self) -> _Precision: - """ - The decimal precision, in number of decimal digits (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal128(5, 2) - >>> t.precision - 5 - """ + def precision(self) -> _Precision: ... + @property - def scale(self) -> _Scale: - """ - The decimal scale (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal128(5, 2) - >>> t.scale - 2 - """ + def scale(self) -> _Scale: ... -class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - """ - Concrete class for decimal256 data types. - Examples - -------- - Create an instance of decimal256 type: +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): - >>> import pyarrow as pa - >>> pa.decimal256(76, 38) - Decimal256Type(decimal256(76, 38)) - """ @property - def precision(self) -> _Precision: - """ - The decimal precision, in number of decimal digits (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal256(76, 38) - >>> t.precision - 76 - """ + def precision(self) -> _Precision: ... + @property - def scale(self) -> _Scale: - """ - The decimal scale (an integer). - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.decimal256(76, 38) - >>> t.scale - 38 - """ + def scale(self) -> _Scale: ... -class ListType(DataType, Generic[_DataTypeT]): - """ - Concrete class for list data types. - Examples - -------- - Create an instance of ListType: +class ListType(DataType, Generic[_DataTypeT]): - >>> import pyarrow as pa - >>> pa.list_(pa.string()) - ListType(list) - """ @property - def value_field(self) -> Field[_DataTypeT]: - """ - The field for list values. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_(pa.string()).value_field - pyarrow.Field - """ + def value_field(self) -> Field[_DataTypeT]: ... + @property - def value_type(self) -> _DataTypeT: - """ - The data type of list values. + def value_type(self) -> _DataTypeT: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_(pa.string()).value_type - DataType(string) - """ class LargeListType(DataType, Generic[_DataTypeT]): - """ - Concrete class for large list data types - (like ListType, but with 64-bit offsets). 
- - Examples - -------- - Create an instance of LargeListType: - - >>> import pyarrow as pa - >>> pa.large_list(pa.string()) - LargeListType(large_list) - """ + @property def value_field(self) -> Field[_DataTypeT]: ... @property - def value_type(self) -> _DataTypeT: - """ - The data type of large list values. + def value_type(self) -> _DataTypeT: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.large_list(pa.string()).value_type - DataType(string) - """ class ListViewType(DataType, Generic[_DataTypeT]): - """ - Concrete class for list view data types. - - Examples - -------- - Create an instance of ListViewType: - >>> import pyarrow as pa - >>> pa.list_view(pa.string()) - ListViewType(list_view) - """ @property - def value_field(self) -> Field[_DataTypeT]: - """ - The field for list view values. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_view(pa.string()).value_field - pyarrow.Field - """ + def value_field(self) -> Field[_DataTypeT]: ... + @property - def value_type(self) -> _DataTypeT: - """ - The data type of list view values. + def value_type(self) -> _DataTypeT: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_view(pa.string()).value_type - DataType(string) - """ class LargeListViewType(DataType, Generic[_DataTypeT]): - """ - Concrete class for large list view data types - (like ListViewType, but with 64-bit offsets). - - Examples - -------- - Create an instance of LargeListViewType: - - >>> import pyarrow as pa - >>> pa.large_list_view(pa.string()) - LargeListViewType(large_list_view) - """ + @property - def value_field(self) -> Field[_DataTypeT]: - """ - The field for large list view values. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.large_list_view(pa.string()).value_field - pyarrow.Field - """ + def value_field(self) -> Field[_DataTypeT]: ... + @property - def value_type(self) -> _DataTypeT: - """ - The data type of large list view values. + def value_type(self) -> _DataTypeT: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.large_list_view(pa.string()).value_type - DataType(string) - """ class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): - """ - Concrete class for fixed size list data types. - Examples - -------- - Create an instance of FixedSizeListType: - - >>> import pyarrow as pa - >>> pa.list_(pa.int32(), 2) - FixedSizeListType(fixed_size_list[2]) - """ @property - def value_field(self) -> Field[_DataTypeT]: - """ - The field for list values. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_(pa.int32(), 2).value_field - pyarrow.Field - """ + def value_field(self) -> Field[_DataTypeT]: ... + @property - def value_type(self) -> _DataTypeT: - """ - The data type of large list values. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_(pa.int32(), 2).value_type - DataType(int32) - """ + def value_type(self) -> _DataTypeT: ... + @property - def list_size(self) -> _Size: - """ - The size of the fixed size lists. + def list_size(self) -> _Size: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.list_(pa.int32(), 2).list_size - 2 - """ class DictionaryMemo(_Weakrefable): - """ - Tracking container for dictionary-encoded fields. - """ + ... 
+ _IndexT = TypeVar( "_IndexT", @@ -724,716 +333,167 @@ _BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) _ValueT = TypeVar("_ValueT", bound=DataType) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) -class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): - """ - Concrete class for dictionary data types. - - Examples - -------- - Create an instance of dictionary type: - >>> import pyarrow as pa - >>> pa.dictionary(pa.int64(), pa.utf8()) - DictionaryType(dictionary) - """ +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): @property - def ordered(self) -> _Ordered: - """ - Whether the dictionary is ordered, i.e. whether the ordering of values - in the dictionary is important. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.dictionary(pa.int64(), pa.utf8()).ordered - False - """ - @property - def index_type(self) -> _IndexT: - """ - The data type of dictionary indices (a signed integer type). - - Examples - -------- - >>> import pyarrow as pa - >>> pa.dictionary(pa.int16(), pa.utf8()).index_type - DataType(int16) - """ + def ordered(self) -> _Ordered: ... + @property - def value_type(self) -> _BasicValueT: - """ - The dictionary value type. + def index_type(self) -> _IndexT: ... - The dictionary values are found in an instance of DictionaryArray. + @property + def value_type(self) -> _BasicValueT: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.dictionary(pa.int16(), pa.utf8()).value_type - DataType(string) - """ _K = TypeVar("_K", bound=DataType) -class MapType(DataType, Generic[_K, _ValueT, _Ordered]): - """ - Concrete class for map data types. - - Examples - -------- - Create an instance of MapType: - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()) - MapType(map) - >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True) - MapType(map) - """ +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): @property - def key_field(self) -> Field[_K]: - """ - The field for keys in the map entries. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()).key_field - pyarrow.Field - """ + def key_field(self) -> Field[_K]: ... + @property - def key_type(self) -> _K: - """ - The data type of keys in the map entries. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()).key_type - DataType(string) - """ + def key_type(self) -> _K: ... + @property - def item_field(self) -> Field[_ValueT]: - """ - The field for items in the map entries. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()).item_field - pyarrow.Field - """ + def item_field(self) -> Field[_ValueT]: ... + @property - def item_type(self) -> _ValueT: - """ - The data type of items in the map entries. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()).item_type - DataType(int32) - """ + def item_type(self) -> _ValueT: ... + @property - def keys_sorted(self) -> _Ordered: - """ - Should the entries be sorted according to keys. + def keys_sorted(self) -> _Ordered: ... - Examples - -------- - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True).keys_sorted - True - """ _Size = TypeVar("_Size", default=int) + class StructType(DataType): - """ - Concrete class for struct data types. - ``StructType`` supports direct indexing using ``[...]`` (implemented via - ``__getitem__``) to access its fields. 
- It will return the struct field with the given index or name. - - Examples - -------- - >>> import pyarrow as pa - - Accessing fields using direct indexing: - - >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) - >>> struct_type[0] - pyarrow.Field - >>> struct_type['y'] - pyarrow.Field - - Accessing fields using ``field()``: - - >>> struct_type.field(1) - pyarrow.Field - >>> struct_type.field('x') - pyarrow.Field - - # Creating a schema from the struct type's fields: - >>> pa.schema(list(struct_type)) - x: int32 - y: string - """ - def get_field_index(self, name: str) -> int: - """ - Return index of the unique field with the given name. - - Parameters - ---------- - name : str - The name of the field to look up. - - Returns - ------- - index : int - The index of the field with the given name; -1 if the - name isn't found or there are several fields with the given - name. - - Examples - -------- - >>> import pyarrow as pa - >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) - - Index of the field with a name 'y': - - >>> struct_type.get_field_index('y') - 1 - - Index of the field that does not exist: - - >>> struct_type.get_field_index('z') - -1 - """ - def field(self, i: int | str) -> Field: - """ - Select a field by its column name or numeric index. - - Parameters - ---------- - i : int or str - - Returns - ------- - pyarrow.Field - - Examples - -------- - - >>> import pyarrow as pa - >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) - - Select the second field: - - >>> struct_type.field(1) - pyarrow.Field - - Select the field named 'x': - - >>> struct_type.field('x') - pyarrow.Field - """ - def get_all_field_indices(self, name: str) -> list[int]: - """ - Return sorted list of indices for the fields with the given name. - - Parameters - ---------- - name : str - The name of the field to look up. - - Returns - ------- - indices : List[int] - - Examples - -------- - >>> import pyarrow as pa - >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) - >>> struct_type.get_all_field_indices('x') - [0] - """ - def __len__(self) -> int: - """ - Like num_fields(). - """ - def __iter__(self) -> Iterator[Field]: - """ - Iterate over struct fields, in order. - """ - __getitem__ = field # pyright: ignore[reportUnknownVariableType] - @property - def names(self) -> list[str]: - """ - Lists the field names. - - Examples - -------- - >>> import pyarrow as pa - >>> struct_type = pa.struct([('a', pa.int64()), ('b', pa.float64()), ('c', pa.string())]) - >>> struct_type.names - ['a', 'b', 'c'] - """ - @property - def fields(self) -> list[Field]: - """ - Lists all fields within the StructType. - - Examples - -------- - >>> import pyarrow as pa - >>> struct_type = pa.struct([('a', pa.int64()), ('b', pa.float64()), ('c', pa.string())]) - >>> struct_type.fields - [pyarrow.Field, pyarrow.Field, pyarrow.Field] - """ + def get_field_index(self, name: str) -> int: ... -class UnionType(DataType): - """ - Base class for union data types. + def field(self, i: int | str) -> Field: ... - Examples - -------- - Create an instance of a dense UnionType using ``pa.union``: + def get_all_field_indices(self, name: str) -> list[int]: ... - >>> import pyarrow as pa - >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], - ... mode=pa.lib.UnionMode_DENSE), - (DenseUnionType(dense_union),) + def __len__(self) -> int: ... - Create an instance of a dense UnionType using ``pa.dense_union``: + def __iter__(self) -> Iterator[Field]: ... 
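As with the list types, the DictionaryType and MapType stubs above now only type their accessors. A short sketch of what those properties return (standard pyarrow API, nothing specific to this patch):

import pyarrow as pa

# DictionaryType: signed-integer index type, value type, and the ordered flag.
dct = pa.dictionary(pa.int16(), pa.utf8())
assert dct.index_type == pa.int16()
assert dct.value_type == pa.string()
assert dct.ordered is False

# MapType: key/item fields and types, plus keys_sorted (typed via _Ordered).
mp = pa.map_(pa.string(), pa.int32(), keys_sorted=True)
assert mp.key_type == pa.string()
assert mp.item_field.type == pa.int32()
assert mp.keys_sorted is True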
- >>> pa.dense_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - DenseUnionType(dense_union) + __getitem__ = field # pyright: ignore[reportUnknownVariableType] + @property + def names(self) -> list[str]: ... - Create an instance of a sparse UnionType using ``pa.union``: + @property + def fields(self) -> list[Field]: ... - >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], - ... mode=pa.lib.UnionMode_SPARSE), - (SparseUnionType(sparse_union),) - Create an instance of a sparse UnionType using ``pa.sparse_union``: +class UnionType(DataType): - >>> pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - SparseUnionType(sparse_union) - """ @property - def mode(self) -> Literal["sparse", "dense"]: - """ - The mode of the union ("dense" or "sparse"). - - Examples - -------- - >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - >>> union.mode - 'sparse' - """ - @property - def type_codes(self) -> list[int]: - """ - The type code to indicate each data type in this union. - - Examples - -------- - >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - >>> union.type_codes - [0, 1] - """ - def __len__(self) -> int: - """ - Like num_fields(). - """ - def __iter__(self) -> Iterator[Field]: - """ - Iterate over union members, in order. - """ - def field(self, i: int) -> Field: - """ - Return a child field by its numeric index. - - Parameters - ---------- - i : int - - Returns - ------- - pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - >>> union[0] - pyarrow.Field - """ - __getitem__ = field # pyright: ignore[reportUnknownVariableType] + def mode(self) -> Literal["sparse", "dense"]: ... -class SparseUnionType(UnionType): - """ - Concrete class for sparse union types. + @property + def type_codes(self) -> list[int]: ... - Examples - -------- - Create an instance of a sparse UnionType using ``pa.union``: + def __len__(self) -> int: ... - >>> import pyarrow as pa - >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], - ... mode=pa.lib.UnionMode_SPARSE), - (SparseUnionType(sparse_union),) + def __iter__(self) -> Iterator[Field]: ... - Create an instance of a sparse UnionType using ``pa.sparse_union``: + def field(self, i: int) -> Field: ... - >>> pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - SparseUnionType(sparse_union) - """ - @property - def mode(self) -> Literal["sparse"]: - """ - The mode of the union ("dense" or "sparse"). - - Examples - -------- - >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - >>> union.mode - 'sparse' - """ + __getitem__ = field # pyright: ignore[reportUnknownVariableType] -class DenseUnionType(UnionType): - """ - Concrete class for dense union types. - Examples - -------- - Create an instance of a dense UnionType using ``pa.union``: +class SparseUnionType(UnionType): - >>> import pyarrow as pa - >>> pa.union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())], - ... mode=pa.lib.UnionMode_DENSE), - (DenseUnionType(dense_union),) + @property + def mode(self) -> Literal["sparse"]: ... 
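The StructType and union type doctests removed here exercised indexed field access and the union metadata; condensed into one runnable sketch (illustrative only, assuming pyarrow is installed):

import pyarrow as pa

# StructType supports indexing by position or name (__getitem__ aliases field()).
struct_type = pa.struct({"x": pa.int32(), "y": pa.string()})
assert struct_type[0].name == "x"
assert struct_type["y"].type == pa.string()
assert struct_type.get_field_index("y") == 1
assert struct_type.names == ["x", "y"]

# Union types report their mode and the per-child type codes.
union = pa.sparse_union([pa.field("a", pa.binary(10)), pa.field("b", pa.string())])
assert union.mode == "sparse"
assert union.type_codes == [0, 1]
assert len(union) == 2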
- Create an instance of a dense UnionType using ``pa.dense_union``: - >>> pa.dense_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - DenseUnionType(dense_union) - """ +class DenseUnionType(UnionType): @property - def mode(self) -> Literal["dense"]: - """ - The mode of the union ("dense" or "sparse"). - - Examples - -------- - >>> import pyarrow as pa - >>> union = pa.sparse_union([pa.field('a', pa.binary(10)), pa.field('b', pa.string())]) - >>> union.mode - 'sparse' - """ + def mode(self) -> Literal["dense"]: ... + _RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): - """ - Concrete class for run-end encoded types. - """ + @property def run_end_type(self) -> _RunEndType: ... @property def value_type(self) -> _BasicValueT: ... + _StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + class BaseExtensionType(DataType): - """ - Concrete base class for extension types. - """ - def __arrow_ext_class__(self) -> type[ExtensionArray]: - """ - The associated array extension class - """ - def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: - """ - The associated scalar class - """ + + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property - def extension_name(self) -> str: - """ - The extension type name. - """ + def extension_name(self) -> str: ... + @property - def storage_type(self) -> DataType: - """ - The underlying storage type. - """ - def wrap_array(self, storage: _StorageT) -> _StorageT: - """ - Wrap the given storage array as an extension array. - - Parameters - ---------- - storage : Array or ChunkedArray - - Returns - ------- - array : Array or ChunkedArray - Extension array wrapping the storage array - """ + def storage_type(self) -> DataType: ... + + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + class ExtensionType(BaseExtensionType): - """ - Concrete base class for Python-defined extension types. - - Parameters - ---------- - storage_type : DataType - The underlying storage type for the extension type. - extension_name : str - A unique name distinguishing this extension type. The name will be - used when deserializing IPC data. - - Examples - -------- - Define a RationalType extension type subclassing ExtensionType: - - >>> import pyarrow as pa - >>> class RationalType(pa.ExtensionType): - ... def __init__(self, data_type: pa.DataType): - ... if not pa.types.is_integer(data_type): - ... raise TypeError(f"data_type must be an integer type not {data_type}") - ... super().__init__( - ... pa.struct( - ... [ - ... ("numer", data_type), - ... ("denom", data_type), - ... ], - ... ), - ... # N.B. This name does _not_ reference `data_type` so deserialization - ... # will work for _any_ integer `data_type` after registration - ... "my_package.rational", - ... ) - ... def __arrow_ext_serialize__(self) -> bytes: - ... # No parameters are necessary - ... return b"" - ... @classmethod - ... def __arrow_ext_deserialize__(cls, storage_type, serialized): - ... # return an instance of this subclass - ... 
return RationalType(storage_type[0].type) - - Register the extension type: - - >>> pa.register_extension_type(RationalType(pa.int64())) - - Create an instance of RationalType extension type: - - >>> rational_type = RationalType(pa.int32()) - - Inspect the extension type: - - >>> rational_type.extension_name - 'my_package.rational' - >>> rational_type.storage_type - StructType(struct) - - Wrap an array as an extension array: - - >>> storage_array = pa.array( - ... [ - ... {"numer": 10, "denom": 17}, - ... {"numer": 20, "denom": 13}, - ... ], - ... type=rational_type.storage_type - ... ) - >>> rational_array = rational_type.wrap_array(storage_array) - >>> rational_array - - -- is_valid: all not null - -- child 0 type: int32 - [ - 10, - 20 - ] - -- child 1 type: int32 - [ - 17, - 13 - ] - - Or do the same with creating an ExtensionArray: - - >>> rational_array = pa.ExtensionArray.from_storage(rational_type, storage_array) - >>> rational_array - - -- is_valid: all not null - -- child 0 type: int32 - [ - 10, - 20 - ] - -- child 1 type: int32 - [ - 17, - 13 - ] - - Unregister the extension type: - - >>> pa.unregister_extension_type("my_package.rational") - - Note that even though we registered the concrete type - ``RationalType(pa.int64())``, PyArrow will be able to deserialize - ``RationalType(integer_type)`` for any ``integer_type``, as the deserializer - will reference the name ``my_package.rational`` and the ``@classmethod`` - ``__arrow_ext_deserialize__``. - """ - - def __init__(self, storage_type: DataType, extension_name: str) -> None: - """ - Initialize an extension type instance. - - This should be called at the end of the subclass' - ``__init__`` method. - """ - def __arrow_ext_serialize__(self) -> bytes: - """ - Serialized representation of metadata to reconstruct the type object. - - This method should return a bytes object, and those serialized bytes - are stored in the custom metadata of the Field holding an extension - type in an IPC message. - The bytes are passed to ``__arrow_ext_deserialize`` and should hold - sufficient information to reconstruct the data type instance. - """ - @classmethod - def __arrow_ext_deserialize__(cls, storage_type: DataType, serialized: bytes) -> Self: - """ - Return an extension type instance from the storage type and serialized - metadata. - This method should return an instance of the ExtensionType subclass - that matches the passed storage type and serialized metadata (the - return value of ``__arrow_ext_serialize__``). - """ + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... -class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): - """ - Concrete class for fixed shape tensor extension type. + def __arrow_ext_serialize__(self) -> bytes: ... - Examples - -------- - Create an instance of fixed shape tensor extension type: + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes) -> Self: ... - >>> import pyarrow as pa - >>> pa.fixed_shape_tensor(pa.int32(), [2, 2]) - FixedShapeTensorType(extension) - Create an instance of fixed shape tensor extension type with - permutation: +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), - ... permutation=[0, 2, 1]) - >>> tensor_type.permutation - [0, 2, 1] - """ - @property - def value_type(self) -> _ValueT: - """ - Data type of an individual tensor. - """ @property - def shape(self) -> list[int]: - """ - Shape of the tensors. 
- """ + def value_type(self) -> _ValueT: ... + @property - def dim_names(self) -> list[str] | None: - """ - Explicit names of the dimensions. - """ + def shape(self) -> list[int]: ... + @property - def permutation(self) -> list[int] | None: - """ - Indices of the dimensions ordering. - """ + def dim_names(self) -> list[str] | None: ... -class Bool8Type(BaseExtensionType): - """ - Concrete class for bool8 extension type. + @property + def permutation(self) -> list[int] | None: ... - Bool8 is an alternate representation for boolean - arrays using 8 bits instead of 1 bit per value. The underlying - storage type is int8. - Examples - -------- - Create an instance of bool8 extension type: +class Bool8Type(BaseExtensionType): + ... - >>> import pyarrow as pa - >>> pa.bool8() - Bool8Type(extension) - """ class UuidType(BaseExtensionType): - """ - Concrete class for UUID extension type. - """ - -class JsonType(BaseExtensionType): - """ - Concrete class for JSON extension type. - - Examples - -------- - Define the extension type for JSON array + ... - >>> import pyarrow as pa - >>> json_type = pa.json_(pa.large_utf8()) - Create an extension array +class JsonType(BaseExtensionType): + ... - >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] - >>> storage = pa.array(arr, pa.large_utf8()) - >>> pa.ExtensionArray.from_storage(json_type, storage) - - [ - null, - "{ "id":30, "values":["a", "b"] }" - ] - """ class OpaqueType(BaseExtensionType): - """ - Concrete class for opaque extension type. - - Opaque is a placeholder for a type from an external (often non-Arrow) - system that could not be interpreted. - - Examples - -------- - Create an instance of opaque extension type: - >>> import pyarrow as pa - >>> pa.opaque(pa.int32(), "geometry", "postgis") - OpaqueType(extension) - """ @property - def type_name(self) -> str: - """ - The name of the type in the external system. - """ + def type_name(self) -> str: ... + @property - def vendor_name(self) -> str: - """ - The name of the external system. - """ + def vendor_name(self) -> str: ... + # TODO # @deprecated( @@ -1467,2701 +527,347 @@ class OpaqueType(BaseExtensionType): # """ class UnknownExtensionType(ExtensionType): # type: ignore - """ - A concrete class for Python-defined extension types that refer to - an unknown Python implementation. - - Parameters - ---------- - storage_type : DataType - The storage type for which the extension is built. - serialized : bytes - The serialised output. - """ - def __init__(self, storage_type: DataType, serialized: bytes) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ - -def register_extension_type(ext_type: ExtensionType) -> None: # type: ignore - """ - Register a Python extension type. - - Registration is based on the extension name (so different registered types - need unique extension names). Registration needs an extension type - instance, but then works for any instance of the same subclass regardless - of parametrization of the type. - - Parameters - ---------- - ext_type : BaseExtensionType instance - The ExtensionType subclass to register. - - Examples - -------- - Define a RationalType extension type subclassing ExtensionType: - - >>> import pyarrow as pa - >>> class RationalType(pa.ExtensionType): - ... def __init__(self, data_type: pa.DataType): - ... if not pa.types.is_integer(data_type): - ... raise TypeError(f"data_type must be an integer type not {data_type}") - ... super().__init__( - ... pa.struct( - ... [ - ... ("numer", data_type), - ... 
("denom", data_type), - ... ], - ... ), - ... # N.B. This name does _not_ reference `data_type` so deserialization - ... # will work for _any_ integer `data_type` after registration - ... "my_package.rational", - ... ) - ... def __arrow_ext_serialize__(self) -> bytes: - ... # No parameters are necessary - ... return b"" - ... @classmethod - ... def __arrow_ext_deserialize__(cls, storage_type, serialized): - ... # return an instance of this subclass - ... return RationalType(storage_type[0].type) - - Register the extension type: - - >>> pa.register_extension_type(RationalType(pa.int64())) - - Unregister the extension type: - - >>> pa.unregister_extension_type("my_package.rational") - """ - -def unregister_extension_type(type_name: str) -> None: - """ - Unregister a Python extension type. - - Parameters - ---------- - type_name : str - The name of the ExtensionType subclass to unregister. - - Examples - -------- - Define a RationalType extension type subclassing ExtensionType: - - >>> import pyarrow as pa - >>> class RationalType(pa.ExtensionType): - ... def __init__(self, data_type: pa.DataType): - ... if not pa.types.is_integer(data_type): - ... raise TypeError(f"data_type must be an integer type not {data_type}") - ... super().__init__( - ... pa.struct( - ... [ - ... ("numer", data_type), - ... ("denom", data_type), - ... ], - ... ), - ... # N.B. This name does _not_ reference `data_type` so deserialization - ... # will work for _any_ integer `data_type` after registration - ... "my_package.rational", - ... ) - ... def __arrow_ext_serialize__(self) -> bytes: - ... # No parameters are necessary - ... return b"" - ... @classmethod - ... def __arrow_ext_deserialize__(cls, storage_type, serialized): - ... # return an instance of this subclass - ... return RationalType(storage_type[0].type) - - Register the extension type: - - >>> pa.register_extension_type(RationalType(pa.int64())) - - Unregister the extension type: - - >>> pa.unregister_extension_type("my_package.rational") - """ + + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + + +def register_extension_type(ext_type: ExtensionType) -> None: ... # type: ignore + + +def unregister_extension_type(type_name: str) -> None: ... + class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): - """ - KeyValueMetadata - - Parameters - ---------- - __arg0__ : dict - A dict of the key-value metadata - **kwargs : optional - additional key-value metadata - """ - def __init__(self, __arg0__: Mapping[bytes, bytes] | None = None, **kwargs) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ - def equals(self, other: KeyValueMetadata) -> bool: - """ - Parameters - ---------- - other : pyarrow.KeyValueMetadata - - Returns - ------- - bool - """ - def __len__(self) -> int: - """ - Return len(self). - """ - def __contains__(self, __key: object) -> bool: - """ - Return bool(key in self). - """ - def __getitem__(self, __key: Any) -> Any: - """ - Return self[key]. - """ - def __iter__(self) -> Iterator[bytes]: - """ - Implement iter(self). - """ - def get_all(self, key: str) -> list[bytes]: - """ - Parameters - ---------- - key : str - - Returns - ------- - list[byte] - """ - def to_dict(self) -> dict[bytes, bytes]: - """ - Convert KeyValueMetadata to dict. If a key occurs twice, the value for - the first one is returned - """ + + def __init__(self, __arg0__: Mapping[bytes, bytes] + | None = None, **kwargs) -> None: ... + + def equals(self, other: KeyValueMetadata) -> bool: ... 
+ + def __len__(self) -> int: ... + + def __contains__(self, __key: object) -> bool: ... + + def __getitem__(self, __key: Any) -> Any: ... + + def __iter__(self) -> Iterator[bytes]: ... + + def get_all(self, key: str) -> list[bytes]: ... + + def to_dict(self) -> dict[bytes, bytes]: ... + class Field(_Weakrefable, Generic[_DataTypeT]): - """ - A named field, with a data type, nullability, and optional metadata. - - Notes - ----- - Do not use this class's constructor directly; use pyarrow.field - - Examples - -------- - Create an instance of pyarrow.Field: - - >>> import pyarrow as pa - >>> pa.field('key', pa.int32()) - pyarrow.Field - >>> pa.field('key', pa.int32(), nullable=False) - pyarrow.Field - >>> field = pa.field('key', pa.int32(), - ... metadata={"key": "Something important"}) - >>> field - pyarrow.Field - >>> field.metadata - {b'key': b'Something important'} - - Use the field to create a struct type: - - >>> pa.struct([field]) - StructType(struct) - """ - - def equals(self, other: Field, check_metadata: bool = False) -> bool: - """ - Test if this field is equal to the other - - Parameters - ---------- - other : pyarrow.Field - check_metadata : bool, default False - Whether Field metadata equality should be checked as well. - - Returns - ------- - is_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> f1 = pa.field('key', pa.int32()) - >>> f2 = pa.field('key', pa.int32(), nullable=False) - >>> f1.equals(f2) - False - >>> f1.equals(f1) - True - """ - def __hash__(self) -> int: - """ - Return hash(self). - """ + + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + + def __hash__(self) -> int: ... + @property - def nullable(self) -> bool: - """ - The field nullability. - - Examples - -------- - >>> import pyarrow as pa - >>> f1 = pa.field('key', pa.int32()) - >>> f2 = pa.field('key', pa.int32(), nullable=False) - >>> f1.nullable - True - >>> f2.nullable - False - """ + def nullable(self) -> bool: ... + @property - def name(self) -> str: - """ - The field name. - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32()) - >>> field.name - 'key' - """ + def name(self) -> str: ... + @property - def metadata(self) -> dict[bytes, bytes] | None: - """ - The field metadata (if any is set). - - Returns - ------- - metadata : dict or None - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32(), - ... metadata={"key": "Something important"}) - >>> field.metadata - {b'key': b'Something important'} - """ + def metadata(self) -> dict[bytes, bytes] | None: ... + @property def type(self) -> _DataTypeT: ... - def with_metadata(self, metadata: dict[bytes | str, bytes | str]) -> Self: - """ - Add metadata as dict of string keys and values to Field - - Parameters - ---------- - metadata : dict - Keys and values must be string-like / coercible to bytes - - Returns - ------- - field : pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32()) - - Create new field by adding metadata to existing one: - - >>> field_new = field.with_metadata({"key": "Something important"}) - >>> field_new - pyarrow.Field - >>> field_new.metadata - {b'key': b'Something important'} - """ - def remove_metadata(self) -> Self: - """ - Create new field without metadata, if any - - Returns - ------- - field : pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32(), - ... 
metadata={"key": "Something important"}) - >>> field.metadata - {b'key': b'Something important'} - - Create new field by removing the metadata from the existing one: - - >>> field_new = field.remove_metadata() - >>> field_new.metadata - """ - def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: - """ - A copy of this field with the replaced type - - Parameters - ---------- - new_type : pyarrow.DataType - - Returns - ------- - field : pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32()) - >>> field - pyarrow.Field - - Create new field by replacing type of an existing one: - - >>> field_new = field.with_type(pa.int64()) - >>> field_new - pyarrow.Field - """ - def with_name(self, name: str) -> Self: - """ - A copy of this field with the replaced name - - Parameters - ---------- - name : str - - Returns - ------- - field : pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32()) - >>> field - pyarrow.Field - - Create new field by replacing the name of an existing one: - - >>> field_new = field.with_name('lock') - >>> field_new - pyarrow.Field - """ - def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: - """ - A copy of this field with the replaced nullability - - Parameters - ---------- - nullable : bool - - Returns - ------- - field: pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> field = pa.field('key', pa.int32()) - >>> field - pyarrow.Field - >>> field.nullable - True - - Create new field by replacing the nullability of an existing one: - - >>> field_new = field.with_nullable(False) - >>> field_new - pyarrow.Field - >>> field_new.nullable - False - """ - def flatten(self) -> list[Field]: - """ - Flatten this field. If a struct field, individual child fields - will be returned with their names prefixed by the parent's name. - - Returns - ------- - fields : List[pyarrow.Field] - - Examples - -------- - >>> import pyarrow as pa - >>> f1 = pa.field('bar', pa.float64(), nullable=False) - >>> f2 = pa.field('foo', pa.int32()).with_metadata({"key": "Something important"}) - >>> ff = pa.field('ff', pa.struct([f1, f2]), nullable=False) - - Flatten a struct field: - - >>> ff - pyarrow.Field not null> - >>> ff.flatten() - [pyarrow.Field, pyarrow.Field] - """ - def _export_to_c(self, out_ptr: int) -> None: - """ - Export to a C ArrowSchema struct, given its pointer. - - Be careful: if you don't pass the ArrowSchema struct to a consumer, - its memory will leak. This is a low-level function intended for - expert users. - """ + def with_metadata(self, metadata: dict[bytes | str, bytes | str]) -> Self: ... + + def remove_metadata(self) -> Self: ... + + def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ... + + def with_name(self, name: str) -> Self: ... + + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... + + def flatten(self) -> list[Field]: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: - """ - Import Field from a C ArrowSchema struct, given its pointer. - - This is a low-level function intended for expert users. - """ - def __arrow_c_schema__(self) -> Any: - """ - Export to a ArrowSchema PyCapsule - - Unlike _export_to_c, this will not leak memory if the capsule is not used. - """ + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... 
+ @classmethod - def _import_from_c_capsule(cls, schema) -> Self: - """ - Import a Field from a ArrowSchema PyCapsule + def _import_from_c_capsule(cls, schema) -> Self: ... - Parameters - ---------- - schema : PyCapsule - A valid PyCapsule with name 'arrow_schema' containing an - ArrowSchema pointer. - """ class Schema(_Weakrefable): - """ - A named collection of types a.k.a schema. A schema defines the - column names and types in a record batch or table data structure. - They also contain metadata about the columns. For example, schemas - converted from Pandas contain metadata about their original Pandas - types so they can be converted back to the same types. - - Warnings - -------- - Do not call this class's constructor directly. Instead use - :func:`pyarrow.schema` factory function which makes a new Arrow - Schema object. - - Examples - -------- - Create a new Arrow Schema object: - - >>> import pyarrow as pa - >>> pa.schema([ - ... ('some_int', pa.int32()), - ... ('some_string', pa.string()) - ... ]) - some_int: int32 - some_string: string - - Create Arrow Schema with metadata: - - >>> pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}) - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - - def __len__(self) -> int: - """ - Return len(self). - """ - def __getitem__(self, key: str) -> Field: - """ - Return self[key]. - """ + + def __len__(self) -> int: ... + + def __getitem__(self, key: str) -> Field: ... + _field = __getitem__ # pyright: ignore[reportUnknownVariableType] - def __iter__(self) -> Iterator[Field]: - """ - Implement iter(self). - """ - def __hash__(self) -> int: - """ - Return hash(self). - """ + def __iter__(self) -> Iterator[Field]: ... + + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... @property - def pandas_metadata(self) -> dict: - """ - Return deserialized-from-JSON pandas metadata field (if it exists) - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame({'n_legs': [2, 4, 5, 100], - ... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]}) - >>> schema = pa.Table.from_pandas(df).schema - - Select pandas metadata field from Arrow Schema: - - >>> schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, 'stop': 4, 'step': 1}], ... - """ + def pandas_metadata(self) -> dict: ... + @property - def names(self) -> list[str]: - """ - The schema's field names. - - Returns - ------- - list of str - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Get the names of the schema's fields: - - >>> schema.names - ['n_legs', 'animals'] - """ + def names(self) -> list[str]: ... + @property - def types(self) -> list[DataType]: - """ - The schema's field types. - - Returns - ------- - list of DataType - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Get the types of the schema's fields: - - >>> schema.types - [DataType(int64), DataType(string)] - """ + def types(self) -> list[DataType]: ... + @property - def metadata(self) -> dict[bytes, bytes]: - """ - The schema's metadata (if any is set). - - Returns - ------- - metadata: dict or None - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... 
pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}) - - Get the metadata of the schema's fields: - - >>> schema.metadata - {b'n_legs': b'Number of legs per animal'} - """ - def empty_table(self) -> Table: - """ - Provide an empty table according to the schema. - - Returns - ------- - table: pyarrow.Table - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Create an empty table with schema's fields: - - >>> schema.empty_table() - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[]] - animals: [[]] - """ - def equals(self, other: Schema, check_metadata: bool = False) -> bool: - """ - Test if this schema is equal to the other - - Parameters - ---------- - other : pyarrow.Schema - check_metadata : bool, default False - Key/value metadata must be equal too - - Returns - ------- - is_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> schema1 = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}) - >>> schema2 = pa.schema([ - ... ('some_int', pa.int32()), - ... ('some_string', pa.string()) - ... ]) - - Test two equal schemas: - - >>> schema1.equals(schema1) - True - - Test two unequal schemas: - - >>> schema1.equals(schema2) - False - """ + def metadata(self) -> dict[bytes, bytes]: ... + + def empty_table(self) -> Table: ... + + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + @classmethod - def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | None = None) -> Schema: - """ - Returns implied schema from dataframe - - Parameters - ---------- - df : pandas.DataFrame - preserve_index : bool, default True - Whether to store the index as an additional column (or columns, for - MultiIndex) in the resulting `Table`. - The default of None will store the index as a column, except for - RangeIndex which is stored as metadata only. Use - ``preserve_index=True`` to force it to be stored as a column. - - Returns - ------- - pyarrow.Schema - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame({ - ... 'int': [1, 2], - ... 'str': ['a', 'b'] - ... }) - - Create an Arrow Schema from the schema of a pandas dataframe: - - >>> pa.Schema.from_pandas(df) - int: int64 - str: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, ... - """ - def field(self, i: int | str | bytes) -> Field: - """ - Select a field by its column name or numeric index. - - Parameters - ---------- - i : int or string - - Returns - ------- - pyarrow.Field - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Select the second field: - - >>> schema.field(1) - pyarrow.Field - - Select the field of the column named 'n_legs': - - >>> schema.field('n_legs') - pyarrow.Field - """ + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | + None = None) -> Schema: ... + + def field(self, i: int | str | bytes) -> Field: ... 
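Schema's inspection methods follow the same pattern. A small sketch of the accessors typed so far (names, types, metadata, field lookup), again assuming pyarrow is installed:

import pyarrow as pa

schema = pa.schema(
    [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())],
    metadata={"n_legs": "Number of legs per animal"},
)
assert schema.names == ["n_legs", "animals"]
assert schema.types == [pa.int64(), pa.string()]
assert schema.metadata == {b"n_legs": b"Number of legs per animal"}

# field() accepts a position or a name; __getitem__ is typed name-based above.
assert schema.field(1).name == "animals"
assert schema.field("n_legs").type == pa.int64()
assert schema["animals"].type == pa.string()

# empty_table() yields a zero-row Table carrying exactly this schema.
assert schema.empty_table().num_rows == 0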
+ @deprecated("Use 'field' instead") - def field_by_name(self, name: str) -> Field: - """ - DEPRECATED - - Parameters - ---------- - name : str - - Returns - ------- - field: pyarrow.Field - """ - def get_field_index(self, name: str) -> int: - """ - Return index of the unique field with the given name. - - Parameters - ---------- - name : str - The name of the field to look up. - - Returns - ------- - index : int - The index of the field with the given name; -1 if the - name isn't found or there are several fields with the given - name. - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Get the index of the field named 'animals': - - >>> schema.get_field_index("animals") - 1 - - Index in case of several fields with the given name: - - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string()), - ... pa.field('animals', pa.bool_())], - ... metadata={"n_legs": "Number of legs per animal"}) - >>> schema.get_field_index("animals") - -1 - """ - def get_all_field_indices(self, name: str) -> list[int]: - """ - Return sorted list of indices for the fields with the given name. - - Parameters - ---------- - name : str - The name of the field to look up. - - Returns - ------- - indices : List[int] - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string()), - ... pa.field('animals', pa.bool_())]) - - Get the indexes of the fields named 'animals': - - >>> schema.get_all_field_indices("animals") - [1, 2] - """ - def append(self, field: Field) -> Schema: - """ - Append a field at the end of the schema. - - In contrast to Python's ``list.append()`` it does return a new - object, leaving the original Schema unmodified. - - Parameters - ---------- - field : Field - - Returns - ------- - schema: Schema - New object with appended field. - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Append a field 'extra' at the end of the schema: - - >>> schema_new = schema.append(pa.field('extra', pa.bool_())) - >>> schema_new - n_legs: int64 - animals: string - extra: bool - - Original schema is unmodified: - - >>> schema - n_legs: int64 - animals: string - """ - def insert(self, i: int, field: Field) -> Schema: - """ - Add a field at position i to the schema. - - Parameters - ---------- - i : int - field : Field - - Returns - ------- - schema: Schema - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Insert a new field on the second position: - - >>> schema.insert(1, pa.field('extra', pa.bool_())) - n_legs: int64 - extra: bool - animals: string - """ - def remove(self, i: int) -> Schema: - """ - Remove the field at index i from the schema. - - Parameters - ---------- - i : int - - Returns - ------- - schema: Schema - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Remove the second field of the schema: - - >>> schema.remove(1) - n_legs: int64 - """ - def set(self, i: int, field: Field) -> Schema: - """ - Replace a field at position i in the schema. 
- - Parameters - ---------- - i : int - field : Field - - Returns - ------- - schema: Schema - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Replace the second field of the schema with a new field 'extra': - - >>> schema.set(1, pa.field('replaced', pa.bool_())) - n_legs: int64 - replaced: bool - """ + def field_by_name(self, name: str) -> Field: ... + + def get_field_index(self, name: str) -> int: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def append(self, field: Field) -> Schema: ... + + def insert(self, i: int, field: Field) -> Schema: ... + + def remove(self, i: int) -> Schema: ... + + def set(self, i: int, field: Field) -> Schema: ... + @deprecated("Use 'with_metadata' instead") - def add_metadata(self, metadata: dict) -> Schema: - """ - DEPRECATED - - Parameters - ---------- - metadata : dict - Keys and values must be string-like / coercible to bytes - """ - def with_metadata(self, metadata: dict) -> Schema: - """ - Add metadata as dict of string keys and values to Schema - - Parameters - ---------- - metadata : dict - Keys and values must be string-like / coercible to bytes - - Returns - ------- - schema : pyarrow.Schema - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Add metadata to existing schema field: - - >>> schema.with_metadata({"n_legs": "Number of legs per animal"}) - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: - """ - Write Schema to Buffer as encapsulated IPC message - - Parameters - ---------- - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - - Returns - ------- - serialized : Buffer - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())]) - - Write schema to Buffer: - - >>> schema.serialize() - - """ - def remove_metadata(self) -> Schema: - """ - Create new schema without metadata, if any - - Returns - ------- - schema : pyarrow.Schema - - Examples - -------- - >>> import pyarrow as pa - >>> schema = pa.schema([ - ... pa.field('n_legs', pa.int64()), - ... pa.field('animals', pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}) - >>> schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Create a new schema with removing the metadata from the original: - - >>> schema.remove_metadata() - n_legs: int64 - animals: string - """ + def add_metadata(self, metadata: dict) -> Schema: ... + + def with_metadata(self, metadata: dict) -> Schema: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def remove_metadata(self) -> Schema: ... 
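The mutation-style methods stubbed above (append/insert/remove/set, the metadata helpers, serialize) all return new objects rather than modifying the schema in place; a hedged sketch:

import pyarrow as pa

schema = pa.schema([pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())])

# append/insert/remove/set return new Schema objects; schemas are immutable.
assert schema.append(pa.field("extra", pa.bool_())).names == ["n_legs", "animals", "extra"]
assert schema.insert(1, pa.field("extra", pa.bool_())).names == ["n_legs", "extra", "animals"]
assert schema.remove(1).names == ["n_legs"]
assert schema.set(1, pa.field("replaced", pa.bool_())).names == ["n_legs", "replaced"]

# Metadata handling mirrors Field: with_metadata/remove_metadata return copies.
tagged = schema.with_metadata({"n_legs": "Number of legs per animal"})
assert tagged.metadata == {b"n_legs": b"Number of legs per animal"}
assert tagged.remove_metadata().metadata is None

# serialize() writes the schema as an encapsulated IPC message into a Buffer.
assert isinstance(schema.serialize(), pa.Buffer)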
+ def to_string( self, truncate_metadata: bool = True, show_field_metadata: bool = True, show_schema_metadata: bool = True, - ) -> str: - """ - Return human-readable representation of Schema - - Parameters - ---------- - truncate_metadata : boolean, default True - Limit metadata key/value display to a single line of ~80 characters - or less - show_field_metadata : boolean, default True - Display Field-level KeyValueMetadata - show_schema_metadata : boolean, default True - Display Schema-level KeyValueMetadata - element_size_limit : int, default 100 - Maximum number of characters of a single element before it is truncated. - - Returns - ------- - str : the formatted output - """ - def _export_to_c(self, out_ptr: int) -> None: - """ - Export to a C ArrowSchema struct, given its pointer. - - Be careful: if you don't pass the ArrowSchema struct to a consumer, - its memory will leak. This is a low-level function intended for - expert users. - """ + ) -> str: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod - def _import_from_c(cls, in_ptr: int) -> Schema: - """ - Import Schema from a C ArrowSchema struct, given its pointer. - - This is a low-level function intended for expert users. - """ - def __arrow_c_schema__(self) -> Any: - """ - Export to a ArrowSchema PyCapsule - - Unlike _export_to_c, this will not leak memory if the capsule is not used. - """ + def _import_from_c(cls, in_ptr: int) -> Schema: ... + + def __arrow_c_schema__(self) -> Any: ... + @staticmethod - def _import_from_c_capsule(schema: Any) -> Schema: - """ - Import a Schema from a ArrowSchema PyCapsule + def _import_from_c_capsule(schema: Any) -> Schema: ... - Parameters - ---------- - schema : PyCapsule - A valid PyCapsule with name 'arrow_schema' containing an - ArrowSchema pointer. - """ def unify_schemas( schemas: list[Schema], *, promote_options: Literal["default", "permissive"] = "default" -) -> Schema: - """ - Unify schemas by merging fields by name. - - The resulting schema will contain the union of fields from all schemas. - Fields with the same name will be merged. Note that two fields with - different types will fail merging by default. - - - The unified field will inherit the metadata from the schema where - that field is first defined. - - The first N fields in the schema will be ordered the same as the - N fields in the first schema. - - The resulting schema will inherit its metadata from the first input - schema. - - Parameters - ---------- - schemas : list of Schema - Schemas to merge into a single one. - promote_options : str, default default - Accepts strings "default" and "permissive". - Default: null and only null can be unified with another type. - Permissive: types are promoted to the greater common denominator. - - Returns - ------- - Schema - - Raises - ------ - ArrowInvalid : - If any input schema contains fields with duplicate names. - If Fields of the same name are not mergeable. - """ +) -> Schema: ... + def field( name: SupportArrowSchema | str, type: _DataTypeT, nullable: bool = ..., metadata: dict[Any, Any] | None = None -) -> Field[_DataTypeT] | Field[Any]: - """ - Create a pyarrow.Field instance. - - Parameters - ---------- - name : str or bytes - Name of the field. - Alternatively, you can also pass an object that implements the Arrow - PyCapsule Protocol for schemas (has an ``__arrow_c_schema__`` method). - type : pyarrow.DataType or str - Arrow datatype of the field or a string matching one. - nullable : bool, default True - Whether the field's values are nullable. 
- metadata : dict, default None - Optional field metadata, the keys and values must be coercible to - bytes. - - Returns - ------- - field : pyarrow.Field - - Examples - -------- - Create an instance of pyarrow.Field: - - >>> import pyarrow as pa - >>> pa.field('key', pa.int32()) - pyarrow.Field - >>> pa.field('key', pa.int32(), nullable=False) - pyarrow.Field - - >>> field = pa.field('key', pa.int32(), - ... metadata={"key": "Something important"}) - >>> field - pyarrow.Field - >>> field.metadata - {b'key': b'Something important'} - - Use the field to create a struct type: - - >>> pa.struct([field]) - StructType(struct) - - A str can also be passed for the type parameter: - - >>> pa.field('key', 'int32') - pyarrow.Field - """ - -def null() -> NullType: - """ - Create instance of null type. - - Examples - -------- - Create an instance of a null type: - - >>> import pyarrow as pa - >>> pa.null() - DataType(null) - >>> print(pa.null()) - null - - Create a ``Field`` type with a null type and a name: - - >>> pa.field('null_field', pa.null()) - pyarrow.Field - """ - -def bool_() -> BoolType: - """ - Create instance of boolean type. - - Examples - -------- - Create an instance of a boolean type: - - >>> import pyarrow as pa - >>> pa.bool_() - DataType(bool) - >>> print(pa.bool_()) - bool - - Create a ``Field`` type with a boolean type - and a name: - - >>> pa.field('bool_field', pa.bool_()) - pyarrow.Field - """ - -def uint8() -> UInt8Type: - """ - Create instance of unsigned int8 type. - - Examples - -------- - Create an instance of unsigned int8 type: - - >>> import pyarrow as pa - >>> pa.uint8() - DataType(uint8) - >>> print(pa.uint8()) - uint8 - - Create an array with unsigned int8 type: - - >>> pa.array([0, 1, 2], type=pa.uint8()) - - [ - 0, - 1, - 2 - ] - """ - -def int8() -> Int8Type: - """ - Create instance of signed int8 type. - - Examples - -------- - Create an instance of int8 type: - - >>> import pyarrow as pa - >>> pa.int8() - DataType(int8) - >>> print(pa.int8()) - int8 - - Create an array with int8 type: - - >>> pa.array([0, 1, 2], type=pa.int8()) - - [ - 0, - 1, - 2 - ] - """ - -def uint16() -> UInt16Type: - """ - Create instance of unsigned uint16 type. - - Examples - -------- - Create an instance of unsigned int16 type: - - >>> import pyarrow as pa - >>> pa.uint16() - DataType(uint16) - >>> print(pa.uint16()) - uint16 - - Create an array with unsigned int16 type: - - >>> pa.array([0, 1, 2], type=pa.uint16()) - - [ - 0, - 1, - 2 - ] - """ - -def int16() -> Int16Type: - """ - Create instance of signed int16 type. - - Examples - -------- - Create an instance of int16 type: - - >>> import pyarrow as pa - >>> pa.int16() - DataType(int16) - >>> print(pa.int16()) - int16 - - Create an array with int16 type: - - >>> pa.array([0, 1, 2], type=pa.int16()) - - [ - 0, - 1, - 2 - ] - """ - -def uint32() -> Uint32Type: - """ - Create instance of unsigned uint32 type. - - Examples - -------- - Create an instance of unsigned int32 type: - - >>> import pyarrow as pa - >>> pa.uint32() - DataType(uint32) - >>> print(pa.uint32()) - uint32 - - Create an array with unsigned int32 type: - - >>> pa.array([0, 1, 2], type=pa.uint32()) - - [ - 0, - 1, - 2 - ] - """ - -def int32() -> Int32Type: - """ - Create instance of signed int32 type. 
- - Examples - -------- - Create an instance of int32 type: - - >>> import pyarrow as pa - >>> pa.int32() - DataType(int32) - >>> print(pa.int32()) - int32 - - Create an array with int32 type: - - >>> pa.array([0, 1, 2], type=pa.int32()) - - [ - 0, - 1, - 2 - ] - """ - -def int64() -> Int64Type: - """ - Create instance of signed int64 type. - - Examples - -------- - Create an instance of int64 type: - - >>> import pyarrow as pa - >>> pa.int64() - DataType(int64) - >>> print(pa.int64()) - int64 - - Create an array with int64 type: - - >>> pa.array([0, 1, 2], type=pa.int64()) - - [ - 0, - 1, - 2 - ] - """ - -def uint64() -> UInt64Type: - """ - Create instance of unsigned uint64 type. - - Examples - -------- - Create an instance of unsigned int64 type: - - >>> import pyarrow as pa - >>> pa.uint64() - DataType(uint64) - >>> print(pa.uint64()) - uint64 - - Create an array with unsigned uint64 type: - - >>> pa.array([0, 1, 2], type=pa.uint64()) - - [ - 0, - 1, - 2 - ] - """ - -def timestamp(unit: _Unit, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: - """ - Create instance of timestamp type with resolution and optional time zone. - - Parameters - ---------- - unit : str - one of 's' [second], 'ms' [millisecond], 'us' [microsecond], or 'ns' - [nanosecond] - tz : str, default None - Time zone name. None indicates time zone naive - - Examples - -------- - Create an instance of timestamp type: - - >>> import pyarrow as pa - >>> pa.timestamp('us') - TimestampType(timestamp[us]) - >>> pa.timestamp('s', tz='America/New_York') - TimestampType(timestamp[s, tz=America/New_York]) - >>> pa.timestamp('s', tz='+07:30') - TimestampType(timestamp[s, tz=+07:30]) - - Use timestamp type when creating a scalar object: - - >>> from datetime import datetime - >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('s', tz='UTC')) - - >>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('us')) - - - Returns - ------- - timestamp_type : TimestampType - """ - -def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: - """ - Create instance of 32-bit time (time of day) type with unit resolution. - - Parameters - ---------- - unit : str - one of 's' [second], or 'ms' [millisecond] - - Returns - ------- - type : pyarrow.Time32Type - - Examples - -------- - >>> import pyarrow as pa - >>> pa.time32('s') - Time32Type(time32[s]) - >>> pa.time32('ms') - Time32Type(time32[ms]) - """ - -def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: - """ - Create instance of 64-bit time (time of day) type with unit resolution. - - Parameters - ---------- - unit : str - One of 'us' [microsecond], or 'ns' [nanosecond]. - - Returns - ------- - type : pyarrow.Time64Type - - Examples - -------- - >>> import pyarrow as pa - >>> pa.time64('us') - Time64Type(time64[us]) - >>> pa.time64('ns') - Time64Type(time64[ns]) - """ - -def duration(unit: _Unit) -> DurationType[_Unit]: - """ - Create instance of a duration type with unit resolution. - - Parameters - ---------- - unit : str - One of 's' [second], 'ms' [millisecond], 'us' [microsecond], or - 'ns' [nanosecond]. 
- - Returns - ------- - type : pyarrow.DurationType - - Examples - -------- - Create an instance of duration type: - - >>> import pyarrow as pa - >>> pa.duration('us') - DurationType(duration[us]) - >>> pa.duration('s') - DurationType(duration[s]) - - Create an array with duration type: - - >>> pa.array([0, 1, 2], type=pa.duration('s')) - - [ - 0, - 1, - 2 - ] - """ - -def month_day_nano_interval() -> MonthDayNanoIntervalType: - """ - Create instance of an interval type representing months, days and - nanoseconds between two dates. - - Examples - -------- - Create an instance of an month_day_nano_interval type: - - >>> import pyarrow as pa - >>> pa.month_day_nano_interval() - DataType(month_day_nano_interval) - - Create a scalar with month_day_nano_interval type: - - >>> pa.scalar((1, 15, -30), type=pa.month_day_nano_interval()) - - """ - -def date32() -> Date32Type: - """ - Create instance of 32-bit date (days since UNIX epoch 1970-01-01). - - Examples - -------- - Create an instance of 32-bit date type: - - >>> import pyarrow as pa - >>> pa.date32() - DataType(date32[day]) - - Create a scalar with 32-bit date type: - - >>> from datetime import date - >>> pa.scalar(date(2012, 1, 1), type=pa.date32()) - - """ - -def date64() -> Date64Type: - """ - Create instance of 64-bit date (milliseconds since UNIX epoch 1970-01-01). - - Examples - -------- - Create an instance of 64-bit date type: - - >>> import pyarrow as pa - >>> pa.date64() - DataType(date64[ms]) - - Create a scalar with 64-bit date type: - - >>> from datetime import datetime - >>> pa.scalar(datetime(2012, 1, 1), type=pa.date64()) - - """ - -def float16() -> Float16Type: - """ - Create half-precision floating point type. - - Examples - -------- - Create an instance of float16 type: - - >>> import pyarrow as pa - >>> pa.float16() - DataType(halffloat) - >>> print(pa.float16()) - halffloat - - Create an array with float16 type: - - >>> arr = np.array([1.5, np.nan], dtype=np.float16) - >>> a = pa.array(arr, type=pa.float16()) - >>> a - - [ - 1.5, - nan - ] - - Note that unlike other float types, if you convert this array - to a python list, the types of its elements will be ``np.float16`` - - >>> [type(val) for val in a.to_pylist()] - [, ] - """ - -def float32() -> Float32Type: - """ - Create single-precision floating point type. - - Examples - -------- - Create an instance of float32 type: - - >>> import pyarrow as pa - >>> pa.float32() - DataType(float) - >>> print(pa.float32()) - float - - Create an array with float32 type: - - >>> pa.array([0.0, 1.0, 2.0], type=pa.float32()) - - [ - 0, - 1, - 2 - ] - """ - -def float64() -> Float64Type: - """ - Create double-precision floating point type. - - Examples - -------- - Create an instance of float64 type: - - >>> import pyarrow as pa - >>> pa.float64() - DataType(double) - >>> print(pa.float64()) - double - - Create an array with float64 type: - - >>> pa.array([0.0, 1.0, 2.0], type=pa.float64()) - - [ - 0, - 1, - 2 - ] - """ - -def decimal32(precision: _Precision, scale: _Scale | None = None) -> Decimal32Type[_Precision, _Scale| Literal[0]]: - """ - Create decimal type with precision and scale and 32-bit width. - - Arrow decimals are fixed-point decimal numbers encoded as a scaled - integer. The precision is the number of significant digits that the - decimal type can represent; the scale is the number of digits after - the decimal point (note the scale can be negative). 
- - As an example, ``decimal32(7, 3)`` can exactly represent the numbers - 1234.567 and -1234.567 (encoded internally as the 32-bit integers - 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. - - ``decimal32(5, -3)`` can exactly represent the number 12345000 - (encoded internally as the 32-bit integer 12345), but neither - 123450000 nor 1234500. - - If you need a precision higher than 9 significant digits, consider - using ``decimal64``, ``decimal128``, or ``decimal256``. - - Parameters - ---------- - precision : int - Must be between 1 and 9 - scale : int - - Returns - ------- - decimal_type : Decimal32Type - - Examples - -------- - Create an instance of decimal type: - - >>> import pyarrow as pa - >>> pa.decimal32(5, 2) - Decimal32Type(decimal32(5, 2)) - - Create an array with decimal type: - - >>> import decimal - >>> a = decimal.Decimal('123.45') - >>> pa.array([a], pa.decimal32(5, 2)) - - [ - 123.45 - ] - """ - -def decimal64(precision: _Precision, scale: _Scale | None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: - """ - Create decimal type with precision and scale and 64-bit width. - - Arrow decimals are fixed-point decimal numbers encoded as a scaled - integer. The precision is the number of significant digits that the - decimal type can represent; the scale is the number of digits after - the decimal point (note the scale can be negative). - - As an example, ``decimal64(7, 3)`` can exactly represent the numbers - 1234.567 and -1234.567 (encoded internally as the 64-bit integers - 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. - - ``decimal64(5, -3)`` can exactly represent the number 12345000 - (encoded internally as the 64-bit integer 12345), but neither - 123450000 nor 1234500. - - If you need a precision higher than 18 significant digits, consider - using ``decimal128``, or ``decimal256``. - - Parameters - ---------- - precision : int - Must be between 1 and 18 - scale : int - - Returns - ------- - decimal_type : Decimal64Type - - Examples - -------- - Create an instance of decimal type: - - >>> import pyarrow as pa - >>> pa.decimal64(5, 2) - Decimal64Type(decimal64(5, 2)) - - Create an array with decimal type: - - >>> import decimal - >>> a = decimal.Decimal('123.45') - >>> pa.array([a], pa.decimal64(5, 2)) - - [ - 123.45 - ] - """ - -def decimal128(precision: _Precision, scale: _Scale | None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: - """ - Create decimal type with precision and scale and 128-bit width. - - Arrow decimals are fixed-point decimal numbers encoded as a scaled - integer. The precision is the number of significant digits that the - decimal type can represent; the scale is the number of digits after - the decimal point (note the scale can be negative). - - As an example, ``decimal128(7, 3)`` can exactly represent the numbers - 1234.567 and -1234.567 (encoded internally as the 128-bit integers - 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. - - ``decimal128(5, -3)`` can exactly represent the number 12345000 - (encoded internally as the 128-bit integer 12345), but neither - 123450000 nor 1234500. - - If you need a precision higher than 38 significant digits, consider - using ``decimal256``. 
- - Parameters - ---------- - precision : int - Must be between 1 and 38 - scale : int - - Returns - ------- - decimal_type : Decimal128Type - - Examples - -------- - Create an instance of decimal type: - - >>> import pyarrow as pa - >>> pa.decimal128(5, 2) - Decimal128Type(decimal128(5, 2)) - - Create an array with decimal type: - - >>> import decimal - >>> a = decimal.Decimal('123.45') - >>> pa.array([a], pa.decimal128(5, 2)) - - [ - 123.45 - ] - """ - -def decimal256(precision: _Precision, scale: _Scale | None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: - """ - Create decimal type with precision and scale and 256-bit width. - - Arrow decimals are fixed-point decimal numbers encoded as a scaled - integer. The precision is the number of significant digits that the - decimal type can represent; the scale is the number of digits after - the decimal point (note the scale can be negative). - - For most use cases, the maximum precision offered by ``decimal128`` - is sufficient, and it will result in a more compact and more efficient - encoding. ``decimal256`` is useful if you need a precision higher - than 38 significant digits. - - Parameters - ---------- - precision : int - Must be between 1 and 76 - scale : int - - Returns - ------- - decimal_type : Decimal256Type - """ - -def string() -> StringType: - """ - Create UTF8 variable-length string type. - - Examples - -------- - Create an instance of a string type: - - >>> import pyarrow as pa - >>> pa.string() - DataType(string) - - and use the string type to create an array: - - >>> pa.array(['foo', 'bar', 'baz'], type=pa.string()) - - [ - "foo", - "bar", - "baz" - ] - """ +) -> Field[_DataTypeT] | Field[Any]: ... -utf8 = string -""" -Alias for string(). -Examples --------- -Create an instance of a string type: +def null() -> NullType: ... ->>> import pyarrow as pa ->>> pa.utf8() -DataType(string) -and use the string type to create an array: +def bool_() -> BoolType: ... ->>> pa.array(['foo', 'bar', 'baz'], type=pa.utf8()) - -[ - "foo", - "bar", - "baz" -] -""" - -def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: - """ - Create variable-length or fixed size binary type. - - Parameters - ---------- - length : int, optional, default -1 - If length == -1 then return a variable length binary type. If length is - greater than or equal to 0 then return a fixed size binary type of - width `length`. - - Examples - -------- - Create an instance of a variable-length binary type: - - >>> import pyarrow as pa - >>> pa.binary() - DataType(binary) - - and use the variable-length binary type to create an array: - - >>> pa.array(['foo', 'bar', 'baz'], type=pa.binary()) - - [ - 666F6F, - 626172, - 62617A - ] - - Create an instance of a fixed-size binary type: - - >>> pa.binary(3) - FixedSizeBinaryType(fixed_size_binary[3]) - - and use the fixed-length binary type to create an array: - - >>> pa.array(['foo', 'bar', 'baz'], type=pa.binary(3)) - - [ - 666F6F, - 626172, - 62617A - ] - """ - -def large_binary() -> LargeBinaryType: - """ - Create large variable-length binary type. - - This data type may not be supported by all Arrow implementations. Unless - you need to represent data larger than 2GB, you should prefer binary(). 
- - Examples - -------- - Create an instance of large variable-length binary type: - - >>> import pyarrow as pa - >>> pa.large_binary() - DataType(large_binary) - - and use the type to create an array: - - >>> pa.array(['foo', 'bar', 'baz'], type=pa.large_binary()) - - [ - 666F6F, - 626172, - 62617A - ] - """ - -def large_string() -> LargeStringType: - """ - Create large UTF8 variable-length string type. - - This data type may not be supported by all Arrow implementations. Unless - you need to represent data larger than 2GB, you should prefer string(). - - Examples - -------- - Create an instance of large UTF8 variable-length binary type: - - >>> import pyarrow as pa - >>> pa.large_string() - DataType(large_string) - - and use the type to create an array: - - >>> pa.array(['foo', 'bar'] * 50, type=pa.large_string()) - - [ - "foo", - "bar", - ... - "foo", - "bar" - ] - """ -large_utf8 = large_string -""" -Alias for large_string(). +def uint8() -> UInt8Type: ... -Examples --------- -Create an instance of large UTF8 variable-length binary type: ->>> import pyarrow as pa ->>> pa.large_utf8() -DataType(large_string) +def int8() -> Int8Type: ... -and use the type to create an array: ->>> pa.array(['foo', 'bar'] * 50, type=pa.large_utf8()) - -[ - "foo", - "bar", - ... - "foo", - "bar" -] -""" +def uint16() -> UInt16Type: ... -def binary_view() -> BinaryViewType: - """ - Create a variable-length binary view type. - Examples - -------- - Create an instance of a string type: +def int16() -> Int16Type: ... - >>> import pyarrow as pa - >>> pa.binary_view() - DataType(binary_view) - """ -def string_view() -> StringViewType: - """ - Create UTF8 variable-length string view type. +def uint32() -> Uint32Type: ... - Examples - -------- - Create an instance of a string type: - >>> import pyarrow as pa - >>> pa.string_view() - DataType(string_view) - """ +def int32() -> Int32Type: ... -def list_( - value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | _Size | None = None -) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: - """ - Create ListType instance from child data type or field. - - Parameters - ---------- - value_type : DataType or Field - list_size : int, optional, default -1 - If length == -1 then return a variable length list type. If length is - greater than or equal to 0 then return a fixed size list type. - - Returns - ------- - list_type : DataType - - Examples - -------- - Create an instance of ListType: - - >>> import pyarrow as pa - >>> pa.list_(pa.string()) - ListType(list) - >>> pa.list_(pa.int32(), 2) - FixedSizeListType(fixed_size_list[2]) - - Use the ListType to create a scalar: - - >>> pa.scalar(['foo', None], type=pa.list_(pa.string(), 2)) - - - or an array: - - >>> pa.array([[1, 2], [3, 4]], pa.list_(pa.int32(), 2)) - - [ - [ - 1, - 2 - ], - [ - 3, - 4 - ] - ] - """ - -def large_list(value_type: _DataTypeT | Field[_DataTypeT]) -> LargeListType[_DataTypeT]: - """ - Create LargeListType instance from child data type or field. - - This data type may not be supported by all Arrow implementations. - Unless you need to represent data larger than 2**31 elements, you should - prefer list_(). 
- - Parameters - ---------- - value_type : DataType or Field - - Returns - ------- - list_type : DataType - - Examples - -------- - Create an instance of LargeListType: - - >>> import pyarrow as pa - >>> pa.large_list(pa.int8()) - LargeListType(large_list) - - Use the LargeListType to create an array: - - >>> pa.array([[-1, 3]] * 5, type=pa.large_list(pa.int8())) - - [ - [ - -1, - 3 - ], - [ - -1, - 3 - ], - ... - """ -def list_view(value_type: _DataTypeT | Field[_DataTypeT]) -> ListViewType[_DataTypeT]: - """ - Create ListViewType instance from child data type or field. +def int64() -> Int64Type: ... - This data type may not be supported by all Arrow implementations - because it is an alternative to the ListType. - Parameters - ---------- - value_type : DataType or Field +def uint64() -> UInt64Type: ... - Returns - ------- - list_view_type : DataType - Examples - -------- - Create an instance of ListViewType: +def timestamp(unit: _Unit, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... - >>> import pyarrow as pa - >>> pa.list_view(pa.string()) - ListViewType(list_view) - """ -def large_list_view( - value_type: _DataTypeT | Field[_DataTypeT], -) -> LargeListViewType[_DataTypeT]: - """ - Create LargeListViewType instance from child data type or field. +def time32(unit: _Time32Unit) -> Time32Type[_Time32Unit]: ... - This data type may not be supported by all Arrow implementations - because it is an alternative to the ListType. - Parameters - ---------- - value_type : DataType or Field +def time64(unit: _Time64Unit) -> Time64Type[_Time64Unit]: ... - Returns - ------- - list_view_type : DataType - Examples - -------- - Create an instance of LargeListViewType: +def duration(unit: _Unit) -> DurationType[_Unit]: ... - >>> import pyarrow as pa - >>> pa.large_list_view(pa.int8()) - LargeListViewType(large_list_view) - """ -def map_( - key_type: _K, item_type: _ValueT, key_sorted: _Ordered | None = None -) -> MapType[_K, _ValueT, _Ordered]: - """ - Create MapType instance from key and item data types or fields. - - Parameters - ---------- - key_type : DataType or Field - item_type : DataType or Field - keys_sorted : bool - - Returns - ------- - map_type : DataType - - Examples - -------- - Create an instance of MapType: - - >>> import pyarrow as pa - >>> pa.map_(pa.string(), pa.int32()) - MapType(map) - >>> pa.map_(pa.string(), pa.int32(), keys_sorted=True) - MapType(map) - - Use MapType to create an array: - - >>> data = [[{'key': 'a', 'value': 1}, {'key': 'b', 'value': 2}], [{'key': 'c', 'value': 3}]] - >>> pa.array(data, type=pa.map_(pa.string(), pa.int32(), keys_sorted=True)) - - [ - keys: - [ - "a", - "b" - ] - values: - [ - 1, - 2 - ], - keys: - [ - "c" - ] - values: - [ - 3 - ] - ] - """ +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... -def dictionary( - index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered | None = None -) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: - """ - Dictionary (categorical, or simply encoded) type. - Parameters - ---------- - index_type : DataType - value_type : DataType - ordered : bool +def date32() -> Date32Type: ... - Returns - ------- - type : DictionaryType - Examples - -------- - Create an instance of dictionary type: +def date64() -> Date64Type: ... - >>> import pyarrow as pa - >>> pa.dictionary(pa.int64(), pa.utf8()) - DictionaryType(dictionary) - Use dictionary type to create an array: +def float16() -> Float16Type: ... + + +def float32() -> Float32Type: ... + + +def float64() -> Float64Type: ... 
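A minimal doctest-style sketch of the scalar type factories stubbed in this hunk (int32, timestamp, duration, float64); the expected reprs follow the examples in the docstrings removed above and are illustrative only:

    >>> import pyarrow as pa
    >>> pa.int32()
    DataType(int32)
    >>> pa.timestamp('s', tz='UTC')
    TimestampType(timestamp[s, tz=UTC])
    >>> pa.duration('us')
    DurationType(duration[us])
    >>> pa.float64()
    DataType(double)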
+ + +def decimal32(precision: _Precision, scale: _Scale | + None = None) -> Decimal32Type[_Precision, _Scale| Literal[0]]: ... + + +def decimal64(precision: _Precision, scale: _Scale | + None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal128(precision: _Precision, scale: _Scale | + None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal256(precision: _Precision, scale: _Scale | + None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... + + +def string() -> StringType: ... + + +utf8 = string + + +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... + + +def large_binary() -> LargeBinaryType: ... + + +def large_string() -> LargeStringType: ... + + +large_utf8 = large_string + + +def binary_view() -> BinaryViewType: ... + + +def string_view() -> StringViewType: ... + + +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | _Size | None = None +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... + + +def large_list(value_type: _DataTypeT | + Field[_DataTypeT]) -> LargeListType[_DataTypeT]: ... + + +def list_view(value_type: _DataTypeT | + Field[_DataTypeT]) -> ListViewType[_DataTypeT]: ... + + +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListViewType[_DataTypeT]: ... + + +def map_( + key_type: _K, item_type: _ValueT, key_sorted: _Ordered | None = None +) -> MapType[_K, _ValueT, _Ordered]: ... + + +def dictionary( + index_type: _IndexT, value_type: _BasicValueT, ordered: _Ordered | None = None +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... - >>> pa.array(["a", "b", None, "d"], pa.dictionary(pa.int64(), pa.utf8())) - - ... - -- dictionary: - [ - "a", - "b", - "d" - ] - -- indices: - [ - 0, - 1, - null, - 2 - ] - """ def struct( fields: Iterable[Field[Any] | tuple[str, Field[Any]] | tuple[str, DataType]] | Mapping[str, Field[Any]], -) -> StructType: - """ - Create StructType instance from fields. - - A struct is a nested type parameterized by an ordered sequence of types - (which can all be distinct), called its fields. - - Parameters - ---------- - fields : iterable of Fields or tuples, or mapping of strings to DataTypes - Each field must have a UTF8-encoded name, and these field names are - part of the type metadata. - - Examples - -------- - Create an instance of StructType from an iterable of tuples: - - >>> import pyarrow as pa - >>> fields = [ - ... ('f1', pa.int32()), - ... ('f2', pa.string()), - ... ] - >>> struct_type = pa.struct(fields) - >>> struct_type - StructType(struct) - - Retrieve a field from a StructType: - - >>> struct_type[0] - pyarrow.Field - >>> struct_type['f1'] - pyarrow.Field - - Create an instance of StructType from an iterable of Fields: - - >>> fields = [ - ... pa.field('f1', pa.int32()), - ... pa.field('f2', pa.string(), nullable=False), - ... ] - >>> pa.struct(fields) - StructType(struct) - - Returns - ------- - type : DataType - """ +) -> StructType: ... + def sparse_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> SparseUnionType: - """ - Create SparseUnionType from child fields. - - A sparse union is a nested type where each logical value is taken from - a single child. A buffer of 8-bit type ids indicates which child - a given logical value is to be taken from. - - In a sparse union, each child array should have the same length as the - union array, regardless of the actual number of union values that - refer to it. 
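A short sketch of the parameterized factories declared above (list_, map_, dictionary), based on the examples in the docstrings removed in this hunk; reprs are indicative:

    >>> import pyarrow as pa
    >>> pa.list_(pa.string())
    ListType(list<item: string>)
    >>> pa.map_(pa.string(), pa.int32())
    MapType(map<string, int32>)
    >>> dict_type = pa.dictionary(pa.int64(), pa.utf8())
    >>> dict_type.index_type   # index and value types are exposed on DictionaryType
    DataType(int64)
    >>> dict_type.value_type
    DataType(string)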
- - Parameters - ---------- - child_fields : sequence of Field values - Each field must have a UTF8-encoded name, and these field names are - part of the type metadata. - type_codes : list of integers, default None - - Returns - ------- - type : SparseUnionType - """ +) -> SparseUnionType: ... + def dense_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> DenseUnionType: - """ - Create DenseUnionType from child fields. - - A dense union is a nested type where each logical value is taken from - a single child, at a specific offset. A buffer of 8-bit type ids - indicates which child a given logical value is to be taken from, - and a buffer of 32-bit offsets indicates at which physical position - in the given child array the logical value is to be taken from. - - Unlike a sparse union, a dense union allows encoding only the child array - values which are actually referred to by the union array. This is - counterbalanced by the additional footprint of the offsets buffer, and - the additional indirection cost when looking up values. - - Parameters - ---------- - child_fields : sequence of Field values - Each field must have a UTF8-encoded name, and these field names are - part of the type metadata. - type_codes : list of integers, default None - - Returns - ------- - type : DenseUnionType - """ +) -> DenseUnionType: ... + def union( child_fields: list[Field[Any]], mode: Literal["sparse"] | Literal["dense"], type_codes: list[int] | None = None -) -> SparseUnionType | DenseUnionType: - """ - Create UnionType from child fields. - - A union is a nested type where each logical value is taken from a - single child. A buffer of 8-bit type ids indicates which child - a given logical value is to be taken from. - - Unions come in two flavors: sparse and dense - (see also `pyarrow.sparse_union` and `pyarrow.dense_union`). - - Parameters - ---------- - child_fields : sequence of Field values - Each field must have a UTF8-encoded name, and these field names are - part of the type metadata. - mode : str - Must be 'sparse' or 'dense' - type_codes : list of integers, default None - - Returns - ------- - type : UnionType - """ +) -> SparseUnionType | DenseUnionType: ... + def run_end_encoded( run_end_type: _RunEndType, value_type: _BasicValueT -) -> RunEndEncodedType[_RunEndType, _BasicValueT]: - """ - Create RunEndEncodedType from run-end and value types. - - Parameters - ---------- - run_end_type : pyarrow.DataType - The integer type of the run_ends array. Must be 'int16', 'int32', or 'int64'. - value_type : pyarrow.DataType - The type of the values array. - - Returns - ------- - type : RunEndEncodedType - """ - -def json_(storage_type: DataType = ...) -> JsonType: - """ - Create instance of JSON extension type. - - Parameters - ---------- - storage_type : DataType, default pyarrow.string() - The underlying data type. Can be on of the following types: - string, large_string, string_view. - - Returns - ------- - type : JsonType - - Examples - -------- - Create an instance of JSON extension type: - - >>> import pyarrow as pa - >>> pa.json_(pa.utf8()) - JsonType(extension) - - Use the JSON type to create an array: - - >>> pa.array(['{"a": 1}', '{"b": 2}'], type=pa.json_(pa.utf8())) - - [ - "{"a": 1}", - "{"b": 2}" - ] - """ - -def uuid() -> UuidType: - """ - Create UuidType instance. - - Returns - ------- - type : UuidType - """ +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... + + +def json_(storage_type: DataType = ...) -> JsonType: ... + + +def uuid() -> UuidType: ... 
+ def fixed_shape_tensor( value_type: _ValueT, shape: Sequence[int], dim_names: Sequence[str] | None = None, permutation: Sequence[int] | None = None, -) -> FixedShapeTensorType[_ValueT]: - """ - Create instance of fixed shape tensor extension type with shape and optional - names of tensor dimensions and indices of the desired logical - ordering of dimensions. - - Parameters - ---------- - value_type : DataType - Data type of individual tensor elements. - shape : tuple or list of integers - The physical shape of the contained tensors. - dim_names : tuple or list of strings, default None - Explicit names to tensor dimensions. - permutation : tuple or list integers, default None - Indices of the desired ordering of the original dimensions. - The indices contain a permutation of the values ``[0, 1, .., N-1]`` where - N is the number of dimensions. The permutation indicates which dimension - of the logical layout corresponds to which dimension of the physical tensor. - For more information on this parameter see - :ref:`fixed_shape_tensor_extension`. - - Examples - -------- - Create an instance of fixed shape tensor extension type: - - >>> import pyarrow as pa - >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) - >>> tensor_type - FixedShapeTensorType(extension) - - Inspect the data type: - - >>> tensor_type.value_type - DataType(int32) - >>> tensor_type.shape - [2, 2] - - Create a table with fixed shape tensor extension array: - - >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] - >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) - >>> tensor = pa.ExtensionArray.from_storage(tensor_type, storage) - >>> pa.table([tensor], names=["tensor_array"]) - pyarrow.Table - tensor_array: extension - ---- - tensor_array: [[[1,2,3,4],[10,20,30,40],[100,200,300,400]]] - - Create an instance of fixed shape tensor extension type with names - of tensor dimensions: - - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), - ... dim_names=['C', 'H', 'W']) - >>> tensor_type.dim_names - ['C', 'H', 'W'] - - Create an instance of fixed shape tensor extension type with - permutation: - - >>> tensor_type = pa.fixed_shape_tensor(pa.int8(), (2, 2, 3), - ... permutation=[0, 2, 1]) - >>> tensor_type.permutation - [0, 2, 1] - - Returns - ------- - type : FixedShapeTensorType - """ - -def bool8() -> Bool8Type: - """ - Create instance of bool8 extension type. - - Examples - -------- - Create an instance of bool8 extension type: - - >>> import pyarrow as pa - >>> type = pa.bool8() - >>> type - Bool8Type(extension) - - Inspect the data type: - - >>> type.storage_type - DataType(int8) - - Create a table with a bool8 array: - - >>> arr = [-1, 0, 1, 2, None] - >>> storage = pa.array(arr, pa.int8()) - >>> other = pa.ExtensionArray.from_storage(type, storage) - >>> pa.table([other], names=["unknown_col"]) - pyarrow.Table - unknown_col: extension - ---- - unknown_col: [[-1,0,1,2,null]] - - Returns - ------- - type : Bool8Type - """ - -def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: - """ - Create instance of opaque extension type. - - Parameters - ---------- - storage_type : DataType - The underlying data type. - type_name : str - The name of the type in the external system. - vendor_name : str - The name of the external system. 
- - Examples - -------- - Create an instance of an opaque extension type: - - >>> import pyarrow as pa - >>> type = pa.opaque(pa.binary(), "other", "jdbc") - >>> type - OpaqueType(extension) - - Inspect the data type: - - >>> type.storage_type - DataType(binary) - >>> type.type_name - 'other' - >>> type.vendor_name - 'jdbc' - - Create a table with an opaque array: - - >>> arr = [None, b"foobar"] - >>> storage = pa.array(arr, pa.binary()) - >>> other = pa.ExtensionArray.from_storage(type, storage) - >>> pa.table([other], names=["unknown_col"]) - pyarrow.Table - unknown_col: extension - ---- - unknown_col: [[null,666F6F626172]] - - Returns - ------- - type : OpaqueType - """ - -def type_for_alias(name: Any) -> DataType: - """ - Return DataType given a string alias if one exists. - - Parameters - ---------- - name : str - The alias of the DataType that should be retrieved. - - Returns - ------- - type : DataType - """ +) -> FixedShapeTensorType[_ValueT]: ... + + +def bool8() -> Bool8Type: ... + + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... + + +def type_for_alias(name: Any) -> DataType: ... + def schema( fields: Iterable[Field[Any]] | Iterable[tuple[str, DataType]] | Mapping[str, DataType], metadata: dict[bytes | str, bytes | str] | None = None, -) -> Schema: - """ - Construct pyarrow.Schema from collection of fields. - - Parameters - ---------- - fields : iterable of Fields or tuples, or mapping of strings to DataTypes - Can also pass an object that implements the Arrow PyCapsule Protocol - for schemas (has an ``__arrow_c_schema__`` method). - metadata : dict, default None - Keys and values must be coercible to bytes. - - Examples - -------- - Create a Schema from iterable of tuples: - - >>> import pyarrow as pa - >>> pa.schema([ - ... ('some_int', pa.int32()), - ... ('some_string', pa.string()), - ... pa.field('some_required_string', pa.string(), nullable=False) - ... ]) - some_int: int32 - some_string: string - some_required_string: string not null - - Create a Schema from iterable of Fields: - - >>> pa.schema([ - ... pa.field('some_int', pa.int32()), - ... pa.field('some_string', pa.string()) - ... ]) - some_int: int32 - some_string: string - - DataTypes can also be passed as strings. The following is equivalent to the - above example: - - >>> pa.schema([ - ... pa.field('some_int', "int32"), - ... pa.field('some_string', "string") - ... ]) - some_int: int32 - some_string: string - - Or more concisely: - - >>> pa.schema([ - ... ('some_int', "int32"), - ... ('some_string', "string") - ... ]) - some_int: int32 - some_string: string - - Returns - ------- - schema : pyarrow.Schema - """ - -def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: - """ - Convert NumPy dtype to pyarrow.DataType. - - Parameters - ---------- - dtype : the numpy dtype to convert - - - Examples - -------- - Create a pyarrow DataType from NumPy dtype: - - >>> import pyarrow as pa - >>> import numpy as np - >>> pa.from_numpy_dtype(np.dtype('float16')) - DataType(halffloat) - >>> pa.from_numpy_dtype('U') - DataType(string) - >>> pa.from_numpy_dtype(bool) - DataType(bool) - >>> pa.from_numpy_dtype(np.str_) - DataType(string) - """ +) -> Schema: ... + + +def from_numpy_dtype(dtype: np.dtype[Any]) -> DataType: ... 
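For reference, schema() and from_numpy_dtype() as exercised in the docstrings removed above (a minimal sketch):

    >>> import pyarrow as pa
    >>> import numpy as np
    >>> pa.schema([('some_int', pa.int32()), ('some_string', pa.string())])
    some_int: int32
    some_string: string
    >>> pa.from_numpy_dtype(np.dtype('float16'))
    DataType(halffloat)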
+ __all__ = [ "_Weakrefable", diff --git a/python/pyarrow-stubs/acero.pyi b/python/pyarrow-stubs/acero.pyi index 2abb608b32c..b3bc83382fb 100644 --- a/python/pyarrow-stubs/acero.pyi +++ b/python/pyarrow-stubs/acero.pyi @@ -32,6 +32,7 @@ from .compute import Expression, FunctionOptions _StrOrExpr: TypeAlias = str | Expression + class Declaration(lib._Weakrefable): def __init__( self, @@ -44,16 +45,23 @@ class Declaration(lib._Weakrefable): def to_reader(self, use_threads: bool = True) -> lib.RecordBatchReader: ... def to_table(self, use_threads: bool = True) -> lib.Table: ... -class ExecNodeOptions(lib._Weakrefable): ... + +class ExecNodeOptions(lib._Weakrefable): + ... + class TableSourceNodeOptions(ExecNodeOptions): def __init__(self, table: lib.Table) -> None: ... + class FilterNodeOptions(ExecNodeOptions): def __init__(self, filter_expression: Expression) -> None: ... + class ProjectNodeOptions(ExecNodeOptions): - def __init__(self, expressions: list[Expression], names: list[str] | None = None) -> None: ... + def __init__(self, expressions: list[Expression], + names: list[str] | None = None) -> None: ... + class AggregateNodeOptions(ExecNodeOptions): def __init__( @@ -62,6 +70,7 @@ class AggregateNodeOptions(ExecNodeOptions): keys: list[_StrOrExpr] | None = None, ) -> None: ... + class OrderByNodeOptions(ExecNodeOptions): def __init__( self, @@ -70,6 +79,7 @@ class OrderByNodeOptions(ExecNodeOptions): null_placement: Literal["at_start", "at_end"] = "at_end", ) -> None: ... + class HashJoinNodeOptions(ExecNodeOptions): def __init__( self, @@ -91,6 +101,7 @@ class HashJoinNodeOptions(ExecNodeOptions): output_suffix_for_right: str = "", ) -> None: ... + class AsofJoinNodeOptions(ExecNodeOptions): def __init__( self, diff --git a/python/pyarrow-stubs/array.pyi b/python/pyarrow-stubs/array.pyi index 3027d689372..7aa67fc8955 100644 --- a/python/pyarrow-stubs/array.pyi +++ b/python/pyarrow-stubs/array.pyi @@ -45,7 +45,7 @@ from pyarrow._stubs_typing import ( SupportArrowArray, SupportArrowDeviceArray, ) -from pyarrow.lib import ( # type: ignore[attr-defined] +from pyarrow.lib import ( # type: ignore[attr-defined] Buffer, Device, # type: ignore[reportAttributeAccessIssue] MemoryManager, # type: ignore[reportAttributeAccessIssue] @@ -84,6 +84,7 @@ from ._types import ( ) from ._stubs_typing import NullableCollection + def array( values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, type: Any | None = None, @@ -92,302 +93,57 @@ def array( from_pandas: bool | None = None, safe: bool = True, memory_pool: MemoryPool | None = None, -) -> ArrayLike: - """ - Create pyarrow.Array instance from a Python object. - - Parameters - ---------- - obj : sequence, iterable, ndarray, pandas.Series, Arrow-compatible array - If both type and size are specified may be a single use iterable. If - not strongly-typed, Arrow type will be inferred for resulting array. - Any Arrow-compatible array that implements the Arrow PyCapsule Protocol - (has an ``__arrow_c_array__`` or ``__arrow_c_device_array__`` method) - can be passed as well. - type : pyarrow.DataType - Explicit type to attempt to coerce to, otherwise will be inferred from - the data. - mask : array[bool], optional - Indicate which values are null (True) or not null (False). - size : int64, optional - Size of the elements. If the input is larger than size bail at this - length. 
For iterators, if size is larger than the input iterator this - will be treated as a "max size", but will involve an initial allocation - of size followed by a resize to the actual size (so if you know the - exact size specifying it correctly will give you better performance). - from_pandas : bool, default None - Use pandas's semantics for inferring nulls from values in - ndarray-like data. If passed, the mask tasks precedence, but - if a value is unmasked (not-null), but still null according to - pandas semantics, then it is null. Defaults to False if not - passed explicitly by user, or True if a pandas object is - passed in. - safe : bool, default True - Check for overflows or other unsafe conversions. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the currently-set default - memory pool. - - Returns - ------- - array : pyarrow.Array or pyarrow.ChunkedArray - A ChunkedArray instead of an Array is returned if: - - - the object data overflowed binary storage. - - the object's ``__arrow_array__`` protocol method returned a chunked - array. - - Notes - ----- - Timezone will be preserved in the returned array for timezone-aware data, - else no timezone will be returned for naive timestamps. - Internally, UTC values are stored for timezone-aware data with the - timezone set in the data type. - - Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by - default converted as MonthDayNanoIntervalArray. relativedelta leapdays - are ignored as are all absolute fields on both objects. datetime.timedelta - can also be converted to MonthDayNanoIntervalArray but this requires - passing MonthDayNanoIntervalType explicitly. - - Converting to dictionary array will promote to a wider integer type for - indices if the number of distinct values cannot be represented, even if - the index type was explicitly set. This means that if there are more than - 127 values the returned dictionary array's index type will be at least - pa.int16() even if pa.int8() was passed to the function. Note that an - explicit index type will not be demoted even if it is wider than required. - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> pa.array(pd.Series([1, 2])) - - [ - 1, - 2 - ] - - >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) - - ... - -- dictionary: - [ - "a", - "b" - ] - -- indices: - [ - 0, - 1, - 0 - ] - - >>> import numpy as np - >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) - - [ - 1, - null - ] - - >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) - >>> arr.type.index_type - DataType(int16) - """ +) -> ArrayLike: ... + def asarray( values: NullableCollection[Any] | Iterable[Any] | SupportArrowArray | SupportArrowDeviceArray, type: _DataTypeT | Any | None = None, -) -> Array[Scalar[_DataTypeT]] | ArrayLike: - """ - Convert to pyarrow.Array, inferring type if not provided. - - Parameters - ---------- - values : array-like - This can be a sequence, numpy.ndarray, pyarrow.Array or - pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be - a ChunkedArray, otherwise the output will be a Array. - type : string or DataType - Explicitly construct the array with this type. Attempt to cast if - indicated type is different. - - Returns - ------- - arr : Array or ChunkedArray - """ +) -> Array[Scalar[_DataTypeT]] | ArrayLike: ... 
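A small sketch of the array()/asarray() behaviour documented in the stripped docstrings (explicit typing, null handling, and pass-through inference); outputs are illustrative:

    >>> import pyarrow as pa
    >>> arr = pa.array([1.0, 2.0, None], type=pa.float32())
    >>> arr.type
    DataType(float)
    >>> arr.null_count
    1
    >>> pa.asarray(arr).type   # already an Array, type is preserved
    DataType(float)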
+ def nulls( size: int, type: Any | None = None, memory_pool: MemoryPool | None = None, -) -> ArrayLike: - """ - Create a strongly-typed Array instance with all elements null. - - Parameters - ---------- - size : int - Array length. - type : pyarrow.DataType, default None - Explicit type for the array. By default use NullType. - memory_pool : MemoryPool, default None - Arrow MemoryPool to use for allocations. Uses the default memory - pool if not passed. - - Returns - ------- - arr : Array - - Examples - -------- - >>> import pyarrow as pa - >>> pa.nulls(10) - - 10 nulls - - >>> pa.nulls(3, pa.uint32()) - - [ - null, - null, - null - ] - """ +) -> ArrayLike: ... + def repeat( value: Any, size: int, memory_pool: MemoryPool | None = None, -) -> ArrayLike: - """ - Create an Array instance whose slots are the given scalar. - - Parameters - ---------- - value : Scalar-like object - Either a pyarrow.Scalar or any python object coercible to a Scalar. - size : int - Number of times to repeat the scalar in the output Array. - memory_pool : MemoryPool, default None - Arrow MemoryPool to use for allocations. Uses the default memory - pool if not passed. - - Returns - ------- - arr : Array - - Examples - -------- - >>> import pyarrow as pa - >>> pa.repeat(10, 3) - - [ - 10, - 10, - 10 - ] - - >>> pa.repeat([1, 2], 2) - - [ - [ - 1, - 2 - ], - [ - 1, - 2 - ] - ] - - >>> pa.repeat("string", 3) - - [ - "string", - "string", - "string" - ] - - >>> pa.repeat(pa.scalar({'a': 1, 'b': [1, 2]}), 2) - - -- is_valid: all not null - -- child 0 type: int64 - [ - 1, - 1 - ] - -- child 1 type: list - [ - [ - 1, - 2 - ], - [ - 1, - 2 - ] - ] - """ - -def infer_type(values: Iterable[Any], mask: Mask, from_pandas: bool = False) -> DataType: - """ - Attempt to infer Arrow data type that can hold the passed Python - sequence type in an Array object - - Parameters - ---------- - values : array-like - Sequence to infer type from. - mask : ndarray (bool type), optional - Optional exclusion mask where True marks null, False non-null. - from_pandas : bool, default False - Use pandas's NA/null sentinel values for type inference. - - Returns - ------- - type : DataType - """ +) -> ArrayLike: ... + + +def infer_type(values: Iterable[Any], mask: Mask, + from_pandas: bool = False) -> DataType: ... + class ArrayStatistics(_Weakrefable): - """ - The class for statistics of an array. - """ + @property - def null_count(self) -> int: - """ - The number of nulls. - """ + def null_count(self) -> int: ... + @property - def distinct_count(self) -> int: - """ - The number of distinct values. - """ + def distinct_count(self) -> int: ... + @property - def min(self) -> Any: - """ - The minimum value. - """ + def min(self) -> Any: ... + @property - def is_min_exact(self) -> bool: - """ - Whether the minimum value is an exact value or not. - """ + def is_min_exact(self) -> bool: ... + @property - def max(self) -> Any: - """ - The maximum value. - """ + def max(self) -> Any: ... @property - def is_max_exact(self) -> bool: - """ - Whether the maximum value is an exact value or not. - """ + def is_max_exact(self) -> bool: ... 
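nulls() and repeat(), illustrated along the lines of the removed docstrings (a minimal sketch):

    >>> import pyarrow as pa
    >>> len(pa.nulls(3, pa.uint32()))   # all-null array of the requested type
    3
    >>> pa.repeat("string", 3).to_pylist()
    ['string', 'string', 'string']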
+ _ConvertAs = TypeVar("_ConvertAs", pd.DataFrame, pd.Series) + class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): def to_pandas( self, @@ -407,287 +163,38 @@ class _PandasConvertible(_Weakrefable, Generic[_ConvertAs]): maps_as_pydicts: Literal["None", "lossy", "strict"] | None = None, types_mapper: Callable[[DataType], ExtensionDtype | None] | None = None, coerce_temporal_nanoseconds: bool = False, - ) -> _ConvertAs: - """ - Convert to a pandas-compatible NumPy array or DataFrame, as appropriate - - Parameters - ---------- - memory_pool : MemoryPool, default None - Arrow MemoryPool to use for allocations. Uses the default memory - pool if not passed. - categories : list, default empty - List of fields that should be returned as pandas.Categorical. Only - applies to table-like data structures. - strings_to_categorical : bool, default False - Encode string (UTF8) and binary types to pandas.Categorical. - zero_copy_only : bool, default False - Raise an ArrowException if this function call would require copying - the underlying data. - integer_object_nulls : bool, default False - Cast integers with nulls to objects - date_as_object : bool, default True - Cast dates to objects. If False, convert to datetime64 dtype with - the equivalent time unit (if supported). Note: in pandas version - < 2.0, only datetime64[ns] conversion is supported. - timestamp_as_object : bool, default False - Cast non-nanosecond timestamps (np.datetime64) to objects. This is - useful in pandas version 1.x if you have timestamps that don't fit - in the normal date range of nanosecond timestamps (1678 CE-2262 CE). - Non-nanosecond timestamps are supported in pandas version 2.0. - If False, all timestamps are converted to datetime64 dtype. - use_threads : bool, default True - Whether to parallelize the conversion using multiple threads. - deduplicate_objects : bool, default True - Do not create multiple copies Python objects when created, to save - on memory use. Conversion will be slower. - ignore_metadata : bool, default False - If True, do not use the 'pandas' metadata to reconstruct the - DataFrame index, if present - safe : bool, default True - For certain data types, a cast is needed in order to store the - data in a pandas DataFrame or Series (e.g. timestamps are always - stored as nanoseconds in pandas). This option controls whether it - is a safe cast or not. - split_blocks : bool, default False - If True, generate one internal "block" for each column when - creating a pandas.DataFrame from a RecordBatch or Table. While this - can temporarily reduce memory note that various pandas operations - can trigger "consolidation" which may balloon memory use. - self_destruct : bool, default False - EXPERIMENTAL: If True, attempt to deallocate the originating Arrow - memory while converting the Arrow object to pandas. If you use the - object after calling to_pandas with this option it will crash your - program. - - Note that you may not see always memory usage improvements. For - example, if multiple columns share an underlying allocation, - memory can't be freed until all columns are converted. - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. 
- This can change the ordering of (key, value) pairs, and will - deduplicate multiple keys, resulting in a possible loss of data. - - If 'lossy', this key deduplication results in a warning printed - when detected. If 'strict', this instead results in an exception - being raised when detected. - types_mapper : function, default None - A function mapping a pyarrow DataType to a pandas ExtensionDtype. - This can be used to override the default pandas type for conversion - of built-in pyarrow types or in absence of pandas_metadata in the - Table schema. The function receives a pyarrow DataType and is - expected to return a pandas ExtensionDtype or ``None`` if the - default conversion should be used for that type. If you have - a dictionary mapping, you can pass ``dict.get`` as function. - coerce_temporal_nanoseconds : bool, default False - Only applicable to pandas version >= 2.0. - A legacy option to coerce date32, date64, duration, and timestamp - time units to nanoseconds when converting to pandas. This is the - default behavior in pandas version 1.x. Set this option to True if - you'd like to use this coercion when using pandas version >= 2.0 - for backwards compatibility (not recommended otherwise). - - Returns - ------- - pandas.Series or pandas.DataFrame depending on type of object - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - - Convert a Table to pandas DataFrame: - - >>> table = pa.table([ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - ... ], names=['n_legs', 'animals']) - >>> table.to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - >>> isinstance(table.to_pandas(), pd.DataFrame) - True - - Convert a RecordBatch to pandas DataFrame: - - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> batch = pa.record_batch([n_legs, animals], - ... names=["n_legs", "animals"]) - >>> batch - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - >>> batch.to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - >>> isinstance(batch.to_pandas(), pd.DataFrame) - True - - Convert a Chunked Array to pandas Series: - - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.to_pandas() - 0 2 - 1 2 - 2 4 - 3 4 - 4 5 - 5 100 - dtype: int64 - >>> isinstance(n_legs.to_pandas(), pd.Series) - True - """ + ) -> _ConvertAs: ... + _CastAs = TypeVar("_CastAs", bound=DataType) _Scalar_co = TypeVar("_Scalar_co", bound=Scalar, covariant=True) _ScalarT = TypeVar("_ScalarT", bound=Scalar) + class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): - """ - The base class for all Arrow arrays. - """ - - def diff(self, other: Self) -> str: - """ - Compare contents of this array against another one. - - Return a string containing the result of diffing this array - (on the left side) against the other array (on the right side). - - Parameters - ---------- - other : Array - The other array to compare this array with. - - Returns - ------- - diff : str - A human-readable printout of the differences. 
- - Examples - -------- - >>> import pyarrow as pa - >>> left = pa.array(["one", "two", "three"]) - >>> right = pa.array(["two", None, "two-and-a-half", "three"]) - >>> print(left.diff(right)) # doctest: +SKIP - - @@ -0, +0 @@ - -"one" - @@ -2, +1 @@ - +null - +"two-and-a-half" - """ + + def diff(self, other: Self) -> str: ... + def cast( self, target_type: _CastAs, safe: bool = True, options: CastOptions | None = None, memory_pool: MemoryPool | None = None, - ) -> Array[Scalar[_CastAs]]: - """ - Cast array values to another data type - - See :func:`pyarrow.compute.cast` for usage. - - Parameters - ---------- - target_type : DataType, default None - Type to cast array to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. - - Returns - ------- - cast : Array - """ - def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: - """ - Return zero-copy "view" of array as another data type. - - The data types must have compatible columnar buffer layouts - - Parameters - ---------- - target_type : DataType - Type to construct view as. - - Returns - ------- - view : Array - """ - def sum(self, **kwargs) -> _Scalar_co: - """ - Sum the values in a numerical array. - - See :func:`pyarrow.compute.sum` for full usage. - - Parameters - ---------- - **kwargs : dict, optional - Options to pass to :func:`pyarrow.compute.sum`. - - Returns - ------- - sum : Scalar - A scalar containing the sum value. - """ + ) -> Array[Scalar[_CastAs]]: ... + + def view(self, target_type: _CastAs) -> Array[Scalar[_CastAs]]: ... + + def sum(self, **kwargs) -> _Scalar_co: ... + @property def type(self: Array[Scalar[_DataTypeT]]) -> _DataTypeT: ... - def unique(self) -> Self: - """ - Compute distinct elements in array. - - Returns - ------- - unique : Array - An array of the same data type, with deduplicated elements. - """ - def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: - """ - Compute dictionary-encoded representation of array. - - See :func:`pyarrow.compute.dictionary_encode` for full usage. - - Parameters - ---------- - null_encoding : str, default "mask" - How to handle null entries. - - Returns - ------- - encoded : DictionaryArray - A dictionary-encoded version of this array. - """ - def value_counts(self) -> StructArray: - """ - Compute counts of unique elements in array. - - Returns - ------- - StructArray - An array of structs - """ + def unique(self) -> Self: ... + + def dictionary_encode(self, null_encoding: str = "mask") -> DictionaryArray: ... + + def value_counts(self) -> StructArray: ... + @staticmethod def from_pandas( obj: pd.Series | np.ndarray | ArrayLike, @@ -696,39 +203,8 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): type: _DataTypeT | None = None, safe: bool = True, memory_pool: MemoryPool | None = None, - ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: - """ - Convert pandas.Series to an Arrow Array. - - This method uses Pandas semantics about what values indicate - nulls. See pyarrow.array for more general conversion from arrays or - sequences to Arrow arrays. - - Parameters - ---------- - obj : ndarray, pandas.Series, array-like - mask : array (boolean), optional - Indicate which values are null (True) or not null (False). 
- type : pyarrow.DataType - Explicit type to attempt to coerce to, otherwise will be inferred - from the data. - safe : bool, default True - Check for overflows or other unsafe conversions. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the currently-set default - memory pool. - - Notes - ----- - Localized timestamps will currently be returned as UTC (pandas's native - representation). Timezone-naive data will be implicitly interpreted as - UTC. - - Returns - ------- - array : pyarrow.Array or pyarrow.ChunkedArray - ChunkedArray is returned if object data overflows binary buffer. - """ + ) -> Array[Scalar[_DataTypeT]] | Array[Scalar]: ... + @staticmethod def from_buffers( type: _DataTypeT, @@ -737,68 +213,18 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): null_count: int = -1, offset=0, children: NullableCollection[Array[Scalar[_DataTypeT]]] | None = None, - ) -> Array[Scalar[_DataTypeT]]: - """ - Construct an Array from a sequence of buffers. - - The concrete type returned depends on the datatype. - - Parameters - ---------- - type : DataType - The value type of the array. - length : int - The number of values in the array. - buffers : List[Buffer] - The buffers backing this array. - null_count : int, default -1 - The number of null entries in the array. Negative value means that - the null count is not known. - offset : int, default 0 - The array's logical offset (in values, not in bytes) from the - start of each buffer. - children : List[Array], default None - Nested type children with length matching type.num_fields. - - Returns - ------- - array : Array - """ + ) -> Array[Scalar[_DataTypeT]]: ... + @property def null_count(self) -> int: ... @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the array. - - In other words, the sum of bytes from all buffer - ranges referenced. + def nbytes(self) -> int: ... - Unlike `get_total_buffer_size` this method will account for array - offsets. + def get_total_buffer_size(self) -> int: ... - If buffers are shared between arrays then the shared - portion will be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the array. - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - """ def __sizeof__(self) -> int: ... - def __iter__(self) -> Iterator[_Scalar_co]: - """ - Implement iter(self). - """ + def __iter__(self) -> Iterator[_Scalar_co]: ... + def to_string( self, *, @@ -807,166 +233,37 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): window: int = 10, container_window: int = 2, skip_new_lines: bool = False, - ) -> str: - """ - Render a "pretty-printed" string representation of the Array. - - Note: for data on a non-CPU device, the full array is copied to CPU - memory. - - Parameters - ---------- - indent : int, default 2 - How much to indent the internal items in the string to - the right, by default ``2``. - top_level_indent : int, default 0 - How much to indent right the entire content of the array, - by default ``0``. 
- window : int - How many primitive items to preview at the begin and end - of the array when the array is bigger than the window. - The other items will be ellipsed. - container_window : int - How many container items (such as a list in a list array) - to preview at the begin and end of the array when the array - is bigger than the window. - skip_new_lines : bool - If the array should be rendered as a single line of text - or if each element should be on its own line. - element_size_limit : int, default 100 - Maximum number of characters of a single element before it is truncated. - """ + ) -> str: ... + format = to_string - def equals(self, other: Self) -> bool: - """ - Parameters - ---------- - other : pyarrow.Array - - Returns - ------- - bool - """ - def __len__(self) -> int: - """ - Return len(self). - """ - def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: - """ - Return BooleanArray indicating the null values. - - Parameters - ---------- - nan_is_null : bool (optional, default False) - Whether floating-point NaN values should also be considered null. - - Returns - ------- - array : boolean Array - """ - def is_nan(self) -> BooleanArray: - """ - Return BooleanArray indicating the NaN values. - - Returns - ------- - array : boolean Array - """ - def is_valid(self) -> BooleanArray: - """ - Return BooleanArray indicating the non-null values. - """ + def equals(self, other: Self) -> bool: ... + + def __len__(self) -> int: ... + + def is_null(self, *, nan_is_null: bool = False) -> BooleanArray: ... + + def is_nan(self) -> BooleanArray: ... + + def is_valid(self) -> BooleanArray: ... + def fill_null( self: Array[Scalar[_BasicDataType[_AsPyType]]], fill_value: _AsPyType - ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: - """ - See :func:`pyarrow.compute.fill_null` for usage. - - Parameters - ---------- - fill_value : any - The replacement value for null entries. - - Returns - ------- - result : Array - A new array with nulls replaced by the given value. - """ - def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: - """ - Slice or return value at given index - - Parameters - ---------- - key : integer or slice - Slices with step not equal to 1 (or None) will produce a copy - rather than a zero-copy view - - Returns - ------- - value : Scalar (index) or Array (slice) - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this array. - - Parameters - ---------- - offset : int, default 0 - Offset from start of array to slice. - length : int, default None - Length of slice (default is until end of Array starting from - offset). - - Returns - ------- - sliced : Array - An array with the same datatype, containing the sliced values. - """ - def take(self, indices: Indices) -> Self: - """ - Select values from an array. - - See :func:`pyarrow.compute.take` for full usage. - - Parameters - ---------- - indices : Array or array-like - The indices in the array whose values will be returned. - - Returns - ------- - taken : Array - An array with the same datatype, containing the taken values. - """ - def drop_null(self) -> Self: - """ - Remove missing values from an array. - """ + ) -> Array[Scalar[_BasicDataType[_AsPyType]]]: ... + + def __getitem__(self, key: int | builtins.slice) -> _Scalar_co | Self: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + + def drop_null(self) -> Self: ... 
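The selection and null-handling methods stubbed in this hunk (is_null, fill_null, take, drop_null), sketched on a small array; the values follow from the semantics described in the removed docstrings:

    >>> import pyarrow as pa
    >>> arr = pa.array([1, None, 3, 4])
    >>> arr.is_null().to_pylist()
    [False, True, False, False]
    >>> arr.fill_null(0).to_pylist()
    [1, 0, 3, 4]
    >>> arr.take(pa.array([0, 2])).to_pylist()
    [1, 3]
    >>> arr.drop_null().to_pylist()
    [1, 3, 4]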
+ def filter( self, mask: Mask, *, null_selection_behavior: Literal["drop", "emit_null"] = "drop", - ) -> Self: - """ - Select values from an array. - - See :func:`pyarrow.compute.filter` for full usage. - - Parameters - ---------- - mask : Array or array-like - The boolean mask to filter the array with. - null_selection_behavior : str, default "drop" - How nulls in the mask should be handled. - - Returns - ------- - filtered : Array - An array of the same type, with only the elements selected by - the boolean mask. - """ + ) -> Self: ... def index( self: Array[_ScalarT] | Array[Scalar[_BasicDataType[_AsPyType]]], @@ -975,567 +272,190 @@ class Array(_PandasConvertible[pd.Series], Generic[_Scalar_co]): end: int | None = None, *, memory_pool: MemoryPool | None = None, - ) -> Int64Scalar: - """ - Find the first index of a value. - - See :func:`pyarrow.compute.index` for full usage. - - Parameters - ---------- - value : Scalar or object - The value to look for in the array. - start : int, optional - The start index where to look for `value`. - end : int, optional - The end index where to look for `value`. - memory_pool : MemoryPool, optional - A memory pool for potential memory allocations. - - Returns - ------- - index : Int64Scalar - The index of the value in the array (-1 if not found). - """ - def sort(self, order: Order = "ascending", **kwargs) -> Self: - """ - Sort the Array - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - result : Array - """ - def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... - def to_numpy(self, zero_copy_only: bool = True, writable: bool = False) -> np.ndarray: - """ - Return a NumPy view or copy of this array. - - By default, tries to return a view of this array. This is only - supported for primitive arrays with the same memory layout as NumPy - (i.e. integers, floating point, ..) and without any nulls. - - For the extension arrays, this method simply delegates to the - underlying storage array. - - Parameters - ---------- - zero_copy_only : bool, default True - If True, an exception will be raised if the conversion to a numpy - array would require copying the underlying data (e.g. in presence - of nulls, or for non-primitive types). - writable : bool, default False - For numpy arrays created with zero copy (view on the Arrow data), - the resulting array is not writable (Arrow data is immutable). - By setting this to True, a copy of the array is made to ensure - it is writable. - - Returns - ------- - array : numpy.ndarray - """ + ) -> Int64Scalar: ... + + def sort(self, order: Order = "ascending", **kwargs) -> Self: ... + + def __array__(self, dtype: np.dtype | None = None, + copy: bool | None = None) -> np.ndarray: ... + + def to_numpy(self, zero_copy_only: bool = True, + writable: bool = False) -> np.ndarray: ... + def to_pylist( self: Array[Scalar[_BasicDataType[_AsPyType]]], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[_AsPyType | None]: - """ - Convert to a list of native Python objects. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. 
- The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Returns - ------- - lst : list - """ + ) -> list[_AsPyType | None]: ... + tolist = to_pylist - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ + def validate(self, *, full: bool = False) -> None: ... + @property - def offset(self) -> int: - """ - A relative position into another array's data. - - The purpose is to enable zero-copy slicing. This value defaults to zero - but must be applied on all operations with the physical storage - buffers. - """ - def buffers(self) -> list[Buffer | None]: - """ - Return a list of Buffer objects pointing to this array's physical - storage. - - To correctly interpret these buffers, you need to also apply the offset - multiplied with the size of the stored data type. - """ - def copy_to(self, destination: MemoryManager | Device) -> Self: - """ - Construct a copy of the array with all buffers on destination - device. - - This method recursively copies the array's buffers and those of its - children onto the destination MemoryManager device and returns the - new Array. - - Parameters - ---------- - destination : pyarrow.MemoryManager or pyarrow.Device - The destination device to copy the array to. - - Returns - ------- - Array - """ - def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: - """ - Export to a C ArrowArray struct, given its pointer. - - If a C ArrowSchema struct pointer is also given, the array type - is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ + def offset(self) -> int: ... + + def buffers(self) -> list[Buffer | None]: ... + + def copy_to(self, destination: MemoryManager | Device) -> Self: ... + + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + @classmethod - def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: - """ - Import Array from a C ArrowArray struct, given its pointer - and the imported array type. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowArray struct. - type: DataType or int - Either a DataType object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_array__(self, requested_schema=None) -> Any: - """ - Get a pair of PyCapsules containing a C ArrowArray representation of the object. 
- - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the array to this data type. - If None, the array will be returned as-is, with a type matching the - one returned by :meth:`__arrow_c_schema__()`. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowArray, - respectively. - """ + def _import_from_c(cls, in_ptr: int, type: int | DataType) -> Self: ... + + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + @classmethod def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... - def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: - """ - Export to a C ArrowDeviceArray struct, given its pointer. - - If a C ArrowSchema struct pointer is also given, the array type - is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + @classmethod - def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: - """ - Import Array from a C ArrowDeviceArray struct, given its pointer - and the imported array type. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - type: DataType or int - Either a DataType object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - - def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: - """ - Get a pair of PyCapsules containing a C ArrowDeviceArray representation - of the object. - - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the array to this data type. - If None, the array will be returned as-is, with a type matching the - one returned by :meth:`__arrow_c_schema__()`. - kwargs - Currently no additional keyword arguments are supported, but - this method will accept any keyword with a value of ``None`` - for compatibility with future keywords. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, - respectively. - """ + def _import_from_c_device(cls, in_ptr: int, type: DataType | int) -> Self: ... + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + @classmethod def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... - def __dlpack__(self, stream: int | None = None) -> Any: - """ - Export a primitive array as a DLPack capsule. - - Parameters - ---------- - stream : int, optional - A Python integer representing a pointer to a stream. Currently not supported. - Stream is provided by the consumer to the producer to instruct the producer - to ensure that operations can safely be performed on the array. - - Returns - ------- - capsule : PyCapsule - A DLPack capsule for the array, pointing to a DLManagedTensor. - """ - def __dlpack_device__(self) -> tuple[int, int]: - """ - Return the DLPack device tuple this arrays resides on. 
- - Returns - ------- - tuple : Tuple[int, int] - Tuple with index specifying the type of the device (where - CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the - device which is 0 by default for CPU. - """ - @property - def device_type(self) -> DeviceAllocationType: - """ - The device type where the array resides. + def __dlpack__(self, stream: int | None = None) -> Any: ... - Returns - ------- - DeviceAllocationType - """ + def __dlpack_device__(self) -> tuple[int, int]: ... @property - def is_cpu(self) -> bool: - """ - Whether the array is CPU-accessible. - """ + def device_type(self) -> DeviceAllocationType: ... + @property - def statistics(self) -> ArrayStatistics | None: - """ - Statistics of the array. - """ + def is_cpu(self) -> bool: ... + + @property + def statistics(self) -> ArrayStatistics | None: ... + class NullArray(Array[NullScalar]): - """ - Concrete class for Arrow arrays of null data type. - """ + ... + class BooleanArray(Array[BooleanScalar]): - """ - Concrete class for Arrow arrays of boolean data type. - """ + @property def false_count(self) -> int: ... @property def true_count(self) -> int: ... + class NumericArray(Array[_ScalarT]): - """ - A base class for Arrow numeric arrays. - """ + ... + + class IntegerArray(NumericArray[_ScalarT]): - """ - A base class for Arrow integer arrays. - """ + ... + + class FloatingPointArray(NumericArray[_ScalarT]): - """ - A base class for Arrow floating-point arrays. - """ + ... + + class Int8Array(IntegerArray[Int8Scalar]): - """ - Concrete class for Arrow arrays of int8 data type. - """ + ... + + class UInt8Array(IntegerArray[UInt8Scalar]): - """ - Concrete class for Arrow arrays of uint8 data type. - """ + ... + + class Int16Array(IntegerArray[Int16Scalar]): - """ - Concrete class for Arrow arrays of int16 data type. - """ + ... + + class UInt16Array(IntegerArray[UInt16Scalar]): - """ - Concrete class for Arrow arrays of uint16 data type. - """ + ... + + class Int32Array(IntegerArray[Int32Scalar]): - """ - Concrete class for Arrow arrays of int32 data type. - """ + ... + + class UInt32Array(IntegerArray[UInt32Scalar]): - """ - Concrete class for Arrow arrays of uint32 data type. - """ + ... + + class Int64Array(IntegerArray[Int64Scalar]): - """ - Concrete class for Arrow arrays of int64 data type. - """ + ... + + class UInt64Array(IntegerArray[UInt64Scalar]): - """ - Concrete class for Arrow arrays of uint64 data type. - """ + ... + + class Date32Array(NumericArray[Date32Scalar]): - """ - Concrete class for Arrow arrays of date32 data type. - """ + ... + + class Date64Array(NumericArray[Date64Scalar]): - """ - Concrete class for Arrow arrays of date64 data type. - """ + ... + + class TimestampArray(NumericArray[TimestampScalar[_Unit, _Tz]]): - """ - Concrete class for Arrow arrays of timestamp data type. - """ + ... + + class Time32Array(NumericArray[Time32Scalar[_Time32Unit]]): - """ - Concrete class for Arrow arrays of time32 data type. - """ + ... + + class Time64Array(NumericArray[Time64Scalar[_Time64Unit]]): - """ - Concrete class for Arrow arrays of time64 data type. - """ + ... + + class DurationArray(NumericArray[DurationScalar[_Unit]]): - """ - Concrete class for Arrow arrays of duration data type. - """ + ... + + class MonthDayNanoIntervalArray(Array[MonthDayNanoIntervalScalar]): - """ - Concrete class for Arrow arrays of interval[MonthDayNano] type. - """ + ... + + class HalfFloatArray(FloatingPointArray[HalfFloatScalar]): - """ - Concrete class for Arrow arrays of float16 data type. - """ + ... 
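The concrete array classes stubbed above are the types ``pa.array`` returns at runtime for the corresponding Arrow data types. A short sketch (standard pyarrow calls, not introduced by this patch) showing how the stubbed classes line up with inferred and explicit types:

>>> import datetime
>>> import pyarrow as pa
>>> isinstance(pa.array([1, 2], type=pa.int8()), pa.Int8Array)          # True
>>> isinstance(pa.array([1.5], type=pa.float32()), pa.FloatArray)       # True
>>> isinstance(pa.array([True, None]), pa.BooleanArray)                 # True
>>> isinstance(pa.array([datetime.date(2020, 1, 1)]), pa.Date32Array)   # True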
+ + class FloatArray(FloatingPointArray[FloatScalar]): - """ - Concrete class for Arrow arrays of float32 data type. - """ + ... + + class DoubleArray(FloatingPointArray[DoubleScalar]): - """ - Concrete class for Arrow arrays of float64 data type. - """ + ... + + class FixedSizeBinaryArray(Array[FixedSizeBinaryScalar]): - """ - Concrete class for Arrow arrays of a fixed-size binary data type. - """ + ... + + class Decimal32Array(FixedSizeBinaryArray): - """ - """ + ... + + class Decimal64Array(FixedSizeBinaryArray): - """ - Concrete class for Arrow arrays of decimal64 data type. - """ + ... + + class Decimal128Array(FixedSizeBinaryArray): - """ - Concrete class for Arrow arrays of decimal128 data type. - """ + ... + + class Decimal256Array(FixedSizeBinaryArray): - """ - Concrete class for Arrow arrays of decimal256 data type. - """ + ... + class BaseListArray(Array[_ScalarT]): - def flatten(self, recursive: bool = False) -> Array: - """ - Unnest this [Large]ListArray/[Large]ListViewArray/FixedSizeListArray - according to 'recursive'. - - Note that this method is different from ``self.values`` in that - it takes care of the slicing offset as well as null elements backed - by non-empty sub-lists. - - Parameters - ---------- - recursive : bool, default False, optional - When True, flatten this logical list-array recursively until an - array of non-list values is formed. - - When False, flatten only the top level. - - Returns - ------- - result : Array - - Examples - -------- - - Basic logical list-array's flatten - >>> import pyarrow as pa - >>> values = [1, 2, 3, 4] - >>> offsets = [2, 1, 0] - >>> sizes = [2, 2, 2] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 3, - 4 - ], - [ - 2, - 3 - ], - [ - 1, - 2 - ] - ] - >>> array.flatten() - - [ - 3, - 4, - 2, - 3, - 1, - 2 - ] - - When recursive=True, nested list arrays are flattened recursively - until an array of non-list values is formed. - - >>> array = pa.array([ - ... None, - ... [ - ... [1, None, 2], - ... None, - ... [3, 4] - ... ], - ... [], - ... [ - ... [], - ... [5, 6], - ... None - ... ], - ... [ - ... [7, 8] - ... ] - ... ], type=pa.list_(pa.list_(pa.int64()))) - >>> array.flatten(True) - - [ - 1, - null, - 2, - 3, - 4, - 5, - 6, - 7, - 8 - ] - """ - def value_parent_indices(self) -> Int64Array: - """ - Return array of same length as list child values array where each - output value is the index of the parent list array slot containing each - child value. - - Examples - -------- - >>> import pyarrow as pa - >>> arr = pa.array([[1, 2, 3], [], None, [4]], - ... type=pa.list_(pa.int32())) - >>> arr.value_parent_indices() - - [ - 0, - 0, - 0, - 3 - ] - """ - def value_lengths(self) -> Int32Array: - """ - Return integers array with values equal to the respective length of - each list element. Null list values are null in the output. - - Examples - -------- - >>> import pyarrow as pa - >>> arr = pa.array([[1, 2, 3], [], None, [4]], - ... type=pa.list_(pa.int32())) - >>> arr.value_lengths() - - [ - 3, - 0, - null, - 1 - ] - """ + def flatten(self, recursive: bool = False) -> Array: ... + + def value_parent_indices(self) -> Int64Array: ... + + def value_lengths(self) -> Int32Array: ... + class ListArray(BaseListArray[_ScalarT]): - """ - Concrete class for Arrow arrays of a list data type. 
- """ + @classmethod def from_arrays( cls, @@ -1545,158 +465,17 @@ class ListArray(BaseListArray[_ScalarT]): type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> ListArray[ListScalar[_DataTypeT | Int64Type | Float64Type | StringType | BinaryType]] | ListArray: - """ - Construct ListArray from arrays of int32 offsets and values. - - Parameters - ---------- - offsets : Array (int32 type) - values : Array (any type) - type : DataType, optional - If not specified, a default ListType with the values' type is - used. - pool : MemoryPool, optional - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). - - Returns - ------- - list_array : ListArray - - Examples - -------- - >>> import pyarrow as pa - >>> values = pa.array([1, 2, 3, 4]) - >>> offsets = pa.array([0, 2, 4]) - >>> pa.ListArray.from_arrays(offsets, values) - - [ - [ - 1, - 2 - ], - [ - 3, - 4 - ] - ] - >>> # nulls in the offsets array become null lists - >>> offsets = pa.array([0, None, 2, 4]) - >>> pa.ListArray.from_arrays(offsets, values) - - [ - [ - 1, - 2 - ], - null, - [ - 3, - 4 - ] - ] - """ + ) -> ListArray[ListScalar[_DataTypeT | Int64Type | Float64Type | StringType | BinaryType]] | ListArray: ... + @property - def values(self) -> Array: - """ - Return the underlying array of values which backs the ListArray - ignoring the array's offset. - - If any of the list elements are null, but are backed by a - non-empty sub-list, those elements will be included in the - output. - - Compare with :meth:`flatten`, which returns only the non-null - values taking into consideration the array's offset. - - Returns - ------- - values : Array - - See Also - -------- - ListArray.flatten : ... - - Examples - -------- - - The values include null elements from sub-lists: - - >>> import pyarrow as pa - >>> array = pa.array([[1, 2], None, [3, 4, None, 6]]) - >>> array.values - - [ - 1, - 2, - 3, - 4, - null, - 6 - ] - - If an array is sliced, the slice still uses the same - underlying data as the original array, just with an - offset. Since values ignores the offset, the values are the - same: - - >>> sliced = array.slice(1, 2) - >>> sliced - - [ - null, - [ - 3, - 4, - null, - 6 - ] - ] - >>> sliced.values - - [ - 1, - 2, - 3, - 4, - null, - 6 - ] - """ + def values(self) -> Array: ... + @property - def offsets(self) -> Int32Array: - """ - Return the list offsets as an int32 array. - - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `ListArray.from_arrays` and get back the same - list array if the original one has nulls. - - Returns - ------- - offsets : Int32Array - - Examples - -------- - >>> import pyarrow as pa - >>> array = pa.array([[1, 2], None, [3, 4, 5]]) - >>> array.offsets - - [ - 0, - 2, - 2, - 5 - ] - """ + def offsets(self) -> Int32Array: ... + class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]): - """ - Concrete class for Arrow arrays of a large list data type. - Identical to ListArray, but 64-bit offsets. - """ @classmethod def from_arrays( cls, @@ -1706,113 +485,17 @@ class LargeListArray(BaseListArray[LargeListScalar[_DataTypeT]]): type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> LargeListArray[_DataTypeT] | LargeListArray[_DataTypeT]: - """ - Construct LargeListArray from arrays of int64 offsets and values. 
- - Parameters - ---------- - offsets : Array (int64 type) - values : Array (any type) - type : DataType, optional - If not specified, a default ListType with the values' type is - used. - pool : MemoryPool, optional - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). - - Returns - ------- - list_array : LargeListArray - """ - @property - def values(self) -> Array: - """ - Return the underlying array of values which backs the LargeListArray - ignoring the array's offset. - - If any of the list elements are null, but are backed by a - non-empty sub-list, those elements will be included in the - output. - - Compare with :meth:`flatten`, which returns only the non-null - values taking into consideration the array's offset. - - Returns - ------- - values : Array - - See Also - -------- - LargeListArray.flatten : ... - - Examples - -------- - - The values include null elements from the sub-lists: - - >>> import pyarrow as pa - >>> array = pa.array( - ... [[1, 2], None, [3, 4, None, 6]], - ... type=pa.large_list(pa.int32()), - ... ) - >>> array.values - - [ - 1, - 2, - 3, - 4, - null, - 6 - ] - - If an array is sliced, the slice still uses the same - underlying data as the original array, just with an - offset. Since values ignores the offset, the values are the - same: - - >>> sliced = array.slice(1, 2) - >>> sliced - - [ - null, - [ - 3, - 4, - null, - 6 - ] - ] - >>> sliced.values - - [ - 1, - 2, - 3, - 4, - null, - 6 - ] - """ + ) -> LargeListArray[_DataTypeT] | LargeListArray[_DataTypeT]: ... + @property - def offsets(self) -> Int64Array: - """ - Return the list offsets as an int64 array. + def values(self) -> Array: ... - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `LargeListArray.from_arrays` and get back the - same list array if the original one has nulls. + @property + def offsets(self) -> Int64Array: ... - Returns - ------- - offsets : Int64Array - """ class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]): - """ - Concrete class for Arrow arrays of a list view data type. - """ + @classmethod def from_arrays( cls, @@ -1822,194 +505,20 @@ class ListViewArray(BaseListArray[ListViewScalar[_DataTypeT]]): type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> ListViewArray[_DataTypeT] | ListViewArray[_DataTypeT]: - """ - Construct ListViewArray from arrays of int32 offsets, sizes, and values. - - Parameters - ---------- - offsets : Array (int32 type) - sizes : Array (int32 type) - values : Array (any type) - type : DataType, optional - If not specified, a default ListType with the values' type is - used. - pool : MemoryPool, optional - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). 
- - Returns - ------- - list_view_array : ListViewArray - - Examples - -------- - >>> import pyarrow as pa - >>> values = pa.array([1, 2, 3, 4]) - >>> offsets = pa.array([0, 1, 2]) - >>> sizes = pa.array([2, 2, 2]) - >>> pa.ListViewArray.from_arrays(offsets, sizes, values) - - [ - [ - 1, - 2 - ], - [ - 2, - 3 - ], - [ - 3, - 4 - ] - ] - >>> # use a null mask to represent null values - >>> mask = pa.array([False, True, False]) - >>> pa.ListViewArray.from_arrays(offsets, sizes, values, mask=mask) - - [ - [ - 1, - 2 - ], - null, - [ - 3, - 4 - ] - ] - >>> # null values can be defined in either offsets or sizes arrays - >>> # WARNING: this will result in a copy of the offsets or sizes arrays - >>> offsets = pa.array([0, None, 2]) - >>> pa.ListViewArray.from_arrays(offsets, sizes, values) - - [ - [ - 1, - 2 - ], - null, - [ - 3, - 4 - ] - ] - """ + ) -> ListViewArray[_DataTypeT] | ListViewArray[_DataTypeT]: ... + @property - def values(self) -> Array: - """ - Return the underlying array of values which backs the ListViewArray - ignoring the array's offset and sizes. - - The values array may be out of order and/or contain additional values - that are not found in the logical representation of the array. The only - guarantee is that each non-null value in the ListView Array is contiguous. - - Compare with :meth:`flatten`, which returns only the non-null - values taking into consideration the array's order and offset. - - Returns - ------- - values : Array - - Examples - -------- - The values include null elements from sub-lists: - - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 1, - 2 - ], - [], - [ - 2, - null, - 3, - 4 - ] - ] - >>> array.values - - [ - 1, - 2, - null, - 3, - 4 - ] - """ + def values(self) -> Array: ... + @property - def offsets(self) -> Int32Array: - """ - Return the list offsets as an int32 array. - - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `ListViewArray.from_arrays` and get back the same - list array if the original one has nulls. - - Returns - ------- - offsets : Int32Array - - Examples - -------- - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array.offsets - - [ - 0, - 0, - 1 - ] - """ + def offsets(self) -> Int32Array: ... + @property - def sizes(self) -> Int32Array: - """ - Return the list sizes as an int32 array. - - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `ListViewArray.from_arrays` and get back the same - list array if the original one has nulls. - - Returns - ------- - sizes : Int32Array - - Examples - -------- - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array.sizes - - [ - 2, - 0, - 4 - ] - """ + def sizes(self) -> Int32Array: ... + class LargeListViewArray(BaseListArray[LargeListScalar[_DataTypeT]]): - """ - Concrete class for Arrow arrays of a large list view data type. - Identical to ListViewArray, but with 64-bit offsets. 
- """ @classmethod def from_arrays( cls, @@ -2019,199 +528,20 @@ class LargeListViewArray(BaseListArray[LargeListScalar[_DataTypeT]]): type: _DataTypeT | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> LargeListViewArray[_DataTypeT]: - """ - Construct LargeListViewArray from arrays of int64 offsets and values. - - Parameters - ---------- - offsets : Array (int64 type) - sizes : Array (int64 type) - values : Array (any type) - type : DataType, optional - If not specified, a default ListType with the values' type is - used. - pool : MemoryPool, optional - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). - - Returns - ------- - list_view_array : LargeListViewArray - - Examples - -------- - >>> import pyarrow as pa - >>> values = pa.array([1, 2, 3, 4]) - >>> offsets = pa.array([0, 1, 2]) - >>> sizes = pa.array([2, 2, 2]) - >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) - - [ - [ - 1, - 2 - ], - [ - 2, - 3 - ], - [ - 3, - 4 - ] - ] - >>> # use a null mask to represent null values - >>> mask = pa.array([False, True, False]) - >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values, mask=mask) - - [ - [ - 1, - 2 - ], - null, - [ - 3, - 4 - ] - ] - >>> # null values can be defined in either offsets or sizes arrays - >>> # WARNING: this will result in a copy of the offsets or sizes arrays - >>> offsets = pa.array([0, None, 2]) - >>> pa.LargeListViewArray.from_arrays(offsets, sizes, values) - - [ - [ - 1, - 2 - ], - null, - [ - 3, - 4 - ] - ] - """ + ) -> LargeListViewArray[_DataTypeT]: ... + @property - def values(self) -> Array: - """ - Return the underlying array of values which backs the LargeListArray - ignoring the array's offset. - - The values array may be out of order and/or contain additional values - that are not found in the logical representation of the array. The only - guarantee is that each non-null value in the ListView Array is contiguous. - - Compare with :meth:`flatten`, which returns only the non-null - values taking into consideration the array's order and offset. - - Returns - ------- - values : Array - - See Also - -------- - LargeListArray.flatten : ... - - Examples - -------- - - The values include null elements from sub-lists: - - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 1, - 2 - ], - [], - [ - 2, - null, - 3, - 4 - ] - ] - >>> array.values - - [ - 1, - 2, - null, - 3, - 4 - ] - """ + def values(self) -> Array: ... + @property - def offsets(self) -> Int64Array: - """ - Return the list view offsets as an int64 array. - - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `LargeListViewArray.from_arrays` and get back the - same list array if the original one has nulls. - - Returns - ------- - offsets : Int64Array - - Examples - -------- - - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) - >>> array.offsets - - [ - 0, - 0, - 1 - ] - """ + def offsets(self) -> Int64Array: ... + @property - def sizes(self) -> Int64Array: - """ - Return the list view sizes as an int64 array. 
- - The returned array will not have a validity bitmap, so you cannot - expect to pass it to `LargeListViewArray.from_arrays` and get back the - same list array if the original one has nulls. - - Returns - ------- - sizes : Int64Array - - Examples - -------- - - >>> import pyarrow as pa - >>> values = [1, 2, None, 3, 4] - >>> offsets = [0, 0, 1] - >>> sizes = [2, 0, 4] - >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) - >>> array.sizes - - [ - 2, - 0, - 4 - ] - """ + def sizes(self) -> Int64Array: ... + class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]): - """ - Concrete class for Arrow arrays of a fixed size list data type. - """ + @classmethod def from_arrays( cls, @@ -2220,109 +550,18 @@ class FixedSizeListArray(BaseListArray[FixedSizeListScalar[_DataTypeT, _Size]]): *, type: None = None, mask: Mask | None = None, - ) -> FixedSizeListArray[_DataTypeT, _Size | None]: - """ - Construct FixedSizeListArray from array of values and a list length. - - Parameters - ---------- - values : Array (any type) - list_size : int - The fixed length of the lists. - type : DataType, optional - If not specified, a default ListType with the values' type and - `list_size` length is used. - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). - - - Returns - ------- - FixedSizeListArray - - Examples - -------- - - Create from a values array and a list size: - - >>> import pyarrow as pa - >>> values = pa.array([1, 2, 3, 4]) - >>> arr = pa.FixedSizeListArray.from_arrays(values, 2) - >>> arr - - [ - [ - 1, - 2 - ], - [ - 3, - 4 - ] - ] - - Or create from a values array, list size and matching type: - - >>> typ = pa.list_(pa.field("values", pa.int64()), 2) - >>> arr = pa.FixedSizeListArray.from_arrays(values, type=typ) - >>> arr - - [ - [ - 1, - 2 - ], - [ - 3, - 4 - ] - ] - """ + ) -> FixedSizeListArray[_DataTypeT, _Size | None]: ... + @property - def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: - """ - Return the underlying array of values which backs the - FixedSizeListArray ignoring the array's offset. - - Note even null elements are included. - - Compare with :meth:`flatten`, which returns only the non-null - sub-list values. - - Returns - ------- - values : Array - - See Also - -------- - FixedSizeListArray.flatten : ... - - Examples - -------- - >>> import pyarrow as pa - >>> array = pa.array( - ... [[1, 2], None, [3, None]], - ... type=pa.list_(pa.int32(), 2) - ... ) - >>> array.values - - [ - 1, - 2, - null, - null, - 3, - null - ] - """ + def values(self) -> BaseListArray[ListScalar[_DataTypeT]]: ... + _MapKeyT = TypeVar("_MapKeyT", bound=_BasicDataType) _MapItemT = TypeVar("_MapItemT", bound=_BasicDataType) + class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]): - """ - Concrete class for Arrow arrays of a map data type. - """ + @classmethod def from_arrays( cls, @@ -2334,159 +573,28 @@ class MapArray(BaseListArray[MapScalar[_MapKeyT, _MapItemT]]): type: MapType[_MapKeyT, _MapItemT] | None = None, pool: MemoryPool | None = None, mask: Mask | None = None, - ) -> MapArray[_MapKeyT, _MapItemT]: - """ - Construct MapArray from arrays of int32 offsets and key, item arrays. - - Parameters - ---------- - offsets : array-like or sequence (int32 type) - keys : array-like or sequence (any type) - items : array-like or sequence (any type) - type : DataType, optional - If not specified, a default MapArray with the keys' and items' type is used. 
- pool : MemoryPool - mask : Array (boolean type), optional - Indicate which values are null (True) or not null (False). - - Returns - ------- - map_array : MapArray - - Examples - -------- - First, let's understand the structure of our dataset when viewed in a rectangular data model. - The total of 5 respondents answered the question "How much did you like the movie x?". - The value -1 in the integer array means that the value is missing. The boolean array - represents the null bitmask corresponding to the missing values in the integer array. - - >>> import pyarrow as pa - >>> movies_rectangular = np.ma.masked_array([ - ... [10, -1, -1], - ... [8, 4, 5], - ... [-1, 10, 3], - ... [-1, -1, -1], - ... [-1, -1, -1] - ... ], - ... [ - ... [False, True, True], - ... [False, False, False], - ... [True, False, False], - ... [True, True, True], - ... [True, True, True], - ... ]) - - To represent the same data with the MapArray and from_arrays, the data is - formed like this: - - >>> offsets = [ - ... 0, # -- row 1 start - ... 1, # -- row 2 start - ... 4, # -- row 3 start - ... 6, # -- row 4 start - ... 6, # -- row 5 start - ... 6, # -- row 5 end - ... ] - >>> movies = [ - ... "Dark Knight", # ---------------------------------- row 1 - ... "Dark Knight", "Meet the Parents", "Superman", # -- row 2 - ... "Meet the Parents", "Superman", # ----------------- row 3 - ... ] - >>> likings = [ - ... 10, # -------- row 1 - ... 8, 4, 5, # --- row 2 - ... 10, 3 # ------ row 3 - ... ] - >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() - 0 [(Dark Knight, 10)] - 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... - 2 [(Meet the Parents, 10), (Superman, 3)] - 3 [] - 4 [] - dtype: object - - If the data in the empty rows needs to be marked as missing, it's possible - to do so by modifying the offsets argument, so that we specify `None` as - the starting positions of the rows we want marked as missing. The end row - offset still has to refer to the existing value from keys (and values): - - >>> offsets = [ - ... 0, # ----- row 1 start - ... 1, # ----- row 2 start - ... 4, # ----- row 3 start - ... None, # -- row 4 start - ... None, # -- row 5 start - ... 6, # ----- row 5 end - ... ] - >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas() - 0 [(Dark Knight, 10)] - 1 [(Dark Knight, 8), (Meet the Parents, 4), (Sup... - 2 [(Meet the Parents, 10), (Superman, 3)] - 3 None - 4 None - dtype: object - """ + ) -> MapArray[_MapKeyT, _MapItemT]: ... + @property - def keys(self) -> Array: - """ - Flattened array of keys across all maps in array - """ + def keys(self) -> Array: ... + @property - def items(self) -> Array: - """ - Flattened array of items across all maps in array - """ + def items(self) -> Array: ... + class UnionArray(Array[UnionScalar]): - """ - Concrete class for Arrow arrays of a Union data type. - """ + @deprecated("Use fields() instead") - def child(self, pos: int) -> Field: - """ - DEPRECATED, use field() instead. - - Parameters - ---------- - pos : int - The physical index of the union child field (not its type code). - - Returns - ------- - field : pyarrow.Field - The given child field. - """ - def field(self, pos: int) -> Array: - """ - Return the given child field as an individual array. - - For sparse unions, the returned array has its offset, length, - and null count adjusted. - - For dense unions, the returned array is unchanged. - - Parameters - ---------- - pos : int - The physical index of the union child field (not its type code). 
- - Returns - ------- - field : Array - The given child field. - """ + def child(self, pos: int) -> Field: ... + + def field(self, pos: int) -> Array: ... + @property - def type_codes(self) -> Int8Array: - """ - Get the type codes array. - """ + def type_codes(self) -> Int8Array: ... + @property - def offsets(self) -> Int32Array: - """ - Get the value offsets array (dense arrays only). + def offsets(self) -> Int32Array: ... - Does not account for any slice offset. - """ @staticmethod def from_dense( types: Int8Array, @@ -2494,50 +602,19 @@ class UnionArray(Array[UnionScalar]): children: NullableCollection[Array], field_names: list[str] | None = None, type_codes: Int8Array | None = None, - ) -> UnionArray: - """ - Construct dense UnionArray from arrays of int8 types, int32 offsets and - children arrays - - Parameters - ---------- - types : Array (int8 type) - value_offsets : Array (int32 type) - children : list - field_names : list - type_codes : list - - Returns - ------- - union_array : UnionArray - """ + ) -> UnionArray: ... + @staticmethod def from_sparse( types: Int8Array, children: NullableCollection[Array], field_names: list[str] | None = None, type_codes: Int8Array | None = None, - ) -> UnionArray: - """ - Construct sparse UnionArray from arrays of int8 types and children - arrays - - Parameters - ---------- - types : Array (int8 type) - children : list - field_names : list - type_codes : list - - Returns - ------- - union_array : UnionArray - """ + ) -> UnionArray: ... + class StringArray(Array[StringScalar]): - """ - Concrete class for Arrow arrays of string (or utf8) data type. - """ + @staticmethod def from_buffers( # type: ignore[override] length: int, @@ -2546,30 +623,11 @@ class StringArray(Array[StringScalar]): null_bitmap: Buffer | None = None, null_count: int | None = -1, offset: int | None = 0, - ) -> StringArray: - """ - Construct a StringArray from value_offsets and data buffers. - If there are nulls in the data, also a null_bitmap and the matching - null_count must be passed. - - Parameters - ---------- - length : int - value_offsets : Buffer - data : Buffer - null_bitmap : Buffer, optional - null_count : int, default 0 - offset : int, default 0 - - Returns - ------- - string_array : StringArray - """ + ) -> StringArray: ... + class LargeStringArray(Array[LargeStringScalar]): - """ - Concrete class for Arrow arrays of large string (or utf8) data type. - """ + @staticmethod def from_buffers( # type: ignore[override] length: int, @@ -2578,71 +636,39 @@ class LargeStringArray(Array[LargeStringScalar]): null_bitmap: Buffer | None = None, null_count: int | None = -1, offset: int | None = 0, - ) -> StringArray: - """ - Construct a LargeStringArray from value_offsets and data buffers. - If there are nulls in the data, also a null_bitmap and the matching - null_count must be passed. - - Parameters - ---------- - length : int - value_offsets : Buffer - data : Buffer - null_bitmap : Buffer, optional - null_count : int, default 0 - offset : int, default 0 - - Returns - ------- - string_array : StringArray - """ + ) -> StringArray: ... + class StringViewArray(Array[StringViewScalar]): - """ - Concrete class for Arrow arrays of string (or utf8) view data type. - """ + ... + class BinaryArray(Array[BinaryScalar]): - """ - Concrete class for Arrow arrays of variable-sized binary data type. - """ + @property - def total_values_length(self) -> int: - """ - The number of bytes from beginning to end of the data buffer addressed - by the offsets of this BinaryArray. 
- """ + def total_values_length(self) -> int: ... + class LargeBinaryArray(Array[LargeBinaryScalar]): - """ - Concrete class for Arrow arrays of large variable-sized binary data type. - """ + @property - def total_values_length(self) -> int: - """ - The number of bytes from beginning to end of the data buffer addressed - by the offsets of this LargeBinaryArray. - """ + def total_values_length(self) -> int: ... + class BinaryViewArray(Array[BinaryViewScalar]): - """ - Concrete class for Arrow arrays of variable-sized binary view data type. - """ + ... + class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): - """ - Concrete class for dictionary-encoded Arrow arrays. - """ + def dictionary_encode(self) -> Self: ... # type: ignore[override] - def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: - """ - Decodes the DictionaryArray to an Array. - """ + def dictionary_decode(self) -> Array[Scalar[_BasicValueT]]: ... + @property def indices(self) -> Array[Scalar[_IndexT]]: ... @property def dictionary(self) -> Array[Scalar[_BasicValueT]]: ... + @staticmethod def from_buffers( # type: ignore[override] type: _BasicValueT, @@ -2651,30 +677,8 @@ class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): dictionary: Array | np.ndarray | pd.Series, null_count: int = -1, offset: int = 0, - ) -> DictionaryArray[Any, _BasicValueT]: - """ - Construct a DictionaryArray from buffers. - - Parameters - ---------- - type : pyarrow.DataType - length : int - The number of values in the array. - buffers : List[Buffer] - The buffers backing the indices array. - dictionary : pyarrow.Array, ndarray or pandas.Series - The array of values referenced by the indices. - null_count : int, default -1 - The number of null entries in the indices array. Negative value means that - the null count is not known. - offset : int, default 0 - The array's logical offset (in values, not in bytes) from the - start of each buffer. - - Returns - ------- - dict_array : DictionaryArray - """ + ) -> DictionaryArray[Any, _BasicValueT]: ... + @staticmethod def from_arrays( indices: Indices, @@ -2684,64 +688,15 @@ class DictionaryArray(Array[DictionaryScalar[_IndexT, _BasicValueT]]): from_pandas: bool = False, safe: bool = True, memory_pool: MemoryPool | None = None, - ) -> DictionaryArray: - """ - Construct a DictionaryArray from indices and values. - - Parameters - ---------- - indices : pyarrow.Array, numpy.ndarray or pandas.Series, int type - Non-negative integers referencing the dictionary values by zero - based index. - dictionary : pyarrow.Array, ndarray or pandas.Series - The array of values referenced by the indices. - mask : ndarray or pandas.Series, bool type - True values indicate that indices are actually null. - ordered : bool, default False - Set to True if the category values are ordered. - from_pandas : bool, default False - If True, the indices should be treated as though they originated in - a pandas.Categorical (null encoded as -1). - safe : bool, default True - If True, check that the dictionary indices are in range. - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise uses default pool. - - Returns - ------- - dict_array : DictionaryArray - """ + ) -> DictionaryArray: ... + class StructArray(Array[StructScalar]): - """ - Concrete class for Arrow arrays of a struct data type. - """ - def field(self, index: int | str) -> Array: - """ - Retrieves the child array belonging to field. 
- - Parameters - ---------- - index : Union[int, str] - Index / position or name of the field. - - Returns - ------- - result : Array - """ - def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: - """ - Return one individual array for each field in the struct. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - - Returns - ------- - result : List[Array] - """ + + def field(self, index: int | str) -> Array: ... + + def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ... + @staticmethod def from_arrays( arrays: Iterable[Array], @@ -2750,78 +705,21 @@ class StructArray(Array[StructScalar]): mask=None, memory_pool: MemoryPool | None = None, type: StructType | None = None, - ) -> StructArray: - """ - Construct StructArray from collection of arrays representing - each field in the struct. - - Either field names, field instances or a struct type must be passed. - - Parameters - ---------- - arrays : sequence of Array - names : List[str] (optional) - Field names for each struct child. - fields : List[Field] (optional) - Field instances for each struct child. - mask : pyarrow.Array[bool] (optional) - Indicate which values are null (True) or not null (False). - memory_pool : MemoryPool (optional) - For memory allocations, if required, otherwise uses default pool. - type : pyarrow.StructType (optional) - Struct type for name and type of each child. - - Returns - ------- - result : StructArray - """ - def sort(self, order: Order = "ascending", by: str | None = None, **kwargs) -> StructArray: - """ - Sort the StructArray - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - by : str or None, default None - If to sort the array by one of its fields - or by the whole array. - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - result : StructArray - """ + ) -> StructArray: ... + + def sort(self, order: Order = "ascending", by: str | + None = None, **kwargs) -> StructArray: ... + class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]): - """ - Concrete class for Arrow run-end encoded arrays. - """ + @staticmethod def from_arrays( run_ends: Int16Array | Int32Array | Int64Array, values: Array, type: DataType | None = None, - ) -> RunEndEncodedArray[Int16Type | Int32Type | Int64Type, _BasicValueT]: # type: ignore[type-var] - """ - Construct RunEndEncodedArray from run_ends and values arrays. - - Parameters - ---------- - run_ends : Array (int16, int32, or int64 type) - The run_ends array. - values : Array (any type) - The values array. - type : pyarrow.DataType, optional - The run_end_encoded(run_end_type, value_type) array type. - - Returns - ------- - RunEndEncodedArray - """ + ) -> RunEndEncodedArray[Int16Type | Int32Type | Int64Type, _BasicValueT]: ... # type: ignore[type-var] + @staticmethod def from_buffers( # type: ignore[override] type: DataType, @@ -2830,424 +728,72 @@ class RunEndEncodedArray(Array[RunEndEncodedScalar[_RunEndType, _BasicValueT]]): null_count: int = -1, offset=0, children: tuple[Array, Array] | None = None, - ) -> RunEndEncodedArray[Any, _BasicValueT]: - """ - Construct a RunEndEncodedArray from all the parameters that make up an - Array. - - RunEndEncodedArrays do not have buffers, only children arrays, but this - implementation is needed to satisfy the Array interface. 
- - Parameters - ---------- - type : DataType - The run_end_encoded(run_end_type, value_type) type. - length : int - The logical length of the run-end encoded array. Expected to match - the last value of the run_ends array (children[0]) minus the offset. - buffers : List[Buffer] - Empty List or [None]. - null_count : int, default -1 - The number of null entries in the array. Run-end encoded arrays - are specified to not have valid bits and null_count always equals 0. - offset : int, default 0 - The array's logical offset (in values, not in bytes) from the - start of each buffer. - children : List[Array] - Nested type children containing the run_ends and values arrays. - - Returns - ------- - RunEndEncodedArray - """ - @property - def run_ends(self) -> Array[Scalar[_RunEndType]]: - """ - An array holding the logical indexes of each run-end. + ) -> RunEndEncodedArray[Any, _BasicValueT]: ... - The physical offset to the array is applied. - """ @property - def values(self) -> Array[Scalar[_BasicValueT]]: - """ - An array holding the values of each run. - - The physical offset to the array is applied. - """ - def find_physical_offset(self) -> int: - """ - Find the physical offset of this REE array. + def run_ends(self) -> Array[Scalar[_RunEndType]]: ... - This is the offset of the run that contains the value of the first - logical element of this array considering its offset. + @property + def values(self) -> Array[Scalar[_BasicValueT]]: ... - This function uses binary-search, so it has a O(log N) cost. - """ - def find_physical_length(self) -> int: - """ - Find the physical length of this REE array. + def find_physical_offset(self) -> int: ... - The physical length of an REE is the number of physical values (and - run-ends) necessary to represent the logical range of values from offset - to length. + def find_physical_length(self) -> int: ... - This function uses binary-search, so it has a O(log N) cost. - """ _ArrayT = TypeVar("_ArrayT", bound=Array) + class ExtensionArray(Array[ExtensionScalar], Generic[_ArrayT]): - """ - Concrete class for Arrow extension arrays. - """ + @property def storage(self) -> Any: ... + @staticmethod - def from_storage(typ: BaseExtensionType, storage: _ArrayT) -> ExtensionArray[_ArrayT]: - """ - Construct ExtensionArray from type and storage array. - - Parameters - ---------- - typ : DataType - The extension type for the result array. - storage : Array - The underlying storage for the result array. - - Returns - ------- - ext_array : ExtensionArray - """ + def from_storage(typ: BaseExtensionType, + storage: _ArrayT) -> ExtensionArray[_ArrayT]: ... + class JsonArray(ExtensionArray[_ArrayT]): - """ - Concrete class for Arrow arrays of JSON data type. - - This does not guarantee that the JSON data actually - is valid JSON. - - Examples - -------- - Define the extension type for JSON array - - >>> import pyarrow as pa - >>> json_type = pa.json_(pa.large_utf8()) - - Create an extension array - - >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] - >>> storage = pa.array(arr, pa.large_utf8()) - >>> pa.ExtensionArray.from_storage(json_type, storage) - - [ - null, - "{ "id":30, "values":["a", "b"] }" - ] - """ - """ - Concrete class for Arrow arrays of JSON data type. - - This does not guarantee that the JSON data actually - is valid JSON. 
- - Examples - -------- - Define the extension type for JSON array - - >>> import pyarrow as pa - >>> json_type = pa.json_(pa.large_utf8()) - - Create an extension array - - >>> arr = [None, '{ "id":30, "values":["a", "b"] }'] - >>> storage = pa.array(arr, pa.large_utf8()) - >>> pa.ExtensionArray.from_storage(json_type, storage) - - [ - null, - "{ "id":30, "values":["a", "b"] }" - ] - """ + ... + class UuidArray(ExtensionArray[_ArrayT]): - """ - Concrete class for Arrow arrays of UUID data type. - """ + ... + class FixedShapeTensorArray(ExtensionArray[_ArrayT]): - """ - Concrete class for fixed shape tensor extension arrays. - - Examples - -------- - Define the extension type for tensor array - - >>> import pyarrow as pa - >>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2]) - - Create an extension array - - >>> arr = [[1, 2, 3, 4], [10, 20, 30, 40], [100, 200, 300, 400]] - >>> storage = pa.array(arr, pa.list_(pa.int32(), 4)) - >>> pa.ExtensionArray.from_storage(tensor_type, storage) - - [ - [ - 1, - 2, - 3, - 4 - ], - [ - 10, - 20, - 30, - 40 - ], - [ - 100, - 200, - 300, - 400 - ] - ] - """ - - def to_numpy_ndarray(self) -> np.ndarray: - """ - Convert fixed shape tensor extension array to a multi-dimensional numpy.ndarray. - - The resulting ndarray will have (ndim + 1) dimensions. - The size of the first dimension will be the length of the fixed shape tensor array - and the rest of the dimensions will match the permuted shape of the fixed - shape tensor. - - The conversion is zero-copy. - - Returns - ------- - numpy.ndarray - Ndarray representing tensors in the fixed shape tensor array concatenated - along the first dimension. - """ - def to_tensor(self) -> Tensor: - """ - Convert fixed shape tensor extension array to a pyarrow.Tensor. - - The resulting Tensor will have (ndim + 1) dimensions. - The size of the first dimension will be the length of the fixed shape tensor array - and the rest of the dimensions will match the permuted shape of the fixed - shape tensor. - - The conversion is zero-copy. - - Returns - ------- - pyarrow.Tensor - Tensor representing tensors in the fixed shape tensor array concatenated - along the first dimension. - """ + + def to_numpy_ndarray(self) -> np.ndarray: ... + + def to_tensor(self) -> Tensor: ... @classmethod - def from_numpy_ndarray(cls, obj: np.ndarray) -> Self: - """ - Convert numpy tensors (ndarrays) to a fixed shape tensor extension array. - The first dimension of ndarray will become the length of the fixed - shape tensor array. - If input array data is not contiguous a copy will be made. - - Parameters - ---------- - obj : numpy.ndarray - dim_names : tuple or list of strings, default None - Explicit names to tensor dimensions. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> arr = np.array( - ... [[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]], - ... dtype=np.float32) - >>> pa.FixedShapeTensorArray.from_numpy_ndarray(arr) - - [ - [ - 1, - 2, - 3, - 4, - 5, - 6 - ], - [ - 1, - 2, - 3, - 4, - 5, - 6 - ] - ] - """ + def from_numpy_ndarray(cls, obj: np.ndarray) -> Self: ... + class OpaqueArray(ExtensionArray[_ArrayT]): - """ - Concrete class for opaque extension arrays. - - Examples - -------- - Define the extension type for an opaque array - - >>> import pyarrow as pa - >>> opaque_type = pa.opaque( - ... pa.binary(), - ... type_name="geometry", - ... vendor_name="postgis", - ... 
) - - Create an extension array - - >>> arr = [None, b"data"] - >>> storage = pa.array(arr, pa.binary()) - >>> pa.ExtensionArray.from_storage(opaque_type, storage) - - [ - null, - 64617461 - ] - """ + ... + class Bool8Array(ExtensionArray): - """ - Concrete class for bool8 extension arrays. - - Examples - -------- - Define the extension type for an bool8 array - - >>> import pyarrow as pa - >>> bool8_type = pa.bool8() - - Create an extension array - - >>> arr = [-1, 0, 1, 2, None] - >>> storage = pa.array(arr, pa.int8()) - >>> pa.ExtensionArray.from_storage(bool8_type, storage) - - [ - -1, - 0, - 1, - 2, - null - ] - """ - - def to_numpy(self, zero_copy_only: bool = ..., writable: bool = ...) -> np.ndarray: - """ - Return a NumPy bool view or copy of this array. - - By default, tries to return a view of this array. This is only - supported for arrays without any nulls. - - Parameters - ---------- - zero_copy_only : bool, default True - If True, an exception will be raised if the conversion to a numpy - array would require copying the underlying data (e.g. in presence - of nulls). - writable : bool, default False - For numpy arrays created with zero copy (view on the Arrow data), - the resulting array is not writable (Arrow data is immutable). - By setting this to True, a copy of the array is made to ensure - it is writable. - - Returns - ------- - array : numpy.ndarray - """ + + def to_numpy(self, zero_copy_only: bool = ..., + writable: bool = ...) -> np.ndarray: ... + @classmethod - def from_storage(cls, storage: Int8Array) -> Self: # type: ignore[override] - """ - Construct Bool8Array from Int8Array storage. - - Parameters - ---------- - storage : Int8Array - The underlying storage for the result array. - - Returns - ------- - bool8_array : Bool8Array - """ + def from_storage(cls, storage: Int8Array) -> Self: ... # type: ignore[override] + @classmethod - def from_numpy(cls, obj: np.ndarray) -> Self: - """ - Convert numpy array to a bool8 extension array without making a copy. - The input array must be 1-dimensional, with either bool_ or int8 dtype. - - Parameters - ---------- - obj : numpy.ndarray - - Returns - ------- - bool8_array : Bool8Array - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> arr = np.array([True, False, True], dtype=np.bool_) - >>> pa.Bool8Array.from_numpy(arr) - - [ - 1, - 0, - 1 - ] - """ - -def concat_arrays(arrays: Iterable[_ArrayT], memory_pool: MemoryPool | None = None) -> _ArrayT: - """ - Concatenate the given arrays. - - The contents of the input arrays are copied into the returned array. - - Raises - ------ - ArrowInvalid - If not all of the arrays have the same type. - - Parameters - ---------- - arrays : iterable of pyarrow.Array - Arrays to concatenate, must be identically typed. - memory_pool : MemoryPool, default None - For memory allocations. If None, the default pool is used. - - Examples - -------- - >>> import pyarrow as pa - >>> arr1 = pa.array([2, 4, 5, 100]) - >>> arr2 = pa.array([2, 4]) - >>> pa.concat_arrays([arr1, arr2]) - - [ - 2, - 4, - 5, - 100, - 2, - 4 - ] - """ - -def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: - """ - Create empty array of the given type. - """ + def from_numpy(cls, obj: np.ndarray) -> Self: ... + + +def concat_arrays(arrays: Iterable[_ArrayT], + memory_pool: MemoryPool | None = None) -> _ArrayT: ... + + +def _empty_array(type: _DataTypeT) -> Array[Scalar[_DataTypeT]]: ... 
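The module-level helpers and extension-array constructors stubbed above behave as documented in the docstrings this patch drops; a condensed sketch based on those removed examples (expected results noted in comments):

>>> import pyarrow as pa
>>> pa.concat_arrays([pa.array([2, 4, 5]), pa.array([2, 4])])    # Int64Array: [2, 4, 5, 2, 4]
>>> tensor_type = pa.fixed_shape_tensor(pa.int32(), [2, 2])
>>> storage = pa.array([[1, 2, 3, 4]], pa.list_(pa.int32(), 4))
>>> pa.ExtensionArray.from_storage(tensor_type, storage)         # FixedShapeTensorArray with one 2x2 tensor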
+ __all__ = [ "array", diff --git a/python/pyarrow-stubs/builder.pyi b/python/pyarrow-stubs/builder.pyi index 39372f8e512..c379bd83afb 100644 --- a/python/pyarrow-stubs/builder.pyi +++ b/python/pyarrow-stubs/builder.pyi @@ -21,86 +21,33 @@ from pyarrow.lib import MemoryPool, _Weakrefable from .array import StringArray, StringViewArray + class StringBuilder(_Weakrefable): - """ - Builder class for UTF8 strings. - This class exposes facilities for incrementally adding string values and - building the null bitmap for a pyarrow.Array (type='string'). - """ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def append(self, value: str | bytes | None): - """ - Append a single value to the builder. - - The value can either be a string/bytes object or a null value - (np.nan or None). - - Parameters - ---------- - value : string/bytes or np.nan/None - The value to append to the string array builder. - """ - def append_values(self, values: Iterable[str | bytes | None]): - """ - Append all the values from an iterable. - - Parameters - ---------- - values : iterable of string/bytes or np.nan/None values - The values to append to the string array builder. - """ - def finish(self) -> StringArray: - """ - Return result of builder as an Array object; also resets the builder. - - Returns - ------- - array : pyarrow.Array - """ + def append(self, value: str | bytes | None): ... + + def append_values(self, values: Iterable[str | bytes | None]): ... + + def finish(self) -> StringArray: ... + @property def null_count(self) -> int: ... def __len__(self) -> int: ... + class StringViewBuilder(_Weakrefable): - """ - Builder class for UTF8 string views. - This class exposes facilities for incrementally adding string values and - building the null bitmap for a pyarrow.Array (type='string_view'). - """ def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def append(self, value: str | bytes | None): - """ - Append a single value to the builder. - - The value can either be a string/bytes object or a null value - (np.nan or None). - - Parameters - ---------- - value : string/bytes or np.nan/None - The value to append to the string array builder. - """ - def append_values(self, values: Iterable[str | bytes | None]): - """ - Append all the values from an iterable. - - Parameters - ---------- - values : iterable of string/bytes or np.nan/None values - The values to append to the string array builder. - """ - def finish(self) -> StringViewArray: - """ - Return result of builder as an Array object; also resets the builder. - - Returns - ------- - array : pyarrow.Array - """ + def append(self, value: str | bytes | None): ... + + def append_values(self, values: Iterable[str | bytes | None]): ... + + def finish(self) -> StringViewArray: ... + @property def null_count(self) -> int: ... def __len__(self) -> int: ... + __all__ = ["StringBuilder", "StringViewBuilder"] diff --git a/python/pyarrow-stubs/compute.pyi b/python/pyarrow-stubs/compute.pyi index dcedb34b14a..235e8ffc34d 100644 --- a/python/pyarrow-stubs/compute.pyi +++ b/python/pyarrow-stubs/compute.pyi @@ -111,67 +111,11 @@ from . import lib _P = ParamSpec("_P") _R = TypeVar("_R") -def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: - """Reference a column of the dataset. - - Stores only the field's name. Type and other information is known only when - the expression is bound to a dataset having an explicit scheme. - - Nested references are allowed by passing multiple names or a tuple of - names. 
For example ``('foo', 'bar')`` references the field named "bar" - inside the field named "foo". - - Parameters - ---------- - *name_or_index : string, multiple strings, tuple or int - The name or index of the (possibly nested) field the expression - references to. - - Returns - ------- - field_expr : Expression - Reference to the given field - - Examples - -------- - >>> import pyarrow.compute as pc - >>> pc.field("a") - - >>> pc.field(1) - - >>> pc.field(("a", "b")) - >> pc.field("a", "b") - Expression: - """Expression representing a scalar value. - - Creates an Expression object representing a scalar value that can be used - in compute expressions and predicates. - - Parameters - ---------- - value : bool, int, float or string - Python value of the scalar. This function accepts any value that can be - converted to a ``pyarrow.Scalar`` using ``pa.scalar()``. - - Notes - ----- - This function differs from ``pyarrow.scalar()`` in the following way: - - * ``pyarrow.scalar()`` creates a ``pyarrow.Scalar`` object that represents - a single value in Arrow's memory model. - * ``pyarrow.compute.scalar()`` creates an ``Expression`` object representing - a scalar value that can be used in compute expressions, predicates, and - dataset filtering operations. - - Returns - ------- - scalar_expr : Expression - An Expression representing the scalar value - """ +def field(*name_or_index: str | tuple[str, ...] | int) -> Expression: ... + + +def scalar(value: bool | float | str) -> Expression: ... + def _clone_signature(f: Callable[_P, _R]) -> Callable[_P, _R]: ... @@ -274,53 +218,10 @@ def all( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar: - """ - Test whether all elements in a boolean array evaluate to true. - - Null values are ignored by default. - If the `skip_nulls` option is set to false, then Kleene logic is used. - See "kleene_and" for more details on Kleene logic. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar: ... + any = _clone_signature(all) -""" -Test whether any element in a boolean array evaluates to true. - -Null values are ignored by default. -If the `skip_nulls` option is set to false, then Kleene logic is used. -See "kleene_or" for more details on Kleene logic. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" def approximate_median( array: NumericScalar | NumericArray, @@ -330,28 +231,8 @@ def approximate_median( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Approximate median of a numeric array with T-Digest algorithm. - - Nulls and NaNs are ignored. - A null scalar is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.DoubleScalar: ... + def count( array: lib.Array | lib.ChunkedArray, @@ -360,25 +241,8 @@ def count( *, options: CountOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Count the number of null / non-null values. - - By default, only non-null values are counted. - This can be changed through CountOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - options : pyarrow.compute.CountOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar: ... + def count_distinct( array: lib.Array | lib.ChunkedArray, @@ -387,25 +251,8 @@ def count_distinct( *, options: CountOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Count the number of unique values. - - By default, only non-null values are counted. - This can be changed through CountOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - mode : str, default "only_valid" - Which values to count in the input. - Accepted values are "only_valid", "only_null", "all". - options : pyarrow.compute.CountOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar: ... + def first( array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT], @@ -415,29 +262,8 @@ def first( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _ScalarT: - """ - Compute the first value in each group. - - Null values are ignored by default. - If skip_nulls = false, then this will return the first and last values - regardless if it is null - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. 
- memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _ScalarT: ... + def first_last( array: lib.Array[Any] | lib.ChunkedArray[Any], @@ -447,29 +273,8 @@ def first_last( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar: - """ - Compute the first and last values of an array. - - Null values are ignored by default. - If skip_nulls = false, then this will return the first and last values - regardless if it is null - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.StructScalar: ... + def index( data: lib.Array[Any] | lib.ChunkedArray[Any], @@ -478,139 +283,13 @@ def index( end: int | None = None, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar: - """ - Find the index of the first occurrence of a given value. - - Parameters - ---------- - data : Array-like - value : Scalar-like object - The value to search for. - start : int, optional - end : int, optional - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - index : int - the index, or -1 if not found - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["Lorem", "ipsum", "dolor", "sit", "Lorem", "ipsum"]) - >>> pc.index(arr, "ipsum") - - >>> pc.index(arr, "ipsum", start=2) - - >>> pc.index(arr, "amet") - - """ +) -> lib.Int64Scalar: ... + last = _clone_signature(first) -""" -Compute the first and last values of an array. - -Null values are ignored by default. -If skip_nulls = false, then this will return the first and last values -regardless if it is null - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True -In [15]: print(pc.last.__doc__) -Compute the first value in each group. - -Null values are ignored by default. -If skip_nulls = false, then this will return the first and last values -regardless if it is null - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" max = _clone_signature(first) -""" -Compute the minimum or maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. 
- If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" min = _clone_signature(first) -""" -Compute the minimum or maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" min_max = _clone_signature(first_last) -""" -Compute the minimum and maximum values of a numeric array. - -Null values are ignored by default. -This can be changed through ScalarAggregateOptions. - -Parameters ----------- -array : Array-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. -options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def mean( array: FloatScalar | FloatArray @@ -623,33 +302,8 @@ def mean( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Scalar[Any]: - """ - Compute the mean of a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - The result is a double for integer and floating point arguments, - and a decimal with the same bit-width/precision/scale for decimal arguments. - For integers and floats, NaN is returned if min_count = 0 and - there are no values. For decimals, null is returned instead. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Scalar[Any]: ... 
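Illustrative only, not applied by the patch: a short sketch of the expression helpers and scalar aggregations whose stubs appear above (pc.field, pc.scalar, count, min_max, mean, index), plus the string builder from the builder.pyi hunk. The StringBuilder import location is an assumption based on the stub's pyarrow.lib imports.

import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.lib import StringBuilder  # assumed runtime location of the builder class

# Expressions: field() references a (possibly nested) column, scalar() wraps a literal.
expr = pc.field("a") > pc.scalar(3)

# Scalar aggregations with the return types declared above.
arr = pa.array([1, 2, 2, None, 5])
pc.count(arr)      # Int64Scalar, non-null values only by default
pc.min_max(arr)    # StructScalar with "min" and "max" fields
pc.mean(arr)       # DoubleScalar for integer input
pc.index(arr, 2)   # Int64Scalar: first occurrence, or -1 if absent

# Incremental string building; finish() returns a StringArray and resets the builder.
b = StringBuilder()
b.append_values(["foo", None, "bar"])
strings = b.finish()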
+ def mode( array: NumericScalar | NumericArray, @@ -660,46 +314,8 @@ def mode( min_count: int = 0, options: ModeOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.StructArray: - """ - Compute the modal (most common) values of a numeric array. - - Compute the n most common values and their respective occurrence counts. - The output has type `struct`, where T is the - input type. - The results are ordered by descending `count` first, and ascending `mode` - when breaking ties. - Nulls are ignored. If there are no non-null values in the array, - an empty array is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - n : int, default 1 - Number of distinct most-common values to return. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ModeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array([1, 1, 2, 2, 3, 2, 2, 2]) - >>> modes = pc.mode(arr, 2) - >>> modes[0] - - >>> modes[1] - - """ +) -> lib.StructArray: ... + def product( array: _ScalarT | lib.NumericArray[_ScalarT], @@ -709,29 +325,8 @@ def product( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _ScalarT: - """ - Compute the product of values in a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _ScalarT: ... + def quantile( array: NumericScalar | NumericArray, @@ -743,43 +338,8 @@ def quantile( min_count: int = 0, options: QuantileOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Compute an array of quantiles of a numeric array or chunked array. - - By default, 0.5 quantile (median) is returned. - If quantile lies between two data points, an interpolated value is - returned based on selected interpolation method. - Nulls and NaNs are ignored. - An array of nulls is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to compute. All values must be in - [0, 1]. - interpolation : str, default "linear" - How to break ties between competing data points for a given quantile. 
- Accepted values are: - - - "linear": compute an interpolation - - "lower": always use the smallest of the two data points - - "higher": always use the largest of the two data points - - "nearest": select the data point that is closest to the quantile - - "midpoint": compute the (unweighted) mean of the two data points - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.QuantileOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.DoubleArray: ... + def stddev( array: NumericScalar | NumericArray, @@ -790,32 +350,8 @@ def stddev( min_count: int = 0, options: VarianceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Calculate the standard deviation of a numeric array. - - The number of degrees of freedom can be controlled using VarianceOptions. - By default (`ddof` = 0), the population standard deviation is calculated. - Nulls are ignored. If there are not enough non-null values in the array - to satisfy `ddof`, null is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.VarianceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.DoubleScalar: ... + def sum( array: _NumericScalarT | NumericArray[_NumericScalarT], @@ -825,29 +361,8 @@ def sum( min_count: int = 1, options: ScalarAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT: - """ - Compute the sum of a numeric array. - - Null values are ignored by default. Minimum count of non-null - values can be set and null is returned if too few are present. - This can be changed through ScalarAggregateOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 1 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.ScalarAggregateOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericScalarT: ... + def tdigest( array: NumericScalar | NumericArray, @@ -860,37 +375,8 @@ def tdigest( min_count: int = 0, options: TDigestOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Approximate quantiles of a numeric array with T-Digest algorithm. - - By default, 0.5 quantile (median) is returned. - Nulls and NaNs are ignored. 
- An array of nulls is returned if there is no valid data point. - - Parameters - ---------- - array : Array-like - Argument to compute function. - q : double or sequence of double, default 0.5 - Probability levels of the quantiles to approximate. All values must be - in [0, 1]. - delta : int, default 100 - Compression parameter for the T-digest algorithm. - buffer_size : int, default 500 - Buffer size for the T-digest algorithm. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.TDigestOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ +) -> lib.DoubleArray: ... + def variance( array: NumericScalar | NumericArray, @@ -901,32 +387,8 @@ def variance( min_count: int = 0, options: VarianceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleScalar: - """ - Calculate the variance of a numeric array. - - The number of degrees of freedom can be controlled using VarianceOptions. - By default (`ddof` = 0), the population variance is calculated. - Nulls are ignored. If there are not enough non-null values in the array - to satisfy `ddof`, null is returned. - - Parameters - ---------- - array : Array-like - Argument to compute function. - ddof : int, default 0 - Number of degrees of freedom. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - min_count : int, default 0 - Minimum number of non-null values in the input. If the number - of non-null values is below `min_count`, the output is null. - options : pyarrow.compute.VarianceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.DoubleScalar: ... + def top_k_unstable( values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, @@ -934,43 +396,8 @@ def top_k_unstable( sort_keys: list | None = None, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Select the indices of the top-k ordered elements from array- or table-like - data. - - This is a specialization for :func:`select_k_unstable`. Output is not - guaranteed to be stable. - - Parameters - ---------- - values : Array, ChunkedArray, RecordBatch, or Table - Data to sort and get top indices from. - k : int - The number of `k` elements to keep. - sort_keys : List-like - Column key names to order by when input is table-like data. - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - result : Array - Indices of the top-k ordered elements - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) - >>> pc.top_k_unstable(arr, k=3) - - [ - 5, - 4, - 2 - ] - """ +) -> lib.Array: ... 
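Again illustrative only: the statistics and top-k selection helpers stubbed above, reusing the input from the removed top_k_unstable docstring example.

import pyarrow as pa
import pyarrow.compute as pc

nums = pa.array([4.0, 1.0, 3.0, None, 2.0])
pc.quantile(nums, q=[0.25, 0.75])  # DoubleArray of interpolated quantiles
pc.stddev(nums, ddof=1)            # sample standard deviation, DoubleScalar
pc.sum(nums)                       # DoubleScalar in the input's numeric family

arr = pa.array(["a", "b", "c", None, "e", "f"])
pc.top_k_unstable(arr, k=3)        # indices of the three largest values: [5, 4, 2]
pc.bottom_k_unstable(arr, k=3)     # indices of the three smallest values: [0, 1, 2]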
+ def bottom_k_unstable( values: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table, @@ -978,79 +405,18 @@ def bottom_k_unstable( sort_keys: list | None = None, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.Array: - """ - Select the indices of the bottom-k ordered elements from - array- or table-like data. - - This is a specialization for :func:`select_k_unstable`. Output is not - guaranteed to be stable. - - Parameters - ---------- - values : Array, ChunkedArray, RecordBatch, or Table - Data to sort and get bottom indices from. - k : int - The number of `k` elements to keep. - sort_keys : List-like - Column key names to order by when input is table-like data. - memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - Returns - ------- - result : Array of indices - Indices of the bottom-k ordered elements - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.compute as pc - >>> arr = pa.array(["a", "b", "c", None, "e", "f"]) - >>> pc.bottom_k_unstable(arr, k=3) - - [ - 0, - 1, - 2 - ] - """ +) -> lib.Array: ... + # ========================= 2. Element-wise (“scalar”) functions ========================= # ========================= 2.1 Arithmetic ========================= def abs( x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: - """ - Calculate the absolute value of the argument element-wise. - - Results will wrap around on integer overflow. - Use function "abs_checked" if you want overflow - to return an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: ... -abs_checked = _clone_signature(abs) -""" -Calculate the absolute value of the argument element-wise. -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "abs". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" +abs_checked = _clone_signature(abs) def add( x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, @@ -1058,41 +424,10 @@ def add( /, *, memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: - """ - Add the arguments element-wise. - - Results will wrap around on integer overflow. - Use function "add_checked" if you want overflow - to return an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -add_checked = _clone_signature(add) -""" -Add the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "add". +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ... -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" +add_checked = _clone_signature(add) def divide( x: _NumericOrTemporalScalarT | NumericOrTemporalScalar | _NumericOrTemporalArrayT | Expression, @@ -1100,126 +435,26 @@ def divide( /, *, memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: - """ - Divide the arguments element-wise. - - Integer division by zero returns an error. However, integer overflow - wraps around, and floating-point division by zero returns an infinite. - Use function "divide_checked" if you want to get an error - in all the aforementioned cases. - - Parameters - ---------- - dividend : Array-like or scalar-like - Argument to compute function. - divisor : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ +) -> _NumericOrTemporalScalarT | _NumericOrTemporalArrayT | Expression: ... + divide_checked = _clone_signature(divide) -""" -Divide the arguments element-wise. - -An error is returned when trying to divide by zero, or when -integer overflow is encountered. - -Parameters ----------- -dividend : Array-like or scalar-like - Argument to compute function. -divisor : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def exp( exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT | NonFloatNumericScalar | lib.DoubleScalar, /, *, memory_pool: lib.MemoryPool | None = None -) -> _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression: - """ - Compute Euler's number raised to the power of specified exponent, element-wise. - - If exponent is null the result will be null. +) -> _FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression: ... - Parameters - ---------- - exponent : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ multiply = _clone_signature(add) -""" -Multiply the arguments element-wise. - -Results will wrap around on integer overflow. -Use function "multiply_checked" if you want overflow -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" multiply_checked = _clone_signature(add) -""" -Multiply the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "multiply". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def negate( x: _NumericOrDurationT | _NumericOrDurationArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: - """ - Negate the argument element-wise. - - Results will wrap around on integer overflow. - Use function "negate_checked" if you want overflow - to return an error. 
- - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -negate_checked = _clone_signature(negate) -""" -Negate the arguments element-wise. +) -> _NumericOrDurationT | _NumericOrDurationArrayT | Expression: ... -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "negate". -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" +negate_checked = _clone_signature(negate) def power( base: _NumericScalarT | _NumericArrayT | Expression | _NumericArrayT | NumericScalar, @@ -1227,39 +462,10 @@ def power( /, *, memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT | _NumericArrayT | Expression: - """ - Raise arguments to power element-wise. - - Integer to negative integer power returns an error. However, integer overflow - wraps around. If either base or exponent is null the result will be null. - - Parameters - ---------- - base : Array-like or scalar-like - Argument to compute function. - exponent : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericScalarT | _NumericArrayT | Expression: ... + power_checked = _clone_signature(power) -""" -Raise arguments to power element-wise. - -An error is returned when integer to negative integer power is encountered, -or integer overflow is encountered. - -Parameters ----------- -base : Array-like or scalar-like - Argument to compute function. -exponent : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def sign( x: NumericOrDurationArray | NumericOrDurationScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None @@ -1268,262 +474,42 @@ def sign( | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | lib.Int8Scalar | lib.FloatScalar | lib.DoubleScalar | Expression -): - """ - Get the signedness of the arguments element-wise. - - Output is any of (-1,1) for nonzero inputs and 0 for zero input. - NaN values return NaN. Integral values return signedness as Int8 and - floating-point values return it with the same type as the input values. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ +): ... -def sqrt(x: NumericArray | NumericScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray | FloatScalar | Expression: - """ - Takes the square root of arguments element-wise. - A negative argument returns a NaN. For a variant that returns an - error, use function "sqrt_checked". +def sqrt(x: NumericArray | NumericScalar | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> FloatArray | FloatScalar | Expression: ... - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - - """ sqrt_checked = _clone_signature(sqrt) -""" -Takes the square root of arguments element-wise. 
- -A negative argument returns an error. For a variant that returns a -NaN, use function "sqrt". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" subtract = _clone_signature(add) -""" -Subtract the arguments element-wise. - -Results will wrap around on integer overflow. -Use function "subtract_checked" if you want overflow -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" subtract_checked = _clone_signature(add) -""" -Subtract the arguments element-wise. - -This function returns an error on overflow. For a variant that -doesn't fail on overflow, use function "subtract". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.1 Bit-wise functions ========================= def bit_wise_and( x: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar], y: _NumericScalarT | _NumericArrayT | NumericScalar | Expression | ArrayOrChunkedArray[NumericScalar], /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericScalarT | _NumericArrayT | Expression: - """ - Bit-wise AND the arguments element-wise. - - Null values return null. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericScalarT | _NumericArrayT | Expression: ... + def bit_wise_not( x: _NumericScalarT | _NumericArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _NumericScalarT | _NumericArrayT | Expression: - """ - Bit-wise negate the arguments element-wise. - - Null values return null. +) -> _NumericScalarT | _NumericArrayT | Expression: ... - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ bit_wise_or = _clone_signature(bit_wise_and) -""" -Bit-wise OR the arguments element-wise. - -Null values return null. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" bit_wise_xor = _clone_signature(bit_wise_and) -""" -Bit-wise XOR the arguments element-wise. - -Null values return null. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" shift_left = _clone_signature(bit_wise_and) -""" -Left shift `x` by `y`. - -The shift operates as if on the two's complement representation of the number. 
-In other words, this is equivalent to multiplying `x` by 2 to the power `y`, -even if overflow occurs. -`x` is returned if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -Use function "shift_left_checked" if you want an invalid shift amount -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" shift_left_checked = _clone_signature(bit_wise_and) -""" -Left shift `x` by `y`. - -The shift operates as if on the two's complement representation of the number. -In other words, this is equivalent to multiplying `x` by 2 to the power `y`, -even if overflow occurs. -An error is raised if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -See "shift_left" for a variant that doesn't fail for an invalid shift amount. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" shift_right = _clone_signature(bit_wise_and) -""" -Right shift `x` by `y`. - -This is equivalent to dividing `x` by 2 to the power `y`. -`x` is returned if `y` (the amount to shift by) is: (1) negative or -(2) greater than or equal to the precision of `x`. -Use function "shift_right_checked" if you want an invalid shift amount -to return an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" shift_right_checked = _clone_signature(bit_wise_and) -""" -Right shift `x` by `y`. - -This is equivalent to dividing `x` by 2 to the power `y`. -An error is raised if `y` (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of `x`. -See "shift_right" for a variant that doesn't fail for an invalid shift amount - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.2 Rounding functions ========================= -def ceil(x: _FloatScalarT | _FloatArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT | _FloatArrayT | Expression: - """ - Round up to the nearest integer. +def ceil(x: _FloatScalarT | _FloatArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None) -> _FloatScalarT | _FloatArrayT | Expression: ... - Compute the smallest integer value not less in magnitude than `x`. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ floor = _clone_signature(ceil) -""" -Round down to the nearest integer. - -Compute the largest integer value not greater in magnitude than `x`. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def round( x: _NumericScalarT | _NumericArrayT | Expression, @@ -1544,30 +530,8 @@ def round( *, options: RoundOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT | _NumericArrayT | Expression: - """ - Round to a given precision. - - Options are used to control the number of digits and rounding mode. - Default behavior is to round to the nearest integer and - use half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - ndigits : int, default 0 - Number of fractional digits to round to. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericScalarT | _NumericArrayT | Expression: ... + def round_to_multiple( x: _NumericScalarT | _NumericArrayT | Expression, @@ -1588,31 +552,8 @@ def round_to_multiple( *, options: RoundToMultipleOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT | _NumericArrayT | Expression: - """ - Round to a given multiple. - - Options are used to control the rounding multiple and rounding mode. - Default behavior is to round to the nearest integer and - use half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - multiple : numeric scalar, default 1.0 - Multiple to round to. Should be a scalar of a type compatible - with the argument to be rounded. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundToMultipleOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericScalarT | _NumericArrayT | Expression: ... + def round_binary( x: _NumericScalarT | _NumericArrayT | Expression, @@ -1633,1058 +574,132 @@ def round_binary( *, options: RoundBinaryOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT | Expression: - """ - Round to the given precision. - - Options are used to control the rounding mode. - Default behavior is to use the half-to-even rule to break ties. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - s : Array-like or scalar-like - Argument to compute function. - round_mode : str, default "half_to_even" - Rounding and tie-breaking mode. - Accepted values are "down", "up", "towards_zero", "towards_infinity", - "half_down", "half_up", "half_towards_zero", "half_towards_infinity", - "half_to_even", "half_to_odd". - options : pyarrow.compute.RoundBinaryOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- """ +) -> _NumericScalarT | lib.NumericArray[_NumericScalarT] | _NumericArrayT | Expression: ... -trunc = _clone_signature(ceil) -""" -Compute the integral part. -Compute the nearest integer not greater in magnitude than `x`. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" +trunc = _clone_signature(ceil) # ========================= 2.3 Logarithmic functions ========================= def ln( x: FloatScalar | FloatArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: - """ - Compute natural logarithm. - - Non-positive values return -inf or NaN. Null values return null. - Use function "ln_checked" if you want non-positive values to raise an error. +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: ... - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ ln_checked = _clone_signature(ln) -""" -Compute natural logarithm. - -Non-positive values raise an error. Null values return null. -Use function "ln" if you want non-positive values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" log10 = _clone_signature(ln) -""" -Compute base 10 logarithm. - -Non-positive values return -inf or NaN. Null values return null. -Use function "log10_checked" if you want non-positive values -to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" log10_checked = _clone_signature(ln) -""" -Compute base 10 logarithm. - -Non-positive values raise an error. Null values return null. -Use function "log10" if you want non-positive values -to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" log1p = _clone_signature(ln) -""" -Compute natural log of (1+x). - -Values <= -1 return -inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p_checked" if you want invalid values to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" log1p_checked = _clone_signature(ln) -""" -Compute natural log of (1+x). - -Values <= -1 return -inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p" if you want invalid values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" log2 = _clone_signature(ln) -""" -Compute base 2 logarithm. - -Non-positive values return -inf or NaN. Null values return null. -Use function "log2_checked" if you want non-positive values -to raise an error. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" log2_checked = _clone_signature(ln) -""" -Compute base 2 logarithm. - -Non-positive values raise an error. Null values return null. -Use function "log2" if you want non-positive values -to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def logb( x: FloatScalar | FloatArray | Expression | Any, b: FloatScalar | FloatArray | Expression | Any, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression | Any: - """ - Compute base `b` logarithm. - - Values <= 0 return -inf or NaN. Null values return null. - Use function "logb_checked" if you want non-positive values to raise an error. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - b : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression | Any: ... + logb_checked = _clone_signature(logb) -""" -Compute base `b` logarithm. - -Values <= 0 return -inf or NaN. Null values return null. -Use function "logb" if you want non-positive values to return -inf or NaN. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -b : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.4 Trigonometric functions ========================= acos = _clone_signature(ln) -""" -Compute the inverse cosine. - -NaN is returned for invalid input values; -to raise an error instead, see "acos_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" acos_checked = _clone_signature(ln) -""" -Compute the inverse cosine. - -Invalid input values raise an error; -to return NaN instead, see "acos". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" asin = _clone_signature(ln) -""" -Compute the inverse sine. - -NaN is returned for invalid input values; -to raise an error instead, see "asin_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" asin_checked = _clone_signature(ln) -""" -Compute the inverse sine. - -Invalid input values raise an error; -to return NaN instead, see "asin". 
- -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" atan = _clone_signature(ln) -""" -Compute the inverse tangent of x. - -The return value is in the range [-pi/2, pi/2]; -for a full return range [-pi, pi], see "atan2". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cos = _clone_signature(ln) -""" -Compute the cosine. - -NaN is returned for invalid input values; -to raise an error instead, see "cos_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cos_checked = _clone_signature(ln) -""" -Compute the cosine. - -Infinite values raise an error; -to return NaN instead, see "cos". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" sin = _clone_signature(ln) -""" -Compute the sine. - -NaN is returned for invalid input values; -to raise an error instead, see "sin_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" sin_checked = _clone_signature(ln) -""" -Compute the sine. - -Invalid input values raise an error; -to return NaN instead, see "sin". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" tan = _clone_signature(ln) -""" -Compute the tangent. - -NaN is returned for invalid input values; -to raise an error instead, see "tan_checked". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" tan_checked = _clone_signature(ln) -""" -Compute the tangent. - -Infinite values raise an error; -to return NaN instead, see "tan". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def atan2( y: FloatScalar | FloatArray | Expression | Any, x: FloatScalar | FloatArray | Expression | Any, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: - """ - Compute the inverse tangent of y/x. - - The return value is in the range [-pi, pi]. - - Parameters - ---------- - y : Array-like or scalar-like - Argument to compute function. - x : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.FloatScalar | lib.DoubleScalar | lib.NumericArray[lib.FloatScalar] | lib.NumericArray[lib.DoubleScalar] | Expression: ... 
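One more illustrative sketch, not part of the patch: the element-wise arithmetic, rounding, logarithmic, and trigonometric kernels stubbed above, contrasting a wrapping kernel with its _checked variant.

import pyarrow as pa
import pyarrow.compute as pc

x = pa.array([1, -2, 100], pa.int8())
pc.abs(x)             # Int8Array; wraps on overflow, abs_checked raises instead
pc.add_checked(x, 1)  # Int8Array; raises ArrowInvalid on overflow rather than wrapping

pc.round(pa.array([2.5, 3.5]), ndigits=0, round_mode="half_to_even")  # [2, 4]
pc.ln_checked(pa.array([1.0, 2.0]))          # raises on non-positive input; ln() returns -inf/NaN
pc.atan2(pa.array([0.0]), pa.array([-1.0]))  # DoubleArray, result in [-pi, pi] (here ~pi)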
+ # ========================= 2.5 Comparisons functions ========================= def equal( x: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, y: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Compare values for equality (x == y). - - A null on either side emits a null comparison result. - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + greater = _clone_signature(equal) -""" -Compare values for ordered inequality (x > y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" greater_equal = _clone_signature(equal) -""" -Compare values for ordered inequality (x >= y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" less = _clone_signature(equal) -""" -Compare values for ordered inequality (x < y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" less_equal = _clone_signature(equal) -""" -Compare values for ordered inequality (x <= y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" not_equal = _clone_signature(equal) -""" -Compare values for inequality (x != y). - -A null on either side emits a null comparison result. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def max_element_wise( *args: ScalarOrArray[_Scalar_CoT] | Expression, skip_nulls: bool = True, options: ElementWiseAggregateOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _Scalar_CoT | Expression: - """ - Find the element-wise maximum value. - - Nulls are ignored (by default) or propagated. - NaN is preferred over null, but not over any valid value. - - Parameters - ---------- - *args : Array-like or scalar-like - Argument to compute function. - skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. - options : pyarrow.compute.ElementWiseAggregateOptions, optional - Alternative way of passing options. 
- memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _Scalar_CoT | Expression: ... + min_element_wise = _clone_signature(max_element_wise) -""" -Find the element-wise minimum value. - -Nulls are ignored (by default) or propagated. -NaN is preferred over null, but not over any valid value. - -Parameters ----------- -*args : Array-like or scalar-like - Argument to compute function. -skip_nulls : bool, default True - Whether to skip (ignore) nulls in the input. - If False, any null in the input forces the output to null. -options : pyarrow.compute.ElementWiseAggregateOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.6 Logical functions ========================= def and_( x: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], y: lib.BooleanScalar | BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar], /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar]: - """ - Logical 'and' boolean values. - - When a null is encountered in either input, a null is output. - For a different null behavior, see function "and_kleene". - - Parameters - ---------- - x : Array-like or scalar-like - Argument to compute function. - y : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression | ScalarOrArray[lib.BooleanScalar]: ... + and_kleene = _clone_signature(and_) -""" -Logical 'and' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true and null = null -- null and true = null -- false and null = false -- null and false = false -- null and null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'and' false is always false. -For a different null behavior, see function "and". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" and_not = _clone_signature(and_) -""" -Logical 'and not' boolean values. - -When a null is encountered in either input, a null is output. -For a different null behavior, see function "and_not_kleene". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" and_not_kleene = _clone_signature(and_) -""" -Logical 'and not' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true and not null = null -- null and not false = null -- false and not null = false -- null and not true = false -- null and not null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'and not' true is always false, as is false -'and not' an unknown value. -For a different null behavior, see function "and_not". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. 
-y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" or_ = _clone_signature(and_) -""" -Logical 'or' boolean values. - -When a null is encountered in either input, a null is output. -For a different null behavior, see function "or_kleene". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" or_kleene = _clone_signature(and_) -""" -Logical 'or' boolean values (Kleene logic). - -This function behaves as follows with nulls: - -- true or null = true -- null or true = true -- false or null = null -- null or false = null -- null or null = null - -In other words, in this context a null value really means "unknown", -and an unknown value 'or' true is always true. -For a different null behavior, see function "or". - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" xor = _clone_signature(and_) -""" -Logical 'xor' boolean values. - -When a null is encountered in either input, a null is output. - -Parameters ----------- -x : Array-like or scalar-like - Argument to compute function. -y : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def invert( x: lib.BooleanScalar | _BooleanArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | _BooleanArrayT | Expression: - """ - Invert boolean values. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | _BooleanArrayT | Expression: ... + # ========================= 2.10 String predicates ========================= def ascii_is_alnum( strings: StringScalar | StringArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Classify strings as ASCII alphanumeric. - - For each string in `strings`, emit true iff the string is non-empty - and consists only of alphanumeric ASCII characters. Null strings emit null. +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ ascii_is_alpha = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII alphabetic. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphabetic ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_decimal = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII decimal. 
- -For each string in `strings`, emit true iff the string is non-empty -and consists only of decimal ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_lower = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII lowercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of lowercase ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_printable = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII printable. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of printable ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_space = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII whitespace. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of whitespace ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_upper = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII uppercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of uppercase ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_alnum = _clone_signature(ascii_is_alnum) -""" -Classify strings as alphanumeric. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphanumeric Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_alpha = _clone_signature(ascii_is_alnum) -""" -Classify strings as alphabetic. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of alphabetic Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_decimal = _clone_signature(ascii_is_alnum) -""" -Classify strings as decimal. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of decimal Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" utf8_is_digit = _clone_signature(ascii_is_alnum) -""" -Classify strings as digits. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of Unicode digits. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_lower = _clone_signature(ascii_is_alnum) -""" -Classify strings as lowercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of lowercase Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_numeric = _clone_signature(ascii_is_alnum) -""" -Classify strings as numeric. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of numeric Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_printable = _clone_signature(ascii_is_alnum) -""" -Classify strings as printable. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of printable Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_space = _clone_signature(ascii_is_alnum) -""" -Classify strings as whitespace. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of whitespace Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_upper = _clone_signature(ascii_is_alnum) -""" -Classify strings as uppercase. - -For each string in `strings`, emit true iff the string is non-empty -and consists only of uppercase Unicode characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_is_title = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII titlecase. - -For each string in `strings`, emit true iff the string is title-cased, -i.e. it has at least one cased character, each uppercase character -follows an uncased character, and each lowercase character follows -an uppercase character. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_is_title = _clone_signature(ascii_is_alnum) -""" -Classify strings as titlecase. - -For each string in `strings`, emit true iff the string is title-cased, -i.e. it has at least one cased character, each uppercase character -follows an uncased character, and each lowercase character follows -an uppercase character. 
- -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" string_is_ascii = _clone_signature(ascii_is_alnum) -""" -Classify strings as ASCII. - -For each string in `strings`, emit true iff the string consists only -of ASCII characters. Null strings emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.11 String transforms ========================= def ascii_capitalize( strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringScalarT | _StringArrayT | Expression: - """ - Capitalize the first character of ASCII input. +) -> _StringScalarT | _StringArrayT | Expression: ... - For each string in `strings`, return a capitalized version. - - This function assumes the input is fully ASCII. If it may contain - non-ASCII characters, use "utf8_capitalize" instead. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ ascii_lower = _clone_signature(ascii_capitalize) -""" -Transform ASCII input to lowercase. - -For each string in `strings`, return a lowercase version. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_lower" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_reverse = _clone_signature(ascii_capitalize) -""" -Reverse ASCII input. - -For each ASCII string in `strings`, return a reversed version. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_reverse" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_swapcase = _clone_signature(ascii_capitalize) -""" -Transform ASCII input by inverting casing. - -For each string in `strings`, return a string with opposite casing. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_swapcase" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_title = _clone_signature(ascii_capitalize) -""" -Titlecase each word of ASCII input. - -For each string in `strings`, return a titlecased version. -Each word in the output will start with an uppercase character and its -remaining characters will be lowercase. - -This function assumes the input is fully ASCII. If it may contain -non-ASCII characters, use "utf8_title" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_upper = _clone_signature(ascii_capitalize) -""" -Transform ASCII input to uppercase. 
- -For each string in `strings`, return an uppercase version. - -This function assumes the input is fully ASCII. It it may contain -non-ASCII characters, use "utf8_upper" instead. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def binary_length( strings: lib.BinaryScalar | lib.StringScalar | lib.LargeBinaryScalar | lib.LargeStringScalar @@ -2694,20 +709,8 @@ def binary_length( | lib.ChunkedArray[lib.LargeBinaryScalar] | lib.ChunkedArray[lib.LargeStringScalar] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: - """ - Compute string lengths. - - For each string in `strings`, emit its length of bytes. - Null values emit null. +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ def binary_repeat( strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, @@ -2715,21 +718,8 @@ def binary_repeat( /, *, memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT | Expression: - """ - Repeat a binary string. - - For each binary string in `strings`, return a replicated version. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - num_repeats : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringOrBinaryScalarT | lib.Array[_StringOrBinaryScalarT] | _StringOrBinaryArrayT | Expression: ... + def binary_replace_slice( strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, @@ -2740,48 +730,13 @@ def binary_replace_slice( *, options: ReplaceSliceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: - """ - Replace a slice of a binary string. - - For each string in `strings`, replace a slice of the string defined by `start` - and `stop` indices with the given `replacement`. `start` is inclusive - and `stop` is exclusive, and both are measured in bytes. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - options : pyarrow.compute.ReplaceSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ... + def binary_reverse( strings: _BinaryScalarT | _BinaryArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _BinaryScalarT | _BinaryArrayT | Expression: - """ - Reverse binary input. - - For each binary string in `strings`, return a reversed version. +) -> _BinaryScalarT | _BinaryArrayT | Expression: ... - This function reverses the binary data at a byte-level. 
- - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ def replace_substring( strings: _StringScalarT | _StringArrayT | Expression, @@ -2792,109 +747,24 @@ def replace_substring( max_replacements: int | None = None, options: ReplaceSubstringOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Replace matching non-overlapping substrings with replacement. - - For each string in `strings`, replace non-overlapping substrings that match - the given literal `pattern` with the given `replacement`. - If `max_replacements` is given and not equal to -1, it limits the - maximum amount replacements per input, counted from the left. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - replacement : str - What to replace the pattern with. - max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). - options : pyarrow.compute.ReplaceSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + replace_substring_regex = _clone_signature(replace_substring) -""" -Replace matching non-overlapping substrings with replacement. - -For each string in `strings`, replace non-overlapping substrings that match -the given regular expression `pattern` with the given `replacement`. -If `max_replacements` is given and not equal to -1, it limits the -maximum amount replacements per input, counted from the left. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -replacement : str - What to replace the pattern with. -max_replacements : int or None, default None - The maximum number of strings to replace in each - input value (unlimited if None). -options : pyarrow.compute.ReplaceSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def utf8_capitalize( strings: _StringScalarT | _StringArrayT | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> _StringScalarT | _StringArrayT | Expression: - """ - Capitalize the first character of input. - - For each string in `strings`, return a capitalized version, - with the first character uppercased and the others lowercased. +) -> _StringScalarT | _StringArrayT | Expression: ... - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ def utf8_length( strings: lib.StringScalar | lib.LargeStringScalar | lib.StringArray | lib.ChunkedArray[lib.StringScalar] | lib.LargeStringArray | lib.ChunkedArray[lib.LargeStringScalar] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: - """ - Compute UTF8 string lengths. 
+) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... - For each string in `strings`, emit its length in UTF8 characters. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ utf8_lower = _clone_signature(utf8_capitalize) -""" -Transform input to lowercase. - -For each string in `strings`, return a lowercase version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def utf8_replace_slice( strings: _StringScalarT | _StringArrayT | Expression, @@ -2905,89 +775,13 @@ def utf8_replace_slice( *, options: ReplaceSliceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Replace a slice of a string. - - For each string in `strings`, replace a slice of the string defined by `start` - and `stop` indices with the given `replacement`. `start` is inclusive - and `stop` is exclusive, and both are measured in UTF8 characters. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int - Index to stop slicing at (exclusive). - replacement : str - What to replace the slice with. - options : pyarrow.compute.ReplaceSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + utf8_reverse = _clone_signature(utf8_capitalize) -""" -Reverse input. - -For each string in `strings`, return a reversed version. - -This function operates on Unicode codepoints, not grapheme -clusters. Hence, it will not correctly reverse grapheme clusters -composed of multiple codepoints. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_swapcase = _clone_signature(utf8_capitalize) -""" -Transform input lowercase characters to uppercase and uppercase characters to lowercase. - -For each string in `strings`, return an opposite case version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_title = _clone_signature(utf8_capitalize) -""" -Titlecase each word of input. - -For each string in `strings`, return a titlecased version. -Each word in the output will start with an uppercase character and its -remaining characters will be lowercase. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_upper = _clone_signature(utf8_capitalize) -""" -Transform input to uppercase. - -For each string in `strings`, return an uppercase version. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. 
-memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory poo -""" # ========================= 2.12 String padding ========================= def ascii_center( @@ -2999,157 +793,14 @@ def ascii_center( *, options: PadOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Center strings by padding with a given character. - - For each string in `strings`, emit a centered string by padding both sides - with the given ASCII character. - Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - width : int - Desired string length. - padding : str, default " " - What to pad the string with. Should be one byte or codepoint. - lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). - options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + ascii_lpad = _clone_signature(ascii_center) -""" -Right-align strings by padding with a given character. - -For each string in `strings`, emit a right-aligned string by prepending -the given ASCII character. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_rpad = _clone_signature(ascii_center) -""" -Left-align strings by padding with a given character. - -For each string in `strings`, emit a left-aligned string by appending -the given ASCII character. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_center = _clone_signature(ascii_center) -""" -Center strings by padding with a given character. - -For each string in `strings`, emit a centered string by padding both sides -with the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. 
-lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_lpad = _clone_signature(ascii_center) -""" -Right-align strings by padding with a given character. - -For each string in `strings`, emit a right-aligned string by prepending -the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_rpad = _clone_signature(ascii_center) -""" -Left-align strings by padding with a given character. - -For each string in `strings`, emit a left-aligned string by appending -the given UTF8 codeunit. -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -width : int - Desired string length. -padding : str, default " " - What to pad the string with. Should be one byte or codepoint. -lean_left_on_odd_padding : bool, default True - What to do if there is an odd number of padding characters (in case - of centered padding). Defaults to aligning on the left (i.e. adding - the extra padding character on the right). -options : pyarrow.compute.PadOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.13 String trimming ========================= def ascii_ltrim( @@ -3159,127 +810,14 @@ def ascii_ltrim( *, options: TrimOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Trim leading characters. - - For each string in `strings`, remove any leading characters - from the `characters` option (as given in TrimOptions). - Null values emit null. - Both the `strings` and the `characters` are interpreted as - ASCII; to trim non-ASCII characters, use `utf8_ltrim`. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - characters : str - Individual characters to be trimmed from the string. - options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + ascii_rtrim = _clone_signature(ascii_ltrim) -""" -Trim trailing characters. - -For each string in `strings`, remove any trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. -Both the `strings` and the `characters` are interpreted as -ASCII; to trim non-ASCII characters, use `utf8_rtrim`. 
- -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_trim = _clone_signature(ascii_ltrim) -""" -Trim leading and trailing characters. - -For each string in `strings`, remove any leading or trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. -Both the `strings` and the `characters` are interpreted as -ASCII; to trim non-ASCII characters, use `utf8_trim`. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_ltrim = _clone_signature(ascii_ltrim) -""" -Trim leading characters. - -For each string in `strings`, remove any leading characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_rtrim = _clone_signature(ascii_ltrim) -""" -Trim trailing characters. - -For each string in `strings`, remove any trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_trim = _clone_signature(ascii_ltrim) -""" -Trim leading and trailing characters. - -For each string in `strings`, remove any leading or trailing characters -from the `characters` option (as given in TrimOptions). -Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -characters : str - Individual characters to be trimmed from the string. -options : pyarrow.compute.TrimOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def ascii_ltrim_whitespace( strings: _StringScalarT | _StringArrayT | Expression, @@ -3287,97 +825,14 @@ def ascii_ltrim_whitespace( *, options: TrimOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Trim leading ASCII whitespace characters. - - For each string in `strings`, emit a string with leading ASCII whitespace - characters removed. Use `utf8_ltrim_whitespace` to trim leading Unicode - whitespace characters. Null values emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. 
- memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + ascii_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim trailing ASCII whitespace characters. - -For each string in `strings`, emit a string with trailing ASCII whitespace -characters removed. Use `utf8_rtrim_whitespace` to trim trailing Unicode -whitespace characters. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" ascii_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading and trailing ASCII whitespace characters. - -For each string in `strings`, emit a string with leading and trailing ASCII -whitespace characters removed. Use `utf8_trim_whitespace` to trim Unicode -whitespace characters. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_ltrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading whitespace characters. - -For each string in `strings`, emit a string with leading whitespace -characters removed, where whitespace characters are defined by the Unicode -standard. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_rtrim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim trailing whitespace characters. - -For each string in `strings`, emit a string with trailing whitespace -characters removed, where whitespace characters are defined by the Unicode -standard. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_trim_whitespace = _clone_signature(ascii_ltrim_whitespace) -""" -Trim leading and trailing whitespace characters. - -For each string in `strings`, emit a string with leading and trailing -whitespace characters removed, where whitespace characters are defined -by the Unicode standard. Null values emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.14 String splitting ========================= def ascii_split_whitespace( @@ -3388,31 +843,8 @@ def ascii_split_whitespace( reverse: bool = False, options: SplitOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: - """ - Split string according to any ASCII whitespace. - - Split each string according any non-zero length sequence of ASCII - whitespace characters. The output for each string input is a list - of strings. - - The maximum number of splits and direction of splitting - (forward, reverse) can optionally be defined in SplitOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. 
- max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - options : pyarrow.compute.SplitOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.ListArray[_StringScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: ... + def split_pattern( strings: _StringOrBinaryScalarT | lib.Array[lib.Scalar[_DataTypeT]] | Expression, @@ -3423,86 +855,11 @@ def split_pattern( reverse: bool = False, options: SplitOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: - """ - Split string according to separator. - - Split each string according to the exact `pattern` defined in - SplitPatternOptions. The output for each string input is a list - of strings. - - The maximum number of splits and direction of splitting - (forward, reverse) can optionally be defined in SplitPatternOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - String pattern to split on. - max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). - reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. - options : pyarrow.compute.SplitPatternOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.ListArray[_StringOrBinaryScalarT] | lib.ListArray[lib.ListScalar[_DataTypeT]] | Expression: ... + split_pattern_regex = _clone_signature(split_pattern) -""" -Split string according to regex pattern. - -Split each string according to the regex `pattern` defined in -SplitPatternOptions. The output for each string input is a list -of strings. - -The maximum number of splits and direction of splitting -(forward, reverse) can optionally be defined in SplitPatternOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - String pattern to split on. -max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). -reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. -options : pyarrow.compute.SplitPatternOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" utf8_split_whitespace = _clone_signature(ascii_split_whitespace) -""" -Split string according to any Unicode whitespace. - -Split each string according any non-zero length sequence of Unicode -whitespace characters. The output for each string input is a list -of strings. - -The maximum number of splits and direction of splitting -(forward, reverse) can optionally be defined in SplitOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -max_splits : int or None, default None - Maximum number of splits for each input value (unlimited if None). 
-reverse : bool, default False - Whether to start splitting from the end of each input value. - This only has an effect if `max_splits` is not None. -options : pyarrow.compute.SplitOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.15 String component extraction ========================= def extract_regex( @@ -3512,49 +869,14 @@ def extract_regex( *, options: ExtractRegexOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar | lib.StructArray | Expression: - """ - Extract substrings captured by a regex pattern. - - For each string in `strings`, match the regular expression and, if - successful, emit a struct with field names and values coming from the - regular expression's named capture groups. If the input is null or the - regular expression fails matching, a null output value is emitted. - - Regular expression matching is done using the Google RE2 library. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Regular expression with named capture fields. - options : pyarrow.compute.ExtractRegexOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.StructScalar | lib.StructArray | Expression: ... + # ========================= 2.16 String join ========================= def binary_join( strings, separator, /, *, memory_pool: lib.MemoryPool | None = None -) -> StringScalar | StringArray: - """ - Join a list of strings together with a separator. - - Concatenate the strings in `list`. The `separator` is inserted - between each given string. - Any null input and any null `list` element emits a null output. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - separator : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> StringScalar | StringArray: ... + def binary_join_element_wise( *strings: _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression, @@ -3562,30 +884,8 @@ def binary_join_element_wise( null_replacement: str = "", options: JoinOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: - """ - Join string arguments together, with the last argument as separator. - - Concatenate the `strings` except for the last one. The last argument - in `strings` is inserted between each given string. - Any null separator element emits a null output. Null elements either - emit a null (the default), are skipped, or replaced with a given string. - - Parameters - ---------- - *strings : Array-like or scalar-like - Argument to compute function. - null_handling : str, default "emit_null" - How to handle null values in the inputs. - Accepted values are "emit_null", "skip", "replace". - null_replacement : str, default "" - Replacement string to emit for null inputs if `null_handling` - is "replace". - options : pyarrow.compute.JoinOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringOrBinaryScalarT | _StringOrBinaryArrayT | Expression: ... 
+ # ========================= 2.17 String Slicing ========================= def binary_slice( @@ -3597,34 +897,8 @@ def binary_slice( *, options: SliceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _BinaryScalarT | _BinaryArrayT | Expression: - """ - Slice binary string. - - For each binary string in `strings`, emit the substring defined by - (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is - inclusive and `stop` is exclusive. All three values are measured in - bytes. - If `step` is negative, the string will be advanced in reversed order. - An error is raised if `step` is zero. - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int or None, default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. - step : int, default 1 - Slice step. - options : pyarrow.compute.SliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _BinaryScalarT | _BinaryArrayT | Expression: ... + def utf8_slice_codeunits( strings: _StringScalarT | _StringArrayT | Expression, @@ -3635,34 +909,8 @@ def utf8_slice_codeunits( *, options: SliceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _StringScalarT | _StringArrayT | Expression: - """ - Slice string. - - For each string in `strings`, emit the substring defined by - (`start`, `stop`, `step`) as given by `SliceOptions` where `start` is - inclusive and `stop` is exclusive. All three values are measured in - UTF8 codeunits. - If `step` is negative, the string will be advanced in reversed order. - An error is raised if `step` is zero. - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing at (inclusive). - stop : int or None, default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. - step : int, default 1 - Slice step. - options : pyarrow.compute.SliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _StringScalarT | _StringArrayT | Expression: ... + # ========================= 2.18 Containment tests ========================= def count_substring( @@ -3678,49 +926,10 @@ def count_substring( ignore_case: bool = False, options: MatchSubstringOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: - """ - Count occurrences of substring. - - For each string in `strings`, emit the number of occurrences of the given - literal pattern. - Null inputs emit null. The pattern must be given in MatchSubstringOptions. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
- """ +) -> lib.Int32Scalar | lib.Int64Scalar | lib.Int32Array | lib.Int64Array | Expression: ... + count_substring_regex = _clone_signature(count_substring) -""" -Count occurrences of substring. - -For each string in `strings`, emit the number of occurrences of the given -regular expression pattern. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def ends_with( strings: StringScalar | BinaryScalar | StringArray | BinaryArray | Expression, @@ -3730,72 +939,11 @@ def ends_with( ignore_case: bool = False, options: MatchSubstringOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Check if strings end with a literal pattern. - - For each string in `strings`, emit true iff it ends with a given pattern. - The pattern must be given in MatchSubstringOptions. - If ignore_case is set, only simple case folding is performed. - - Null inputs emit null. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - pattern : str - Substring pattern to look for inside input values. - ignore_case : bool, default False - Whether to perform a case-insensitive match. - options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + find_substring = _clone_signature(count_substring) -""" -Find first occurrence of substring. - -For each string in `strings`, emit the index in bytes of the first occurrence -of the given literal pattern, or -1 if not found. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" find_substring_regex = _clone_signature(count_substring) -""" -Find location of first match of regex pattern. - -For each string in `strings`, emit the index in bytes of the first occurrence -of the given literal pattern, or -1 if not found. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" def index_in( values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, @@ -3805,31 +953,8 @@ def index_in( skip_nulls: bool = False, options: SetLookupOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Scalar | lib.Int32Array | Expression: - """ - Return index of each element in a set of values. - - For each element in `values`, return its index in a given set of - values, or null if it is not found there. - The set of values to look for must be given in SetLookupOptions. - By default, nulls are matched against the value set, this can be - changed in SetLookupOptions. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - value_set : Array - Set of values to look for in the input. - skip_nulls : bool, default False - If False, nulls in the input are matched in the value_set just - like regular values. - If True, nulls in the input always fail matching. - options : pyarrow.compute.SetLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int32Scalar | lib.Int32Array | Expression: ... + def is_in( values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, @@ -3839,169 +964,23 @@ def is_in( skip_nulls: bool = False, options: SetLookupOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar | lib.BooleanArray: - """ - Find each element in a set of values. - - For each element in `values`, return true if it is found in a given - set of values, false otherwise. - The set of values to look for must be given in SetLookupOptions. - By default, nulls are matched against the value set, this can be - changed in SetLookupOptions. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - value_set : Array - Set of values to look for in the input. - skip_nulls : bool, default False - If False, nulls in the input are matched in the value_set just - like regular values. - If True, nulls in the input always fail matching. - options : pyarrow.compute.SetLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray: ... + match_like = _clone_signature(ends_with) -""" -Match strings against SQL-style LIKE pattern. - -For each string in `strings`, emit true iff it matches a given pattern -at any position. '%' will match any number of characters, '_' will -match exactly one character, and any other character matches itself. -To match a literal '%', '_', or '\', precede the character with a backslash. -Null inputs emit null. The pattern must be given in MatchSubstringOptions. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" match_substring = _clone_signature(ends_with) -""" -Match strings against literal pattern. - -For each string in `strings`, emit true iff it contains a given pattern. -Null inputs emit null. 
-The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" match_substring_regex = _clone_signature(ends_with) -""" -Match strings against regex pattern. - -For each string in `strings`, emit true iff it matches a given pattern -at any position. The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. - -Null inputs emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" starts_with = _clone_signature(ends_with) -""" -Check if strings start with a literal pattern. - -For each string in `strings`, emit true iff it starts with a given pattern. -The pattern must be given in MatchSubstringOptions. -If ignore_case is set, only simple case folding is performed. - -Null inputs emit null. - -Parameters ----------- -strings : Array-like or scalar-like - Argument to compute function. -pattern : str - Substring pattern to look for inside input values. -ignore_case : bool, default False - Whether to perform a case-insensitive match. -options : pyarrow.compute.MatchSubstringOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.19 Categorizations ========================= def is_finite( values: NumericScalar | lib.NullScalar | NumericArray | lib.NullArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Return true if value is finite. - - For each input value, emit true iff the value is finite - (i.e. neither NaN, inf, nor -inf). +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ is_inf = _clone_signature(is_finite) -""" -Return true if infinity. - -For each input value, emit true iff the value is infinite (inf or -inf). - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" is_nan = _clone_signature(is_finite) -""" -Return true if NaN. - -For each input value, emit true iff the value is NaN. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
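The categorization predicates (`is_finite`, `is_inf`, `is_nan`, plus the null checks that follow below) all emit boolean outputs; for example, on a hypothetical float input (illustrative only):

import pyarrow as pa
import pyarrow.compute as pc

x = pa.array([1.0, float("nan"), None])
pc.is_nan(x)                      # [false, true, null]
pc.is_finite(x)                   # [true, false, null]
pc.is_null(x, nan_is_null=True)   # [false, true, true]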
-""" def is_null( values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, @@ -4010,174 +989,30 @@ def is_null( nan_is_null: bool = False, options: NullOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Return true if null (and optionally NaN). - - For each input value, emit true iff the value is null. - True may also be emitted for NaN values by setting the `nan_is_null` flag. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - nan_is_null : bool, default False - Whether floating-point NaN values are considered null. - options : pyarrow.compute.NullOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + def is_valid( values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Return true if non-null. - - For each input value, emit true iff the value is valid (i.e. non-null). +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ true_unless_null = _clone_signature(is_valid) -""" -Return true if non-null, else return null. -For each input value, emit true iff the value -is valid (non-null), otherwise emit null. +# ========================= 2.20 Selecting / multiplexing ========================= +def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): ... -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" -# ========================= 2.20 Selecting / multiplexing ========================= -def case_when(cond, /, *cases, memory_pool: lib.MemoryPool | None = None): - """ - Choose values based on multiple conditions. - - `cond` must be a struct of Boolean values. `cases` can be a mix - of scalar and array arguments (of any type, but all must be the - same type or castable to a common type), with either exactly one - datum per child of `cond`, or one more `cases` than children of - `cond` (in which case we have an "else" value). - - Each row of the output will be the corresponding value of the - first datum in `cases` for which the corresponding child of `cond` - is true, or otherwise the "else" value (if given), or null. - - Essentially, this implements a switch-case or if-else, if-else... statement. - - Parameters - ---------- - cond : Array-like or scalar-like - Argument to compute function. - *cases : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): - """ - Choose values from several arrays. - - For each row, the value of the first argument is used as a 0-based index - into the list of `values` arrays (i.e. index 0 selects the first of the - `values` arrays). The output value is the corresponding value of the - selected argument. 
- - If an index is null, the output will be null. - - Parameters - ---------- - indices : Array-like or scalar-like - Argument to compute function. - *values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +def choose(indices, /, *values, memory_pool: lib.MemoryPool | None = None): ... + def coalesce( *values: _ScalarOrArrayT, memory_pool: lib.MemoryPool | None = None -) -> _ScalarOrArrayT: - """ - Select the first non-null value. - - Each row of the output will be the value from the first corresponding input - for which the value is not null. If all inputs are null in a row, the output - will be null. - - Parameters - ---------- - *values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _ScalarOrArrayT: ... + fill_null = coalesce -"""Replace each null element in values with a corresponding -element from fill_value. - -If fill_value is scalar-like, then every null element in values -will be replaced with fill_value. If fill_value is array-like, -then the i-th element in values will be replaced with the i-th -element in fill_value. - -The fill_value's type must be the same as that of values, or it -must be able to be implicitly casted to the array's type. - -This is an alias for :func:`coalesce`. - -Parameters ----------- -values : Array, ChunkedArray, or Scalar-like object - Each null element is replaced with the corresponding value - from fill_value. -fill_value : Array, ChunkedArray, or Scalar-like object - If not same type as values, will attempt to cast. - -Returns -------- -result : depends on inputs - Values with all null elements replaced - -Examples --------- ->>> import pyarrow as pa ->>> arr = pa.array([1, 2, None, 3], type=pa.int8()) ->>> fill_value = pa.scalar(5, type=pa.int8()) ->>> arr.fill_null(fill_value) - -[ - 1, - 2, - 5, - 3 -] ->>> arr = pa.array([1, 2, None, 4, None]) ->>> arr.fill_null(pa.array([10, 20, 30, 40, 50])) - -[ - 1, - 2, - 30, - 4, - 50 -] -""" def if_else( cond: ArrayLike | ScalarLike, @@ -4186,25 +1021,8 @@ def if_else( /, *, memory_pool: lib.MemoryPool | None = None, -) -> ArrayLike | ScalarLike: - """ - Choose values based on a condition. - - `cond` must be a Boolean scalar/ array. - `left` or `right` must be of the same type scalar/ array. - `null` values in `cond` will be promoted to the output. - - Parameters - ---------- - cond : Array-like or scalar-like - Argument to compute function. - left : Array-like or scalar-like - Argument to compute function. - right : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> ArrayLike | ScalarLike: ... + # ========================= 2.21 Structural transforms ========================= @@ -4213,21 +1031,8 @@ def list_value_length( /, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int32Array | lib.Int64Array | Expression: - """ - Compute list lengths. - - `lists` must have a list-like type. - For each non-null value in `lists`, its length is emitted. - Null values emit a null in the output. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
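The multiplexing kernels above (`if_else`, `coalesce`, and its `fill_null` alias) are among the most common of this group; a small sketch of how the stubbed signatures are used, not part of the patch:

import pyarrow as pa
import pyarrow.compute as pc

a = pa.array([1, None, 3])
pc.fill_null(a, 0)                                # [1, 0, 3]
pc.coalesce(a, pa.array([10, 20, 30]))            # [1, 20, 3]
pc.if_else(pa.array([True, False, True]), a, 99)  # [1, 99, 3]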
- """ +) -> lib.Int32Array | lib.Int64Array | Expression: ... + def make_struct( *args: lib.Scalar | lib.Array | lib.ChunkedArray | Expression, @@ -4236,29 +1041,8 @@ def make_struct( field_metadata: list[lib.KeyValueMetadata] | None = None, options: MakeStructOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.StructScalar | lib.StructArray | Expression: - """ - Wrap Arrays into a StructArray. - - Names of the StructArray's fields are - specified through MakeStructOptions. - - Parameters - ---------- - *args : Array-like or scalar-like - Argument to compute function. - field_names : sequence of str - Names of the struct fields to create. - field_nullability : sequence of bool, optional - Nullability information for each struct field. - If omitted, all fields are nullable. - field_metadata : sequence of KeyValueMetadata, optional - Metadata for each struct field. - options : pyarrow.compute.MakeStructOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.StructScalar | lib.StructArray | Expression: ... + # ========================= 2.22 Conversions ========================= def ceil_temporal( @@ -4284,163 +1068,11 @@ def ceil_temporal( calendar_based_origin: bool = False, options: RoundTemporalOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _TemporalScalarT | _TemporalArrayT | Expression: - """ - Round temporal values up to nearest multiple of specified time unit. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - multiple : int, default 1 - Number of units to round to. - unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. - calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. 
On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. - options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _TemporalScalarT | _TemporalArrayT | Expression: ... + floor_temporal = _clone_signature(ceil_temporal) -""" -Round temporal values down to nearest multiple of specified time unit. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -timestamps : Array-like or scalar-like - Argument to compute function. -multiple : int, default 1 - Number of units to round to. -unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". -week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. -ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. -calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. -options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" round_temporal = _clone_signature(ceil_temporal) -""" -Round temporal values to the nearest multiple of specified time unit. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -timestamps : Array-like or scalar-like - Argument to compute function. -multiple : int, default 1 - Number of units to round to. -unit : str, default "day" - The unit in which `multiple` is expressed. - Accepted values are "year", "quarter", "month", "week", "day", - "hour", "minute", "second", "millisecond", "microsecond", - "nanosecond". 
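`ceil_temporal`, `floor_temporal` and `round_temporal` share one signature; the `multiple`/`unit` pair controls the rounding grid, e.g. (illustrative, not part of the diff):

import pyarrow as pa
import pyarrow.compute as pc

ts = pc.strptime(pa.array(["2024-03-10 13:47:12"]),
                 format="%Y-%m-%d %H:%M:%S", unit="s")
pc.floor_temporal(ts, multiple=15, unit="minute")   # 2024-03-10 13:45:00
pc.ceil_temporal(ts, unit="hour")                   # 2024-03-10 14:00:00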
-week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. -ceil_is_strictly_greater : bool, default False - If True, ceil returns a rounded value that is strictly greater than the - input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would - yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 - if set to False. - This applies to the ceil_temporal function only. -calendar_based_origin : bool, default False - By default, the origin is 1970-01-01T00:00:00. By setting this to True, - rounding origin will be beginning of one less precise calendar unit. - E.g.: rounding to hours will use beginning of day as origin. - - By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting calendar_based_origin to true, - time will be rounded to number of units since the last greater - calendar unit. - For example: rounding to multiple of days since the beginning of the - month or to hours since the beginning of the day. - Exceptions: week and quarter are not used as greater units, - therefore days will be rounded to the beginning of the month not - week. Greater unit of week is a year. - Note that ceiling and rounding might change sorting order of an array - near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to - 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to - YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will - ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the - order of an already ordered array. -options : pyarrow.compute.RoundTemporalOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def cast( arr: lib.Scalar | lib.Array | lib.ChunkedArray, @@ -4448,60 +1080,8 @@ def cast( safe: bool | None = None, options: CastOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Scalar[_DataTypeT] | lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]: - """ - Cast array values to another data type. Can also be invoked as an array - instance method. - - Parameters - ---------- - arr : Array-like - target_type : DataType or str - Type to cast to - safe : bool, default True - Check for overflows or other unsafe conversions - options : CastOptions, default None - Additional checks pass by CastOptions - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. - - Examples - -------- - >>> from datetime import datetime - >>> import pyarrow as pa - >>> arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) - >>> arr.type - TimestampType(timestamp[us]) - - You can use ``pyarrow.DataType`` objects to specify the target type: - - >>> cast(arr, pa.timestamp("ms")) - - [ - 2010-01-01 00:00:00.000, - 2015-01-01 00:00:00.000 - ] - - >>> cast(arr, pa.timestamp("ms")).type - TimestampType(timestamp[ms]) - - Alternatively, it is also supported to use the string aliases for these - types: - - >>> arr.cast("timestamp[ms]") - - [ - 2010-01-01 00:00:00.000, - 2015-01-01 00:00:00.000 - ] - >>> arr.cast("timestamp[ms]").type - TimestampType(timestamp[ms]) - - Returns - ------- - casted : Array - The cast result as a new Array - """ +) -> lib.Scalar[_DataTypeT] | lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]]: ... 
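The removed `cast` docstring carried the doctest above; the call pattern itself is unchanged and can still be sketched as follows (not part of the patch):

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, 2, 3])
pc.cast(arr, pa.float64())   # DoubleArray [1.0, 2.0, 3.0]
arr.cast("int16")            # string-alias form of the same cast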
+ def strftime( timestamps: TemporalScalar | TemporalArray | Expression, @@ -4511,34 +1091,8 @@ def strftime( *, options: StrftimeOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.StringScalar | lib.StringArray | Expression: - """ - Format temporal values according to a format string. - - For each input value, emit a formatted string. - The time format string and locale can be set using StrftimeOptions. - The output precision of the "%S" (seconds) format code depends on - the input time precision: it is an integer for timestamps with - second precision, a real number with the required number of fractional - digits for higher precisions. - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database, or if the specified locale - does not exist on this system. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - format : str, default "%Y-%m-%dT%H:%M:%S" - Pattern for formatting input values. - locale : str, default "C" - Locale to use for locale-specific format specifiers. - options : pyarrow.compute.StrftimeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.StringScalar | lib.StringArray | Expression: ... + def strptime( strings: StringScalar | StringArray | Expression, @@ -4549,53 +1103,14 @@ def strptime( *, options: StrptimeOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampScalar | lib.TimestampArray | Expression: - """ - Parse timestamps. - - For each string in `strings`, parse it as a timestamp. - The timestamp unit and the expected string pattern must be given - in StrptimeOptions. Null inputs emit null. If a non-null string - fails parsing, an error is returned by default. - - Parameters - ---------- - strings : Array-like or scalar-like - Argument to compute function. - format : str - Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". - Note that the semantics of the format follow the C/C++ strptime, not the Python one. - There are differences in behavior, for example how the "%y" placeholder - handles years with less than four digits. - unit : str - Timestamp unit of the output. - Accepted values are "s", "ms", "us", "ns". - error_is_null : boolean, default False - Return null on parsing errors if true or raise if false. - options : pyarrow.compute.StrptimeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... + # ========================= 2.23 Temporal component extraction ========================= def day( values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar | lib.Int64Array | Expression: - """ - Extract day number. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... 
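`strptime` and `strftime`, stubbed above, are rough inverses; a brief sketch (illustrative; the format string follows C strptime semantics, as the removed docstring noted):

import pyarrow as pa
import pyarrow.compute as pc

ts = pc.strptime(pa.array(["2024/01/15"]), format="%Y/%m/%d", unit="s")
pc.strftime(ts, format="%d %B %Y")   # rendered with the default "C" locale
pc.day(ts)                           # Int64Array: [15]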
+ def day_of_week( values: TemporalScalar | TemporalArray | Expression, @@ -4605,50 +1120,10 @@ def day_of_week( week_start: int = 1, options: DayOfWeekOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar | lib.Int64Array | Expression: - """ - Extract day of the week number. - - By default, the week starts on Monday represented by 0 and ends on Sunday - represented by 6. - `DayOfWeekOptions.week_start` can be used to set another starting day using - the ISO numbering convention (1=start week on Monday, 7=start week on Sunday). - Day numbers can start at 0 or 1 based on `DayOfWeekOptions.count_from_zero`. - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). - How this value is numbered is unaffected by `count_from_zero`. - options : pyarrow.compute.DayOfWeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + day_of_year = _clone_signature(day) -""" -Extract day of year number. - -January 1st maps to day number 1, February 1st to 32, etc. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def hour( values: lib.TimestampScalar[Any] | lib.Time32Scalar[Any] | lib.Time64Scalar[Any] @@ -4659,80 +1134,22 @@ def hour( /, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar | lib.Int64Array | Expression: - """ - Extract hour value. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + def is_dst( values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Extracts if currently observing daylight savings. - - IsDaylightSavings returns true if a timestamp has a daylight saving - offset in the given timezone. - Null values emit null. - An error is returned if the values do not have a defined timezone. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... 
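For the component-extraction kernels in this hunk the defaults already follow ISO conventions; for instance (a hedged example, not part of the change):

import pyarrow as pa
import pyarrow.compute as pc

ts = pc.strptime(pa.array(["2024-03-10 13:45:00"]),
                 format="%Y-%m-%d %H:%M:%S", unit="s")
pc.day_of_week(ts)                         # 0 = Monday ... 6 = Sunday by default
pc.day_of_week(ts, count_from_zero=False)  # 1 = Monday ... 7 = Sunday
pc.hour(ts)                                # [13]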
+ def iso_week( values: lib.TimestampScalar | lib.TimestampArray[Any] | lib.ChunkedArray[lib.TimestampScalar[Any]] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar | lib.Int64Array | Expression: - """ - Extract ISO week of year number. - - First ISO week has the majority (4 or more) of its days in January. - ISO week starts on Monday. The week number starts with 1 and can run - up to 53. - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + iso_year = _clone_signature(iso_week) -""" -Extract ISO year number. - -First week of an ISO year has the majority (4 or more) of its days in January. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def is_leap_year( values: lib.TimestampScalar[Any] | lib.Date32Scalar | lib.Date64Scalar | lib.TimestampArray @@ -4744,199 +1161,20 @@ def is_leap_year( /, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.BooleanScalar | lib.BooleanArray | Expression: - """ - Extract if year is a leap year. - - Null values emit null. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.BooleanScalar | lib.BooleanArray | Expression: ... + microsecond = _clone_signature(iso_week) -""" -Extract microsecond values. - -Microsecond returns number of microseconds since the last full millisecond. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" millisecond = _clone_signature(iso_week) -""" -Extract millisecond values. - -Millisecond returns number of milliseconds since the last full second. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" minute = _clone_signature(iso_week) -""" -Extract minute values. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" month = _clone_signature(day_of_week) -""" -Extract month number. 
- -Month is encoded as January=1, December=12. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" nanosecond = _clone_signature(hour) -""" -Extract nanosecond values. - -Nanosecond returns number of nanoseconds since the last full microsecond. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" quarter = _clone_signature(day_of_week) -""" -Extract quarter of year number. - -First quarter maps to 1 and forth quarter maps to 4. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" second = _clone_signature(hour) -""" -Extract second values. - -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" subsecond = _clone_signature(hour) -""" -Extract subsecond values. - -Subsecond returns the fraction of a second since the last full second. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" us_week = _clone_signature(iso_week) -""" -Extract US week of year number. - -First US week has the majority (4 or more) of its days in January. -US week starts on Monday. The week number starts with 1 and can run -up to 53. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" us_year = _clone_signature(iso_week) -""" -Extract US epidemiological year number. - -First week of US epidemiological year has the majority (4 or more) of -it's days in January. Last week of US epidemiological year has the -year's last Wednesday in it. US epidemiological week starts on Sunday. -Null values emit null. -An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" year = _clone_signature(iso_week) -""" -Extract year number. - -Null values emit null. 
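All of the `_clone_signature` aliases above (month, quarter, iso_week, us_week, year, and friends) take only the values plus an optional memory pool, so at runtime they read like this sketch (illustrative only):

import pyarrow as pa
import pyarrow.compute as pc

ts = pc.strptime(pa.array(["2024-03-10"]), format="%Y-%m-%d", unit="s")
pc.year(ts), pc.month(ts), pc.quarter(ts), pc.iso_week(ts)
# -> Int64Arrays [2024], [3], [1], [10]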
-An error is returned if the values have a defined timezone but it -cannot be found in the timezone database. - -Parameters ----------- -values : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" def week( values: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, @@ -4947,274 +1185,39 @@ def week( first_week_is_fully_in_year: bool = False, options: WeekOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar | lib.Int64Array | Expression: - """ - Extract week of year number. - - First week has the majority (4 or more) of its days in January. - Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using - DayOfWeekOptions.count_from_zero. - An error is returned if the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - week_starts_monday : bool, default True - If True, weeks start on Monday; if False, on Sunday. - count_from_zero : bool, default False - If True, dates at the start of a year that fall into the last week - of the previous year emit 0. - If False, they emit 52 or 53 (the week number of the last week - of the previous year). - first_week_is_fully_in_year : bool, default False - If True, week number 0 is fully in January. - If False, a week that begins on December 29, 30 or 31 is considered - to be week number 0 of the following year. - options : pyarrow.compute.WeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array | Expression: ... + def year_month_day( values: TemporalScalar | TemporalArray | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.StructScalar | lib.StructArray | Expression: - """ - Extract (year, month, day) struct. - - Null values emit null. - An error is returned in the values have a defined timezone but it - cannot be found in the timezone database. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.StructScalar | lib.StructArray | Expression: ... + # ========================= 2.24 Temporal difference ========================= -def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Compute the number of days and milliseconds between two timestamps. - - Returns the number of days and milliseconds from `start` to `end`. - That is, first the difference in days is computed as if both - timestamps were truncated to the day, then the difference between time times - of the two timestamps is computed as if both times were truncated to the - millisecond. - Null values return null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +def day_time_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): ... 
+ def days_between( start, end, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Scalar | lib.Int64Array: - """ - Compute the number of days between two timestamps. - - Returns the number of day boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the day. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array: ... + hours_between = _clone_signature(days_between) -""" -Compute the number of hours between two timestamps. - -Returns the number of hour boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the hour. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" microseconds_between = _clone_signature(days_between) -""" -Compute the number of microseconds between two timestamps. - -Returns the number of microsecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the microsecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" milliseconds_between = _clone_signature(days_between) -""" -Compute the number of millisecond boundaries between two timestamps. - -Returns the number of millisecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the millisecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" minutes_between = _clone_signature(days_between) -""" -Compute the number of millisecond boundaries between two timestamps. - -Returns the number of millisecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the millisecond. -Null values emit null. -In [152]: print(pc.minutes_between.__doc__) -Compute the number of minute boundaries between two timestamps. - -Returns the number of minute boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the minute. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" def month_day_nano_interval_between( start, end, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: - """ - Compute the number of months, days and nanoseconds between two timestamps. - - Returns the number of months, days, and nanoseconds from `start` to `end`. - That is, first the difference in months is computed as if both timestamps - were truncated to the months, then the difference between the days - is computed, and finally the difference between the times of the two - timestamps is computed as if both times were truncated to the nanosecond. - Null values return null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Compute the number of months between two timestamps. - - Returns the number of month boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the month. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.MonthDayNanoIntervalScalar | lib.MonthDayNanoIntervalArray: ... + + +def month_interval_between(start, end, /, *, memory_pool: lib.MemoryPool | None = None): ... + nanoseconds_between = _clone_signature(days_between) -""" -Compute the number of nanoseconds between two timestamps. - -Returns the number of nanosecond boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the nanosecond. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" quarters_between = _clone_signature(days_between) -""" -Compute the number of quarters between two timestamps. - -Returns the number of quarter start boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the quarter. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" seconds_between = _clone_signature(days_between) -""" -Compute the number of seconds between two timestamps. - -Returns the number of second boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the second. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
-""" def weeks_between( start, @@ -5225,50 +1228,10 @@ def weeks_between( week_start: int = 1, options: DayOfWeekOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.Int64Scalar | lib.Int64Array: - """ - Compute the number of weeks between two timestamps. - - Returns the number of week boundaries crossed from `start` to `end`. - That is, the difference is calculated as if the timestamps were - truncated to the week. - Null values emit null. - - Parameters - ---------- - start : Array-like or scalar-like - Argument to compute function. - end : Array-like or scalar-like - Argument to compute function. - count_from_zero : bool, default True - If True, number days from 0, otherwise from 1. - week_start : int, default 1 - Which day does the week start with (Monday=1, Sunday=7). - How this value is numbered is unaffected by `count_from_zero`. - options : pyarrow.compute.DayOfWeekOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Scalar | lib.Int64Array: ... + years_between = _clone_signature(days_between) -""" -Compute the number of years between two timestamps. - -Returns the number of year boundaries crossed from `start` to `end`. -That is, the difference is calculated as if the timestamps were -truncated to the year. -Null values emit null. - -Parameters ----------- -start : Array-like or scalar-like - Argument to compute function. -end : Array-like or scalar-like - Argument to compute function. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 2.25 Timezone handling ========================= def assume_timezone( @@ -5280,58 +1243,14 @@ def assume_timezone( nonexistent: Literal["raise", "earliest", "latest"] = "raise", options: AssumeTimezoneOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression: - """ - Convert naive timestamp to timezone-aware timestamp. - - Input timestamps are assumed to be relative to the timezone given in the - `timezone` option. They are converted to UTC-relative timestamps and - the output type has its timezone set to the value of the `timezone` - option. Null values emit null. - This function is meant to be used when an external system produces - "timezone-naive" timestamps which need to be converted to - "timezone-aware" timestamps. An error is returned if the timestamps - already have a defined timezone. - - Parameters - ---------- - timestamps : Array-like or scalar-like - Argument to compute function. - timezone : str - Timezone to assume for the input. - ambiguous : str, default "raise" - How to handle timestamps that are ambiguous in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - nonexistent : str, default "raise" - How to handle timestamps that don't exist in the assumed timezone. - Accepted values are "raise", "earliest", "latest". - options : pyarrow.compute.AssumeTimezoneOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression: ... 
+ def local_timestamp( timestamps: lib.TimestampScalar | lib.TimestampArray | lib.ChunkedArray[lib.TimestampScalar] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.TimestampScalar | lib.TimestampArray | Expression: - """ - Convert timestamp to a timezone-naive local time timestamp. - - LocalTimestamp converts timezone-aware timestamp to local timestamp - of the given timestamp's timezone and removes timezone metadata. - Alternative name for this timestamp is also wall clock time. - If input is in UTC or without timezone, then unchanged input values - without timezone metadata are returned. - Null values emit null. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.TimestampScalar | lib.TimestampArray | Expression: ... + # ========================= 2.26 Random number generation ========================= def random( @@ -5340,28 +1259,8 @@ def random( initializer: Literal["system"] | int = "system", options: RandomOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.DoubleArray: - """ - Generate numbers in the range [0, 1). - - Generated values are uniformly-distributed, double-precision - in range [0, 1). Algorithm and seed can be changed via RandomOptions. - - Parameters - ---------- - n : int - Number of values to generate, must be greater than or equal to 0 - initializer : int or str - How to initialize the underlying random generator. - If an integer is given, it is used as a seed. - If "system" is given, the random generator is initialized with - a system-specific source of (hopefully true) randomness. - Other values are invalid. - options : pyarrow.compute.RandomOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.DoubleArray: ... + # ========================= 3. Array-wise (“vector”) functions ========================= @@ -5374,168 +1273,15 @@ def cumulative_sum( skip_nulls: bool = False, options: CumulativeSumOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericArrayT | Expression: - """ - Compute the cumulative sum over a numeric input. - - `values` must be numeric. Return an array/chunked array which is the - cumulative sum computed over `values`. Results will wrap around on - integer overflow. Use function "cumulative_sum_checked" if you want - overflow to return an error. The default start is 0. - - Parameters - ---------- - values : Array-like - Argument to compute function. - start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. - skip_nulls : bool, default False - When false, the first encountered null is propagated. - options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericArrayT | Expression: ... + cumulative_sum_checked = _clone_signature(cumulative_sum) -""" -Compute the cumulative sum over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative sum computed over `values`. This function returns an error -on overflow. 
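Section 3 opens with the cumulative kernels; together with `random` above, and the checked/min/max clones that follow, they can be sketched as (illustrative only):

import pyarrow as pa
import pyarrow.compute as pc

pc.random(3, initializer=42)            # 3 reproducible doubles in [0, 1)
pc.cumulative_sum(pa.array([1, 2, 3]))  # [1, 3, 6]
pc.cumulative_max(pa.array([2, 1, 5]))  # [2, 2, 5]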
For a variant that doesn't fail on overflow, use -function "cumulative_sum". The default start is 0. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cumulative_prod = _clone_signature(cumulative_sum) -""" -Compute the cumulative product over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative product computed over `values`. Results will wrap around on -integer overflow. Use function "cumulative_prod_checked" if you want -overflow to return an error. The default start is 1. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cumulative_prod_checked = _clone_signature(cumulative_sum) -""" -Compute the cumulative product over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative product computed over `values`. This function returns an error -on overflow. For a variant that doesn't fail on overflow, use -function "cumulative_prod". The default start is 1. - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cumulative_max = _clone_signature(cumulative_sum) -""" -Compute the cumulative max over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative max computed over `values`. The default start is the minimum -value of input type (so that any other value will replace the -start as the new maximum). - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cumulative_min = _clone_signature(cumulative_sum) -""" -Compute the cumulative min over a numeric input. - -`values` must be numeric. 
Return an array/chunked array which is the -cumulative min computed over `values`. The default start is the maximum -value of input type (so that any other value will replace the -start as the new minimum). - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" cumulative_mean = _clone_signature(cumulative_sum) -""" -Compute the cumulative max over a numeric input. - -`values` must be numeric. Return an array/chunked array which is the -cumulative max computed over `values`. The default start is the minimum -value of input type (so that any other value will replace the -start as the new maximum). - -Parameters ----------- -values : Array-like - Argument to compute function. -start : Scalar, default None - Starting value for the cumulative operation. If none is given, - a default value depending on the operation and input type is used. -skip_nulls : bool, default False - When false, the first encountered null is propagated. -options : pyarrow.compute.CumulativeOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" # ========================= 3.2 Associative transforms ========================= def dictionary_encode( @@ -5615,45 +1361,6 @@ def drop_null( filter = array_filter take = array_take -""" -Select values (or records) from array- or table-like data given integer -selection indices. - -The result will be of the same type(s) as the input, with elements taken -from the input array (or record batch / table fields) at the given -indices. If an index is null then the corresponding value in the output -will be null. - -Parameters ----------- -data : Array, ChunkedArray, RecordBatch, or Table -indices : Array, ChunkedArray - Must be of integer type -boundscheck : boolean, default True - Whether to boundscheck the indices. If False and there is an out of - bounds index, will likely cause the process to crash. -memory_pool : MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - -Returns -------- -result : depends on inputs - Selected values for the given indices - -Examples --------- ->>> import pyarrow as pa ->>> arr = pa.array(["a", "b", "c", None, "e", "f"]) ->>> indices = pa.array([0, None, 4, 3]) ->>> arr.take(indices) - -[ - "a", - null, - "e", - null -] -""" # ========================= 3.4 Containment tests ========================= def indices_nonzero( @@ -5665,20 +1372,8 @@ def indices_nonzero( /, *, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array | Expression: - """ - Return the indices of the values in the array that are non-zero. - - For each input value, check if it's zero, false or null. Emit the index - of the value in the array if it's none of the those. +) -> lib.UInt64Array | Expression: ... - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. 
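`take`, `filter` and `indices_nonzero` from the selection hunks above behave as the removed doctest described; a compact sketch (not part of the patch):

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array(["a", "b", "c", None])
pc.take(arr, pa.array([0, None, 2]))                  # ["a", null, "c"]
pc.filter(arr, pa.array([True, False, True, False]))  # ["a", "c"]
pc.indices_nonzero(pa.array([0, 5, 0, 7]))            # UInt64Array: [1, 3]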
- """ # ========================= 3.5 Sorts and partitions ========================= def array_sort_indices( @@ -5689,33 +1384,8 @@ def array_sort_indices( null_placement: _Placement = "at_end", options: ArraySortOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array | Expression: - """ - Return the indices that would sort an array. - - This function computes an array of indices that define a stable sort - of the input array. By default, Null values are considered greater - than any other value and are therefore sorted at the end of the array. - For floating-point types, NaNs are considered greater than any - other non-null value, but smaller than null values. - - The handling of nulls and NaNs can be changed in ArraySortOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - null_placement : str, default "at_end" - Where nulls in the input should be sorted. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.ArraySortOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.UInt64Array | Expression: ... + def partition_nth_indices( array: lib.Array | lib.ChunkedArray | Expression, @@ -5725,39 +1395,8 @@ def partition_nth_indices( null_placement: _Placement = "at_end", options: PartitionNthOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array | Expression: - """ - Return the indices that would partition an array around a pivot. - - This functions computes an array of indices that define a non-stable - partial sort of the input array. - - The output is such that the `N`'th index points to the `N`'th element - of the input in sorted order, and all indices before the `N`'th point - to elements in the input less or equal to elements at or after the `N`'th. - - By default, null values are considered greater than any other value - and are therefore partitioned towards the end of the array. - For floating-point types, NaNs are considered greater than any - other non-null value, but smaller than null values. - - The pivot index `N` must be given in PartitionNthOptions. - The handling of nulls and NaNs can also be changed in PartitionNthOptions. - - Parameters - ---------- - array : Array-like - Argument to compute function. - pivot : int - Index into the equivalent sorted array of the pivot element. - null_placement : str, default "at_end" - Where nulls in the input should be partitioned. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.PartitionNthOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.UInt64Array | Expression: ... + def rank( input: lib.Array | lib.ChunkedArray, @@ -5768,49 +1407,8 @@ def rank( tiebreaker: Literal["min", "max", "first", "dense"] = "first", options: RankOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array: - """ - Compute ordinal ranks of an array (1-based). - - This function computes a rank of the input array. - By default, null values are considered greater than any other value and - are therefore sorted at the end of the input. 
For floating-point types, - NaNs are considered greater than any other non-null value, but smaller - than null values. The default tiebreaker is to assign ranks in order of - when ties appear in the input. - - The handling of nulls, NaNs and tiebreakers can be changed in RankOptions. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - sort_keys : sequence of (name, order) tuples or str, default "ascending" - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - Alternatively, one can simply pass "ascending" or "descending" as a string - if the input is array-like. - null_placement : str, default "at_end" - Where nulls in input should be sorted. - Accepted values are "at_start", "at_end". - tiebreaker : str, default "first" - Configure how ties between equal values are handled. - Accepted values are: - - - "min": Ties get the smallest possible rank in sorted order. - - "max": Ties get the largest possible rank in sorted order. - - "first": Ranks are assigned in order of when ties appear in the - input. This ensures the ranks are a stable permutation - of the input. - - "dense": The ranks span a dense [1, M] interval where M is the - number of distinct values in the input. - options : pyarrow.compute.RankOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.UInt64Array: ... + def select_k_unstable( input: lib.Array | lib.ChunkedArray | Expression, @@ -5820,35 +1418,8 @@ def select_k_unstable( *, options: SelectKOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array | Expression: - """ - Select the indices of the first `k` ordered elements from the input. - - This function selects an array of indices of the first `k` ordered elements - from the `input` array, record batch or table specified in the column keys - (`options.sort_keys`). Output is not guaranteed to be stable. - Null values are considered greater than any other value and are - therefore ordered at the end. For floating-point types, NaNs are considered - greater than any other non-null value, but smaller than null values. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - k : int - Number of leading values to select in sorted order - (i.e. the largest values if sort order is "descending", - the smallest otherwise). - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - options : pyarrow.compute.SelectKOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.UInt64Array | Expression: ... + def sort_indices( input: lib.Array | lib.ChunkedArray | lib.RecordBatch | lib.Table | Expression, @@ -5858,58 +1429,15 @@ def sort_indices( null_placement: _Placement = "at_end", options: SortOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.UInt64Array | Expression: - """ - Return the indices that would sort an array, record batch or table. 
- - This function computes an array of indices that define a stable sort - of the input array, record batch or table. By default, null values are - considered greater than any other value and are therefore sorted at the - end of the input. For floating-point types, NaNs are considered greater - than any other non-null value, but smaller than null values. - - The handling of nulls and NaNs can be changed in SortOptions. - - Parameters - ---------- - input : Array-like or scalar-like - Argument to compute function. - sort_keys : sequence of (name, order) tuples - Names of field/column keys to sort the input on, - along with the order each field/column is sorted in. - Accepted values for `order` are "ascending", "descending". - The field name can be a string column name or expression. - null_placement : str, default "at_end" - Where nulls in input should be sorted, only applying to - columns/fields mentioned in `sort_keys`. - Accepted values are "at_start", "at_end". - options : pyarrow.compute.SortOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.UInt64Array | Expression: ... + # ========================= 3.6 Structural transforms ========================= def list_element( lists: lib.Array[ListScalar[_DataTypeT]] | lib.ChunkedArray[ListScalar[_DataTypeT]] | ListScalar[_DataTypeT] | Expression, index: ScalarLike, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] | _DataTypeT | Expression: - """ - Compute elements using of nested list values using an index. - - `lists` must have a list-like type. - For each value in each list of `lists`, the element at `index` - is emitted. Null values emit a null in the output. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - index : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Array[lib.Scalar[_DataTypeT]] | lib.ChunkedArray[lib.Scalar[_DataTypeT]] | _DataTypeT | Expression: ... + def list_flatten( lists: ArrayOrChunkedArray[ListScalar[Any]] | Expression, @@ -5918,49 +1446,13 @@ def list_flatten( *, options: ListFlattenOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[Any] | Expression: - """ - Flatten list values. - - `lists` must have a list-like type (lists, list-views, and - fixed-size lists). - Return an array with the top list level flattened unless - `recursive` is set to true in ListFlattenOptions. When that - is that case, flattening happens recursively until a non-list - array is formed. - - Null list values do not emit anything to the output. - - Parameters - ---------- - lists : Array-like - Argument to compute function. - recursive : bool, default False - When True, the list array is flattened recursively until an array - of non-list values is formed. - options : pyarrow.compute.ListFlattenOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.ListArray[Any] | Expression: ... + def list_parent_indices( lists: ArrayOrChunkedArray[Any] | Expression, /, *, memory_pool: lib.MemoryPool | None = None -) -> lib.Int64Array | Expression: - """ - Compute parent indices of nested list values. 
- - `lists` must have a list-like or list-view type. - For each value in each list of `lists`, the top-level list index - is emitted. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.Int64Array | Expression: ... + def list_slice( lists: ArrayOrChunkedArray[Any] | Expression, @@ -5972,35 +1464,8 @@ def list_slice( *, options: ListSliceOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> lib.ListArray[Any] | Expression: - """ - Compute slice of list-like array. - - `lists` must have a list-like type. - For each list element, compute a slice, returning a new list array. - A variable or fixed size list array is returned, depending on options. - - Parameters - ---------- - lists : Array-like or scalar-like - Argument to compute function. - start : int - Index to start slicing inner list elements (inclusive). - stop : Optional[int], default None - If given, index to stop slicing at (exclusive). - If not given, slicing will stop at the end. (NotImplemented) - step : int, default 1 - Slice step. - return_fixed_size_list : Optional[bool], default None - Whether to return a FixedSizeListArray. If true _and_ stop is after - a list element's length, nulls will be appended to create the - requested slice size. The default of `None` will return the same - type which was passed in. - options : pyarrow.compute.ListSliceOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> lib.ListArray[Any] | Expression: ... + def map_lookup( container, @@ -6010,28 +1475,8 @@ def map_lookup( *, options: MapLookupOptions | None = None, memory_pool: lib.MemoryPool | None = None, -): - """ - Find the items corresponding to a given key in a Map. - - For a given query key (passed via MapLookupOptions), extract - either the FIRST, LAST or ALL items from a Map that have - matching keys. - - Parameters - ---------- - container : Array-like or scalar-like - Argument to compute function. - query_key : Scalar or Object can be converted to Scalar - The key to search for. - occurrence : str - The occurrence(s) to return from the Map - Accepted values are "first", "last", or "all". - options : pyarrow.compute.MapLookupOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +): ... + def struct_field( values, @@ -6040,62 +1485,14 @@ def struct_field( *, options: StructFieldOptions | None = None, memory_pool: lib.MemoryPool | None = None, -): - """ - Extract children of a struct or union by index. - - Given a list of indices (passed via StructFieldOptions), extract - the child array or scalar with the given child index, recursively. - - For union inputs, nulls are emitted for union values that reference - a different child than specified. Also, the indices are always - in physical order, not logical type codes - for example, the first - child is always index 0. - - An empty list of indices returns the argument unchanged. - - Parameters - ---------- - values : Array-like or scalar-like - Argument to compute function. 
- indices : List[str], List[bytes], List[int], Expression, bytes, str, or int - List of indices for chained field lookup, for example `[4, 1]` - will look up the second nested field in the fifth outer field. - options : pyarrow.compute.StructFieldOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Carry non-null values backward to fill null slots. - - Given an array, propagate next valid observation backward to previous valid - or nothing if all next values are null. - - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ - -def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): - """ - Carry non-null values forward to fill null slots. - - Given an array, propagate last valid observation forward to next valid - or nothing if all previous values are null. - - Parameters - ---------- - values : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +): ... + + +def fill_null_backward(values, /, *, memory_pool: lib.MemoryPool | None = None): ... + + +def fill_null_forward(values, /, *, memory_pool: lib.MemoryPool | None = None): ... + def replace_with_mask( values, @@ -6104,28 +1501,8 @@ def replace_with_mask( /, *, memory_pool: lib.MemoryPool | None = None, -): - """ - Replace items selected with a mask. - - Given an array and a boolean mask (either scalar or of equal length), - along with replacement values (either scalar or array), - each element of the array for which the corresponding mask element is - true will be replaced by the next value from the replacements, - or with null if the mask is null. - Hence, for replacement arrays, len(replacements) == sum(mask == true). - - Parameters - ---------- - values : Array-like - Argument to compute function. - mask : Array-like - Argument to compute function. - replacements : Array-like - Argument to compute function. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +): ... + # ========================= 3.7 Pairwise functions ========================= def pairwise_diff( @@ -6135,51 +1512,7 @@ def pairwise_diff( *, options: PairwiseOptions | None = None, memory_pool: lib.MemoryPool | None = None, -) -> _NumericOrTemporalArrayT | Expression: - """ - Compute first order difference of an array. - - Computes the first order difference of an array, It internally calls - the scalar function "subtract" to compute - differences, so its - behavior and supported types are the same as - "subtract". The period can be specified in :struct:`PairwiseOptions`. - - Results will wrap around on integer overflow. Use function - "pairwise_diff_checked" if you want overflow to return an error. - - Parameters - ---------- - input : Array-like - Argument to compute function. - period : int, default 1 - Period for applying the period function. - options : pyarrow.compute.PairwiseOptions, optional - Alternative way of passing options. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. - """ +) -> _NumericOrTemporalArrayT | Expression: ... 
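# Editorial sketch (not part of the patch): how a few of the compute kernels typed
# above are expected to behave, assuming a pyarrow build in which cumulative_sum,
# pairwise_diff and fill_null_forward are all available.
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, 2, 4, 8])
pc.cumulative_sum(arr)                         # [1, 3, 7, 15]; wraps on integer overflow
pc.pairwise_diff(arr)                          # [null, 1, 2, 4]; first slot has no predecessor
pc.fill_null_forward(pa.array([1, None, 3]))   # [1, 1, 3]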
+ pairwise_diff_checked = _clone_signature(pairwise_diff) -""" -Compute first order difference of an array. - -Computes the first order difference of an array, It internally calls -the scalar function "subtract_checked" (or the checked variant) to compute -differences, so its behavior and supported types are the same as -"subtract_checked". The period can be specified in :struct:`PairwiseOptions`. - -This function returns an error on overflow. For a variant that doesn't -fail on overflow, use function "pairwise_diff". - -Parameters ----------- -input : Array-like - Argument to compute function. -period : int, default 1 - Period for applying the period function. -options : pyarrow.compute.PairwiseOptions, optional - Alternative way of passing options. -memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the default memory pool. -""" diff --git a/python/pyarrow-stubs/config.pyi b/python/pyarrow-stubs/config.pyi index 7c2eb8a9c98..62555a506f3 100644 --- a/python/pyarrow-stubs/config.pyi +++ b/python/pyarrow-stubs/config.pyi @@ -17,11 +17,13 @@ from typing import NamedTuple + class VersionInfo(NamedTuple): major: int minor: int patch: int + class BuildInfo(NamedTuple): version: str version_info: VersionInfo @@ -35,17 +37,21 @@ class BuildInfo(NamedTuple): package_kind: str build_type: str + class RuntimeInfo(NamedTuple): simd_level: str detected_simd_level: str + cpp_build_info: BuildInfo cpp_version: str cpp_version_info: VersionInfo + def runtime_info() -> RuntimeInfo: ... def set_timezone_db_path(path: str) -> None: ... + __all__ = [ "VersionInfo", "BuildInfo", diff --git a/python/pyarrow-stubs/dataset.pyi b/python/pyarrow-stubs/dataset.pyi index 6cb7fed43e6..160ed19ee4b 100644 --- a/python/pyarrow-stubs/dataset.pyi +++ b/python/pyarrow-stubs/dataset.pyi @@ -128,10 +128,13 @@ __all__ = [ _DatasetFormat: TypeAlias = Literal["parquet", "ipc", "arrow", "feather", "csv"] + @overload def partitioning( schema: Schema, ) -> Partitioning: ... + + @overload def partitioning( schema: Schema, @@ -139,6 +142,8 @@ def partitioning( flavor: Literal["filename"], dictionaries: dict[str, Array] | None = None, ) -> Partitioning: ... + + @overload def partitioning( schema: Schema, @@ -146,12 +151,16 @@ def partitioning( flavor: Literal["filename"], dictionaries: Literal["infer"], ) -> PartitioningFactory: ... + + @overload def partitioning( field_names: list[str], *, flavor: Literal["filename"], ) -> PartitioningFactory: ... + + @overload def partitioning( schema: Schema, @@ -159,11 +168,15 @@ def partitioning( flavor: Literal["hive"], dictionaries: Literal["infer"], ) -> PartitioningFactory: ... + + @overload def partitioning( *, flavor: Literal["hive"], ) -> PartitioningFactory: ... + + @overload def partitioning( schema: Schema, @@ -171,6 +184,8 @@ def partitioning( flavor: Literal["hive"], dictionaries: dict[str, Array] | None = None, ) -> Partitioning: ... + + def parquet_dataset( metadata_path: StrPath, schema: Schema | None = None, @@ -179,6 +194,8 @@ def parquet_dataset( partitioning: Partitioning | PartitioningFactory | None = None, partition_base_dir: str | None = None, ) -> FileSystemDataset: ... + + @overload def dataset( source: StrPath | Sequence[StrPath], @@ -190,6 +207,8 @@ def dataset( exclude_invalid_files: bool | None = None, ignore_prefixes: list[str] | None = None, ) -> FileSystemDataset: ... 
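# Editorial sketch (not part of the patch): the path-based overload above resolving to
# FileSystemDataset; "data/" is a hypothetical directory of Parquet files and the
# hive-flavored partitioning factory is only one possible argument.
import pyarrow.dataset as ds

dset = ds.dataset("data/", format="parquet",
                  partitioning=ds.partitioning(flavor="hive"))
table = dset.to_table()   # dset should type-check as FileSystemDataset per this overload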
+ + @overload def dataset( source: list[Dataset], @@ -201,6 +220,8 @@ def dataset( exclude_invalid_files: bool | None = None, ignore_prefixes: list[str] | None = None, ) -> UnionDataset: ... + + @overload def dataset( source: Iterable[RecordBatch] | Iterable[Table] | RecordBatchReader, @@ -212,6 +233,8 @@ def dataset( exclude_invalid_files: bool | None = None, ignore_prefixes: list[str] | None = None, ) -> InMemoryDataset: ... + + @overload def dataset( source: RecordBatch | Table, @@ -223,6 +246,8 @@ def dataset( exclude_invalid_files: bool | None = None, ignore_prefixes: list[str] | None = None, ) -> InMemoryDataset: ... + + def write_dataset( data: Dataset | Table | RecordBatch | RecordBatchReader | list[Table] | Iterable[RecordBatch], base_dir: StrPath, @@ -241,6 +266,7 @@ def write_dataset( min_rows_per_group: int = 0, max_rows_per_group: int = 1024 * 1024, file_visitor: Callable[[str], None] | None = None, - existing_data_behavior: Literal["error", "overwrite_or_ignore", "delete_matching"] = "error", + existing_data_behavior: Literal["error", + "overwrite_or_ignore", "delete_matching"] = "error", create_dir: bool = True, ): ... diff --git a/python/pyarrow-stubs/device.pyi b/python/pyarrow-stubs/device.pyi index 6c4f1fdeeea..d77fe2504af 100644 --- a/python/pyarrow-stubs/device.pyi +++ b/python/pyarrow-stubs/device.pyi @@ -19,6 +19,7 @@ import enum from pyarrow.lib import _Weakrefable + class DeviceAllocationType(enum.Flag): CPU = enum.auto() CUDA = enum.auto() @@ -35,71 +36,33 @@ class DeviceAllocationType(enum.Flag): WEBGPU = enum.auto() HEXAGON = enum.auto() -class Device(_Weakrefable): - """ - Abstract interface for hardware devices - This object represents a device with access to some memory spaces. - When handling a Buffer or raw memory address, it allows deciding in which - context the raw memory address should be interpreted - (e.g. CPU-accessible memory, or embedded memory on some particular GPU). - """ +class Device(_Weakrefable): @property - def type_name(self) -> str: - """ - A shorthand for this device's type. - """ + def type_name(self) -> str: ... + @property - def device_id(self) -> int: - """ - A device ID to identify this device if there are multiple of this type. + def device_id(self) -> int: ... - If there is no "device_id" equivalent (such as for the main CPU device on - non-numa systems) returns -1. - """ @property - def is_cpu(self) -> bool: - """ - Whether this device is the main CPU device. + def is_cpu(self) -> bool: ... - This shorthand method is very useful when deciding whether a memory address - is CPU-accessible. - """ @property - def device_type(self) -> DeviceAllocationType: - """ - Return the DeviceAllocationType of this device. - """ + def device_type(self) -> DeviceAllocationType: ... -class MemoryManager(_Weakrefable): - """ - An object that provides memory management primitives. - A MemoryManager is always tied to a particular Device instance. - It can also have additional parameters (such as a MemoryPool to - allocate CPU memory). +class MemoryManager(_Weakrefable): - """ @property - def device(self) -> Device: - """ - The device this MemoryManager is tied to. - """ + def device(self) -> Device: ... + @property - def is_cpu(self) -> bool: - """ - Whether this MemoryManager is tied to the main CPU device. + def is_cpu(self) -> bool: ... - This shorthand method is very useful when deciding whether a memory - address is CPU-accessible. - """ -def default_cpu_memory_manager() -> MemoryManager: - """ - Return the default CPU MemoryManager instance. 
+def default_cpu_memory_manager() -> MemoryManager: ... - The returned singleton instance uses the default MemoryPool. - """ -__all__ = ["DeviceAllocationType", "Device", "MemoryManager", "default_cpu_memory_manager"] +__all__ = ["DeviceAllocationType", "Device", + "MemoryManager", "default_cpu_memory_manager"] diff --git a/python/pyarrow-stubs/feather.pyi b/python/pyarrow-stubs/feather.pyi index ce8d83dbcd9..10281e91152 100644 --- a/python/pyarrow-stubs/feather.pyi +++ b/python/pyarrow-stubs/feather.pyi @@ -32,18 +32,25 @@ __all__ = [ "read_table", ] + class FeatherDataset: path_or_paths: str | list[str] validate_schema: bool - def __init__(self, path_or_paths: str | list[str], validate_schema: bool = True) -> None: ... + def __init__(self, path_or_paths: str | + list[str], validate_schema: bool = True) -> None: ... + def read_table(self, columns: list[str] | None = None) -> Table: ... def validate_schemas(self, piece, table: Table) -> None: ... + def read_pandas( self, columns: list[str] | None = None, use_threads: bool = True ) -> pd.DataFrame: ... + def check_chunked_overflow(name: str, col) -> None: ... + + def write_feather( df: pd.DataFrame | Table, dest: StrPath | IO, @@ -52,6 +59,8 @@ def write_feather( chunksize: int | None = None, version: Literal[1, 2] = 2, ) -> None: ... + + def read_feather( source: StrPath | IO, columns: list[str] | None = None, @@ -59,6 +68,8 @@ def read_feather( memory_map: bool = False, **kwargs, ) -> pd.DataFrame: ... + + def read_table( source: StrPath | IO, columns: list[str] | None = None, diff --git a/python/pyarrow-stubs/fs.pyi b/python/pyarrow-stubs/fs.pyi index 6c5a0af8d19..61a557ea428 100644 --- a/python/pyarrow-stubs/fs.pyi +++ b/python/pyarrow-stubs/fs.pyi @@ -45,6 +45,7 @@ from pyarrow._s3fs import ( # noqa FileStats = FileInfo + def copy_files( source: str, destination: str, @@ -55,10 +56,12 @@ def copy_files( use_threads: bool = True, ) -> None: ... + class FSSpecHandler(FileSystemHandler): # type: ignore[misc] fs: SupportedFileSystem def __init__(self, fs: SupportedFileSystem) -> None: ... + __all__ = [ # _fs "FileSelector", diff --git a/python/pyarrow-stubs/interchange/buffer.pyi b/python/pyarrow-stubs/interchange/buffer.pyi index 6890a24030c..e1d8ae949c9 100644 --- a/python/pyarrow-stubs/interchange/buffer.pyi +++ b/python/pyarrow-stubs/interchange/buffer.pyi @@ -19,6 +19,7 @@ import enum from pyarrow.lib import Buffer + class DlpackDeviceType(enum.IntEnum): CPU = 1 CUDA = 2 @@ -29,6 +30,7 @@ class DlpackDeviceType(enum.IntEnum): VPI = 9 ROCM = 10 + class _PyArrowBuffer: def __init__(self, x: Buffer, allow_copy: bool = True) -> None: ... 
@property diff --git a/python/pyarrow-stubs/interchange/column.pyi b/python/pyarrow-stubs/interchange/column.pyi index 970ad3e07be..04861a72b0b 100644 --- a/python/pyarrow-stubs/interchange/column.pyi +++ b/python/pyarrow-stubs/interchange/column.pyi @@ -23,6 +23,7 @@ from pyarrow.lib import Array, ChunkedArray from .buffer import _PyArrowBuffer + class DtypeKind(enum.IntEnum): INT = 0 UINT = 1 @@ -32,8 +33,10 @@ class DtypeKind(enum.IntEnum): DATETIME = 22 CATEGORICAL = 23 + Dtype: TypeAlias = tuple[DtypeKind, int, str, str] + class ColumnNullType(enum.IntEnum): NON_NULLABLE = 0 USE_NAN = 1 @@ -41,26 +44,34 @@ class ColumnNullType(enum.IntEnum): USE_BITMASK = 3 USE_BYTEMASK = 4 + class ColumnBuffers(TypedDict): data: tuple[_PyArrowBuffer, Dtype] validity: tuple[_PyArrowBuffer, Dtype] | None offsets: tuple[_PyArrowBuffer, Dtype] | None + class CategoricalDescription(TypedDict): is_ordered: bool is_dictionary: bool categories: _PyArrowColumn | None + class Endianness(enum.Enum): LITTLE = "<" BIG = ">" NATIVE = "=" NA = "|" -class NoBufferPresent(Exception): ... + +class NoBufferPresent(Exception): + ... + class _PyArrowColumn: - def __init__(self, column: Array | ChunkedArray, allow_copy: bool = True) -> None: ... + def __init__(self, column: Array | ChunkedArray, + allow_copy: bool = True) -> None: ... + def size(self) -> int: ... @property def offset(self) -> int: ... diff --git a/python/pyarrow-stubs/interchange/dataframe.pyi b/python/pyarrow-stubs/interchange/dataframe.pyi index fb97e9a414f..cafbe0fc200 100644 --- a/python/pyarrow-stubs/interchange/dataframe.pyi +++ b/python/pyarrow-stubs/interchange/dataframe.pyi @@ -26,10 +26,12 @@ from typing import Any, Iterable, Sequence from pyarrow.interchange.column import _PyArrowColumn from pyarrow.lib import RecordBatch, Table + class _PyArrowDataFrame: def __init__( self, df: Table | RecordBatch, nan_as_null: bool = False, allow_copy: bool = True ) -> None: ... + def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True ) -> _PyArrowDataFrame: ... diff --git a/python/pyarrow-stubs/interchange/from_dataframe.pyi b/python/pyarrow-stubs/interchange/from_dataframe.pyi index b13d5976337..e7f1c6e91ff 100644 --- a/python/pyarrow-stubs/interchange/from_dataframe.pyi +++ b/python/pyarrow-stubs/interchange/from_dataframe.pyi @@ -26,27 +26,39 @@ from .column import ( DtypeKind, ) + class DataFrameObject(Protocol): - def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True) -> Any: ... + def __dataframe__(self, nan_as_null: bool = False, + allow_copy: bool = True) -> Any: ... + ColumnObject: TypeAlias = Any + def from_dataframe(df: DataFrameObject, allow_copy=True) -> Table: ... -def protocol_df_chunk_to_pyarrow(df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: ... + +def protocol_df_chunk_to_pyarrow( + df: DataFrameObject, allow_copy: bool = True) -> RecordBatch: ... + def column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... + def bool_column_to_array(col: ColumnObject, allow_copy: bool = True) -> Array: ... + def categorical_column_to_dictionary( col: ColumnObject, allow_copy: bool = True ) -> DictionaryArray: ... + def parse_datetime_format_str(format_str: str) -> tuple[str, str]: ... + def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> DataType: ... + def buffers_to_array( buffers: ColumnBuffers, data_type: tuple[DtypeKind, int, str, str], @@ -56,6 +68,7 @@ def buffers_to_array( allow_copy: bool = True, ) -> Array: ... 
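# Editorial sketch (not part of the patch): the public interchange entry point typed
# above, assuming a pandas version (>= 1.5) whose DataFrame implements __dataframe__.
import pandas as pd
import pyarrow.interchange as pi

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
table = pi.from_dataframe(df)   # pyarrow.Table built via the interchange protocol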
+ def validity_buffer_from_mask( validity_buff: Buffer, validity_dtype: Dtype, @@ -65,6 +78,7 @@ def validity_buffer_from_mask( allow_copy: bool = True, ) -> Buffer: ... + def validity_buffer_nan_sentinel( data_pa_buffer: Buffer, data_type: Dtype, diff --git a/python/pyarrow-stubs/io.pyi b/python/pyarrow-stubs/io.pyi index 3d630498a1d..ea259f02142 100644 --- a/python/pyarrow-stubs/io.pyi +++ b/python/pyarrow-stubs/io.pyi @@ -40,290 +40,76 @@ from pyarrow.lib import MemoryPool, _Weakrefable from .device import Device, DeviceAllocationType, MemoryManager from ._types import KeyValueMetadata -def have_libhdfs() -> bool: - """ - Return true if HDFS (HadoopFileSystem) library is set up correctly. - """ - -def io_thread_count() -> int: - """ - Return the number of threads to use for I/O operations. - - Many operations, such as scanning a dataset, will implicitly make - use of this pool. The number of threads is set to a fixed value at - startup. It can be modified at runtime by calling - :func:`set_io_thread_count()`. - - See Also - -------- - set_io_thread_count : Modify the size of this pool. - cpu_count : The analogous function for the CPU thread pool. - """ - -def set_io_thread_count(count: int) -> None: - """ - Set the number of threads to use for I/O operations. - - Many operations, such as scanning a dataset, will implicitly make - use of this pool. - - Parameters - ---------- - count : int - The max number of threads that may be used for I/O. - Must be positive. - - See Also - -------- - io_thread_count : Get the size of this pool. - set_cpu_count : The analogous function for the CPU thread pool. - """ +def have_libhdfs() -> bool: ... -Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"] -class NativeFile(_Weakrefable): - """ - The base class for all Arrow streams. +def io_thread_count() -> int: ... + + +def set_io_thread_count(count: int) -> None: ... - Streams are either readable, writable, or both. - They optionally support seeking. - While this class exposes methods to read or write data from Python, the - primary intent of using a Arrow stream is to pass it to other Arrow - facilities that will make use of it, such as Arrow IPC routines. +Mode: TypeAlias = Literal["rb", "wb", "rb+", "ab"] + +class NativeFile(_Weakrefable): - Be aware that there are subtle differences with regular Python files, - e.g. destroying a writable Arrow stream without closing it explicitly - will not flush any pending data. - """ _default_chunk_size: int def __enter__(self) -> Self: ... def __exit__(self, *args) -> None: ... @property - def mode(self) -> Mode: - """ - The file mode. Currently instances of NativeFile may support: - - * rb: binary read - * wb: binary write - * rb+: binary read and write - * ab: binary append - """ + def mode(self) -> Mode: ... + def readable(self) -> bool: ... def seekable(self) -> bool: ... def isatty(self) -> bool: ... - def fileno(self) -> int: - """ - NOT IMPLEMENTED - """ + def fileno(self) -> int: ... + @property def closed(self) -> bool: ... def close(self) -> None: ... 
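# Editorial sketch (not part of the patch): the NativeFile surface typed in this class,
# exercised through BufferReader, a concrete readable NativeFile.
import pyarrow as pa

f = pa.BufferReader(b"reader data")
f.mode        # 'rb'
f.read(6)     # b'reader'
f.tell()      # 6
f.seek(7)     # 7
f.read()      # b'data'
f.close()
f.closed      # True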
- def size(self) -> int: - """ - Return file size - """ - def metadata(self) -> KeyValueMetadata: - """ - Return file metadata - """ - def tell(self) -> int: - """ - Return current stream position - """ - def seek(self, position: int, whence: int = 0) -> int: - """ - Change current file stream position - - Parameters - ---------- - position : int - Byte offset, interpreted relative to value of whence argument - whence : int, default 0 - Point of reference for seek offset - - Notes - ----- - Values of whence: - * 0 -- start of stream (the default); offset should be zero or positive - * 1 -- current stream position; offset may be negative - * 2 -- end of stream; offset is usually negative - - Returns - ------- - int - The new absolute stream position. - """ - def flush(self) -> None: - """ - Flush the stream, if applicable. - - An error is raised if stream is not writable. - """ - def write(self, data: bytes | SupportPyBuffer) -> int: - """ - Write data to the file. - - Parameters - ---------- - data : bytes-like object or exporter of buffer protocol - - Returns - ------- - int - nbytes: number of bytes written - """ - def read(self, nbytes: int | None = None) -> bytes: - """ - Read and return up to n bytes. - - If *nbytes* is None, then the entire remaining file contents are read. - - Parameters - ---------- - nbytes : int, default None - - Returns - ------- - data : bytes - """ - def get_stream(self, file_offset: int, nbytes: int) -> Self: - """ - Return an input stream that reads a file segment independent of the - state of the file. - - Allows reading portions of a random access file as an input stream - without interfering with each other. - - Parameters - ---------- - file_offset : int - nbytes : int - - Returns - ------- - stream : NativeFile - """ - def read_at(self, nbytes: int, offset: int) -> bytes: - """ - Read indicated number of bytes at offset from the file - - Parameters - ---------- - nbytes : int - offset : int - - Returns - ------- - data : bytes - """ - def read1(self, nbytes: int | None = None) -> bytes: - """ - Read and return up to n bytes. - - Unlike read(), if *nbytes* is None then a chunk is read, not the - entire file. - - Parameters - ---------- - nbytes : int, default None - The maximum number of bytes to read. - - Returns - ------- - data : bytes - """ + def size(self) -> int: ... + + def metadata(self) -> KeyValueMetadata: ... + + def tell(self) -> int: ... + + def seek(self, position: int, whence: int = 0) -> int: ... + + def flush(self) -> None: ... + + def write(self, data: bytes | SupportPyBuffer) -> int: ... + + def read(self, nbytes: int | None = None) -> bytes: ... + + def get_stream(self, file_offset: int, nbytes: int) -> Self: ... + + def read_at(self, nbytes: int, offset: int) -> bytes: ... + + def read1(self, nbytes: int | None = None) -> bytes: ... + def readall(self) -> bytes: ... - def readinto(self, b: SupportPyBuffer) -> int: - """ - Read into the supplied buffer - - Parameters - ---------- - b : buffer-like object - A writable buffer object (such as a bytearray). - - Returns - ------- - written : int - number of bytes written - """ - - def readline(self, size: int | None = None) -> bytes: - """Read and return a line of bytes from the file. - - If size is specified, read at most size bytes. - - Line terminator is always b"\\n". - - Parameters - ---------- - size : int - maximum number of bytes read - """ - def readlines(self, hint: int | None = None) -> list[bytes]: - """ - NOT IMPLEMENTED. 
Read lines of the file - - Parameters - ---------- - hint : int - maximum number of bytes read until we stop - """ - def __iter__(self) -> Self: - """ - Implement iter(self). - """ + def readinto(self, b: SupportPyBuffer) -> int: ... + + + def readline(self, size: int | None = None) -> bytes: ... + + def readlines(self, hint: int | None = None) -> list[bytes]: ... + + def __iter__(self) -> Self: ... + def __next__(self) -> bytes: ... - def read_buffer(self, nbytes: int | None = None) -> Buffer: - """ - Read from buffer. - - Parameters - ---------- - nbytes : int, optional - maximum number of bytes read - """ - def truncate(self) -> None: - """ - NOT IMPLEMENTED - """ - def writelines(self, lines: list[bytes]): - """ - Write lines to the file. - - Parameters - ---------- - lines : iterable - Iterable of bytes-like objects or exporters of buffer protocol - """ - def download(self, stream_or_path: StrPath | IOBase, buffer_size: int | None = None) -> None: - """ - Read this file completely to a local path or destination stream. - - This method first seeks to the beginning of the file. - - Parameters - ---------- - stream_or_path : str or file-like object - If a string, a local file path to write to; otherwise, - should be a writable stream. - buffer_size : int, optional - The buffer size to use for data transfers. - """ - def upload(self, stream: IOBase, buffer_size: int | None) -> None: - """ - Write from a source stream to this file. - - Parameters - ---------- - stream : file-like object - Source stream to pipe to this file. - buffer_size : int, optional - The buffer size to use for data transfers. - """ + def read_buffer(self, nbytes: int | None = None) -> Buffer: ... + + def truncate(self) -> None: ... + + def writelines(self, lines: list[bytes]): ... + + def download(self, stream_or_path: StrPath | IOBase, buffer_size: int | None = None) -> None: ... + + def upload(self, stream: IOBase, buffer_size: int | None) -> None: ... + def writable(self): ... @@ -331,183 +117,29 @@ class NativeFile(_Weakrefable): # Python file-like objects class PythonFile(NativeFile): - """ - A stream backed by a Python file object. - - This class allows using Python file objects with arbitrary Arrow - functions, including functions written in another language than Python. - - As a downside, there is a non-zero redirection cost in translating - Arrow stream calls to Python method calls. Furthermore, Python's - Global Interpreter Lock may limit parallelism in some situations. - - Examples - -------- - >>> import io - >>> import pyarrow as pa - >>> pa.PythonFile(io.BytesIO()) - - - Create a stream for writing: - - >>> buf = io.BytesIO() - >>> f = pa.PythonFile(buf, mode = 'w') - >>> f.writable() - True - >>> f.write(b'PythonFile') - 10 - >>> buf.getvalue() - b'PythonFile' - >>> f.close() - >>> f - - - Create a stream for reading: - - >>> buf = io.BytesIO(b'PythonFile') - >>> f = pa.PythonFile(buf, mode = 'r') - >>> f.mode - 'rb' - >>> f.read() - b'PythonFile' - >>> f - - >>> f.close() - >>> f - - """ - def __init__(self, handle: IOBase, mode: Literal["r", "w"] | None = None) -> None: ... - def truncate(self, pos: int | None = None) -> None: - """ - Parameters - ---------- - pos : int, optional - """ - -class MemoryMappedFile(NativeFile): - """ - A stream that represents a memory-mapped file. - - Supports 'r', 'r+', 'w' modes. - Examples - -------- - Create a new file with memory map: + def __init__(self, handle: IOBase, mode: Literal["r", "w"] | None = None) -> None: ... 
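# Editorial sketch (not part of the patch), condensed from the PythonFile docstring
# removed above: wrapping a Python file object so Arrow facilities can use it.
import io
import pyarrow as pa

buf = io.BytesIO()
f = pa.PythonFile(buf, mode="w")
f.write(b"PythonFile")     # 10
buf.getvalue()             # b'PythonFile'
f.close()

f = pa.PythonFile(io.BytesIO(b"PythonFile"), mode="r")
f.mode                     # 'rb'
f.read()                   # b'PythonFile'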
+ def truncate(self, pos: int | None = None) -> None: ... - >>> import pyarrow as pa - >>> mmap = pa.create_memory_map('example_mmap.dat', 10) - >>> mmap - - >>> mmap.close() - Open an existing file with memory map: +class MemoryMappedFile(NativeFile): - >>> with pa.memory_map('example_mmap.dat') as mmap: - ... mmap - ... - - """ @classmethod - def create(cls, path: str, size: int) -> Self: - """ - Create a MemoryMappedFile - - Parameters - ---------- - path : str - Where to create the file. - size : int - Size of the memory mapped file. - """ + def create(cls, path: str, size: int) -> Self: ... + def _open(self, path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r"): ... - def resize(self, new_size: int) -> None: - """ - Resize the map and underlying file. + def resize(self, new_size: int) -> None: ... - Parameters - ---------- - new_size : new size in bytes - """ def memory_map( path: str, mode: Literal["r", "rb", "w", "wb", "r+", "r+b", "rb+"] = "r" -) -> MemoryMappedFile: - """ - Open memory map at file path. Size of the memory map cannot change. - - Parameters - ---------- - path : str - mode : {'r', 'r+', 'w'}, default 'r' - Whether the file is opened for reading ('r'), writing ('w') - or both ('r+'). - - Returns - ------- - mmap : MemoryMappedFile - - Examples - -------- - Reading from a memory map without any memory allocation or copying: - - >>> import pyarrow as pa - >>> with pa.output_stream('example_mmap.txt') as stream: - ... stream.write(b'Constructing a buffer referencing the mapped memory') - ... - 51 - >>> with pa.memory_map('example_mmap.txt') as mmap: - ... mmap.read_at(6,45) - ... - b'memory' - """ +) -> MemoryMappedFile: ... + create_memory_map = MemoryMappedFile.create class OSFile(NativeFile): - """ - A stream backed by a regular file descriptor. - - Examples - -------- - Create a new file to write to: - - >>> import pyarrow as pa - >>> with pa.OSFile('example_osfile.arrow', mode='w') as f: - ... f.writable() - ... f.write(b'OSFile') - ... f.seekable() - ... - True - 6 - False - - Open the file to read: - - >>> with pa.OSFile('example_osfile.arrow', mode='r') as f: - ... f.mode - ... f.read() - ... - 'rb' - b'OSFile' - - Open the file to append: - - >>> with pa.OSFile('example_osfile.arrow', mode='ab') as f: - ... f.mode - ... f.write(b' is super!') - ... - 'ab' - 10 - >>> with pa.OSFile('example_osfile.arrow') as f: - ... f.read() - ... - b'OSFile is super!' - - Inspect created OSFile: - - >>> pa.OSFile('example_osfile.arrow') - - """ + def __init__( self, path: str, @@ -516,435 +148,125 @@ class OSFile(NativeFile): ) -> None: ... class FixedSizeBufferWriter(NativeFile): - """ - A stream writing to a Arrow buffer. - - Examples - -------- - Create a stream to write to ``pyarrow.Buffer``: - - >>> import pyarrow as pa - >>> buf = pa.allocate_buffer(5) - >>> with pa.output_stream(buf) as stream: - ... stream.write(b'abcde') - ... stream - ... - 5 - - - Inspect the buffer: - - >>> buf.to_pybytes() - b'abcde' - >>> buf - - """ + def __init__(self, buffer: Buffer) -> None: ... - def set_memcopy_threads(self, num_threads: int) -> None: - """ - Parameters - ---------- - num_threads : int - """ - def set_memcopy_blocksize(self, blocksize: int) -> None: - """ - Parameters - ---------- - blocksize : int64 - """ - def set_memcopy_threshold(self, threshold: int) -> None: - """ - Parameters - ---------- - threshold : int64 - """ + def set_memcopy_threads(self, num_threads: int) -> None: ... + + def set_memcopy_blocksize(self, blocksize: int) -> None: ... 
+ + def set_memcopy_threshold(self, threshold: int) -> None: ... + # ---------------------------------------------------------------------- # Arrow buffers class Buffer(_Weakrefable): - """ - The base class for all Arrow buffers. - - A buffer represents a contiguous memory area. Many buffers will own - their memory, though not all of them do. - """ - def __len__(self) -> int: - """ - Return len(self). - """ + + def __len__(self) -> int: ... + def _assert_cpu(self) -> None: ... @property - def size(self) -> int: - """ - The buffer size in bytes. - """ + def size(self) -> int: ... + @property - def address(self) -> int: - """ - The buffer's address, as an integer. - - The returned address may point to CPU or device memory. - Use `is_cpu()` to disambiguate. - """ - def hex(self) -> bytes: - """ - Compute hexadecimal representation of the buffer. - - Returns - ------- - : bytes - """ + def address(self) -> int: ... + + def hex(self) -> bytes: ... + @property - def is_mutable(self) -> bool: - """ - Whether the buffer is mutable. - """ + def is_mutable(self) -> bool: ... + @property - def is_cpu(self) -> bool: - """ - Whether the buffer is CPU-accessible. - """ + def is_cpu(self) -> bool: ... + @property - def device(self) -> Device: - """ - The device where the buffer resides. - - Returns - ------- - Device - """ + def device(self) -> Device: ... + @property - def memory_manager(self) -> MemoryManager: - """ - The memory manager associated with the buffer. - - Returns - ------- - MemoryManager - """ + def memory_manager(self) -> MemoryManager: ... + @property - def device_type(self) -> DeviceAllocationType: - """ - The device type where the buffer resides. - - Returns - ------- - DeviceAllocationType - """ + def device_type(self) -> DeviceAllocationType: ... + @property def parent(self) -> Buffer | None: ... - def __getitem__(self, key: builtins.slice | int) -> Self | int: - """ - Return self[key]. - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Slice this buffer. Memory is not copied. - - You can also use the Python slice notation ``buffer[start:stop]``. - - Parameters - ---------- - offset : int, default 0 - Offset from start of buffer to slice. - length : int, default None - Length of slice (default is until end of Buffer starting from - offset). - - Returns - ------- - sliced : Buffer - A logical view over this buffer. - """ - def equals(self, other: Self) -> bool: - """ - Determine if two buffers contain exactly the same data. - - Parameters - ---------- - other : Buffer - - Returns - ------- - are_equal : bool - True if buffer contents and size are equal - """ + def __getitem__(self, key: builtins.slice | int) -> Self | int: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def equals(self, other: Self) -> bool: ... + def __reduce_ex__(self, protocol: SupportsIndex) -> str | tuple[Any, ...]: ... - def to_pybytes(self) -> bytes: - """ - Return this buffer as a Python bytes object. Memory is copied. - """ + def to_pybytes(self) -> bytes: ... + class ResizableBuffer(Buffer): - """ - A base class for buffers that can be resized. - """ - - def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: - """ - Resize buffer to indicated size. - - Parameters - ---------- - new_size : int - New size of buffer (padding may be added internally). - shrink_to_fit : bool, default False - If this is true, the buffer is shrunk when new_size is less - than the current size. - If this is false, the buffer is never shrunk. 
- """ + + + def resize(self, new_size: int, shrink_to_fit: bool = False) -> None: ... + def allocate_buffer( size: int, memory_pool: MemoryPool | None = None, resizable: Literal[False] | Literal[True] | None = None -) -> Buffer | ResizableBuffer: - """ - Allocate a mutable buffer. - - Parameters - ---------- - size : int - Number of bytes to allocate (plus internal padding) - memory_pool : MemoryPool, optional - The pool to allocate memory from. - If not given, the default memory pool is used. - resizable : bool, default False - If true, the returned buffer is resizable. - - Returns - ------- - buffer : Buffer or ResizableBuffer - """ +) -> Buffer | ResizableBuffer: ... + # ---------------------------------------------------------------------- # Arrow Stream class BufferOutputStream(NativeFile): - """ - An output stream that writes to a resizable buffer. - - The buffer is produced as a result when ``getvalue()`` is called. - - Examples - -------- - Create an output stream, write data to it and finalize it with - ``getvalue()``: - - >>> import pyarrow as pa - >>> f = pa.BufferOutputStream() - >>> f.write(b'pyarrow.Buffer') - 14 - >>> f.closed - False - >>> f.getvalue() - - >>> f.closed - True - """ + def __init__(self, memory_pool: MemoryPool | None = None) -> None: ... - def getvalue(self) -> Buffer: - """ - Finalize output stream and return result as pyarrow.Buffer. + def getvalue(self) -> Buffer: ... - Returns - ------- - value : Buffer - """ class MockOutputStream(NativeFile): ... class BufferReader(NativeFile): - """ - Zero-copy reader from objects convertible to Arrow buffer. - - Parameters - ---------- - obj : Python bytes or pyarrow.Buffer - - Examples - -------- - Create an Arrow input stream and inspect it: - - >>> import pyarrow as pa - >>> data = b'reader data' - >>> buf = memoryview(data) - >>> with pa.input_stream(buf) as stream: - ... stream.size() - ... stream.read(6) - ... stream.seek(7) - ... stream.read(15) - ... - 11 - b'reader' - 7 - b'data' - """ - def __init__(self, obj) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + + def __init__(self, obj) -> None: ... + class CompressedInputStream(NativeFile): - """ - An input stream wrapper which decompresses data on the fly. - - Parameters - ---------- - stream : string, path, pyarrow.NativeFile, or file-like object - Input stream object to wrap with the compression. - compression : str - The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). - - Examples - -------- - Create an output stream which compresses the data: - - >>> import pyarrow as pa - >>> data = b"Compressed stream" - >>> raw = pa.BufferOutputStream() - >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: - ... compressed.write(data) - ... - 17 - - Create an input stream with decompression referencing the - buffer with compressed data: - - >>> cdata = raw.getvalue() - >>> with pa.input_stream(cdata, compression="gzip") as compressed: - ... compressed.read() - ... - b'Compressed stream' - - which actually translates to the use of ``BufferReader``and - ``CompressedInputStream``: - - >>> raw = pa.BufferReader(cdata) - >>> with pa.CompressedInputStream(raw, "gzip") as compressed: - ... compressed.read() - ... - b'Compressed stream' - """ + def __init__( self, stream: StrPath | NativeFile | IOBase, compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], - ) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + ) -> None: ... 
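# Editorial sketch (not part of the patch), condensed from the docstrings removed
# above: a gzip round trip pairing CompressedOutputStream with CompressedInputStream.
import pyarrow as pa

raw = pa.BufferOutputStream()
with pa.CompressedOutputStream(raw, "gzip") as compressed:
    compressed.write(b"Compressed stream")   # 17

cdata = raw.getvalue()
with pa.CompressedInputStream(pa.BufferReader(cdata), "gzip") as decompressed:
    decompressed.read()                      # b'Compressed stream'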
+ class CompressedOutputStream(NativeFile): - """ - An output stream wrapper which compresses data on the fly. - - Parameters - ---------- - stream : string, path, pyarrow.NativeFile, or file-like object - Input stream object to wrap with the compression. - compression : str - The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd"). - - Examples - -------- - Create an output stream which compresses the data: - - >>> import pyarrow as pa - >>> data = b"Compressed stream" - >>> raw = pa.BufferOutputStream() - >>> with pa.CompressedOutputStream(raw, "gzip") as compressed: - ... compressed.write(data) - ... - 17 - """ + def __init__( self, stream: StrPath | NativeFile | IOBase, compression: Literal["bz2", "brotli", "gzip", "lz4", "zstd"], - ) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + ) -> None: ... + class BufferedInputStream(NativeFile): - """ - An input stream that performs buffered reads from - an unbuffered input stream, which can mitigate the overhead - of many small reads in some cases. - - Parameters - ---------- - stream : NativeFile - The input stream to wrap with the buffer - buffer_size : int - Size of the temporary read buffer. - memory_pool : MemoryPool - The memory pool used to allocate the buffer. - """ + def __init__( self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None - ) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ - def detach(self) -> NativeFile: - """ - Release the raw InputStream. - Further operations on this stream are invalid. - - Returns - ------- - raw : NativeFile - The underlying raw input stream - """ + ) -> None: ... + + def detach(self) -> NativeFile: ... + class BufferedOutputStream(NativeFile): - """ - An output stream that performs buffered reads from - an unbuffered output stream, which can mitigate the overhead - of many small writes in some cases. - - Parameters - ---------- - stream : NativeFile - The writable output stream to wrap with the buffer - buffer_size : int - Size of the buffer that should be added. - memory_pool : MemoryPool - The memory pool used to allocate the buffer. - """ + def __init__( self, stream: NativeFile, buffer_size: int, memory_pool: MemoryPool | None = None - ) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ - def detach(self) -> NativeFile: - """ - Flush any buffered writes and release the raw OutputStream. - Further operations on this stream are invalid. - - Returns - ------- - raw : NativeFile - The underlying raw output stream. - """ + ) -> None: ... + + def detach(self) -> NativeFile: ... + class TransformInputStream(NativeFile): - """ - Transform an input stream. - - Parameters - ---------- - stream : NativeFile - The stream to transform. - transform_func : callable - The transformation to apply. - """ - def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + + def __init__(self, stream: NativeFile, transform_func: Callable[[Buffer], Any]) -> None: ... + class Transcoder: def __init__(self, decoder, encoder) -> None: ... @@ -952,82 +274,21 @@ class Transcoder: def transcoding_input_stream( stream: NativeFile, src_encoding: str, dest_encoding: str -) -> TransformInputStream: - """ - Add a transcoding transformation to the stream. - Incoming data will be decoded according to ``src_encoding`` and - then re-encoded according to ``dest_encoding``. 
- - Parameters - ---------- - stream : NativeFile - The stream to which the transformation should be applied. - src_encoding : str - The codec to use when reading data. - dest_encoding : str - The codec to use for emitted data. - """ - -def py_buffer(obj: SupportPyBuffer) -> Buffer: - """ - Construct an Arrow buffer from a Python bytes-like or buffer-like object - - Parameters - ---------- - obj : object - the object from which the buffer should be constructed. - """ - -def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: - """ - Construct an Arrow buffer with the given *address* and *size*. - - The buffer will be optionally backed by the Python *base* object, if given. - The *base* object will be kept alive as long as this buffer is alive, - including across language boundaries (for example if the buffer is - referenced by C++ code). - - Parameters - ---------- - address : int - The starting address of the buffer. The address can - refer to both device or host memory but it must be - accessible from device after mapping it with - `get_device_address` method. - size : int - The size of device buffer in bytes. - base : {None, object} - Object that owns the referenced memory. - """ +) -> TransformInputStream: ... + + +def py_buffer(obj: SupportPyBuffer) -> Buffer: ... + + +def foreign_buffer(address: int, size: int, base: Any | None = None) -> Buffer: ... + def as_buffer(o: Buffer | SupportPyBuffer) -> Buffer: ... # --------------------------------------------------------------------- class CacheOptions(_Weakrefable): - """ - Cache options for a pre-buffered fragment scan. - - Parameters - ---------- - hole_size_limit : int, default 8KiB - The maximum distance in bytes between two consecutive ranges; beyond - this value, ranges are not combined. - range_size_limit : int, default 32MiB - The maximum size in bytes of a combined range; if combining two - consecutive ranges would produce a range of a size greater than this, - they are not combined - lazy : bool, default True - lazy = false: request all byte ranges when PreBuffer or WillNeed is called. - lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader - needs them. - lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the - range that is currently being read. - prefetch_limit : int, default 0 - The maximum number of ranges to be prefetched. This is only used for - lazy cache to asynchronously read some ranges after reading the target - range. - """ + hole_size_limit: int range_size_limit: int @@ -1040,10 +301,8 @@ class CacheOptions(_Weakrefable): range_size_limit: int | None = None, lazy: bool = True, prefetch_limit: int = 0, - ) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + ) -> None: ... + @classmethod def from_network_metrics( cls, @@ -1051,211 +310,45 @@ class CacheOptions(_Weakrefable): transfer_bandwidth_mib_per_sec: int, ideal_bandwidth_utilization_frac: float = 0.9, max_ideal_request_size_mib: int = 64, - ) -> Self: - """ - Create suitable CacheOptions based on provided network metrics. - - Typically this will be used with object storage solutions like Amazon S3, - Google Cloud Storage and Azure Blob Storage. - - Parameters - ---------- - time_to_first_byte_millis : int - Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call - setup latency of a new read request. The value is a positive integer. - transfer_bandwidth_mib_per_sec : int - Data transfer Bandwidth (BW) in MiB/sec (per connection). 
The value is a positive - integer. - ideal_bandwidth_utilization_frac : int, default 0.9 - Transfer bandwidth utilization fraction (per connection) to maximize the net - data load. The value is a positive float less than 1. - max_ideal_request_size_mib : int, default 64 - The maximum single data request size (in MiB) to maximize the net data load. - - Returns - ------- - CacheOptions - """ + ) -> Self: ... + class Codec(_Weakrefable): - """ - Compression codec. - - Parameters - ---------- - compression : str - Type of compression codec to initialize, valid values are: 'gzip', - 'bz2', 'brotli', 'lz4' (or 'lz4_frame'), 'lz4_raw', 'zstd' and - 'snappy'. - compression_level : int, None - Optional parameter specifying how aggressively to compress. The - possible ranges and effect of this parameter depend on the specific - codec chosen. Higher values compress more but typically use more - resources (CPU/RAM). Some codecs support negative values. - - gzip - The compression_level maps to the memlevel parameter of - deflateInit2. Higher levels use more RAM but are faster - and should have higher compression ratios. - - bz2 - The compression level maps to the blockSize100k parameter of - the BZ2_bzCompressInit function. Higher levels use more RAM - but are faster and should have higher compression ratios. - - brotli - The compression level maps to the BROTLI_PARAM_QUALITY - parameter. Higher values are slower and should have higher - compression ratios. - - lz4/lz4_frame/lz4_raw - The compression level parameter is not supported and must - be None - - zstd - The compression level maps to the compressionLevel parameter - of ZSTD_initCStream. Negative values are supported. Higher - values are slower and should have higher compression ratios. - - snappy - The compression level parameter is not supported and must - be None - - - Raises - ------ - ValueError - If invalid compression value is passed. - - Examples - -------- - >>> import pyarrow as pa - >>> pa.Codec.is_available('gzip') - True - >>> codec = pa.Codec('gzip') - >>> codec.name - 'gzip' - >>> codec.compression_level - 9 - """ - def __init__(self, compression: Compression, compression_level: int | None = None) -> None: - """ - Initialize self. See help(type(self)) for accurate signature. - """ + + def __init__(self, compression: Compression, compression_level: int | None = None) -> None: ... + @classmethod - def detect(cls, path: StrPath) -> Self: - """ - Detect and instantiate compression codec based on file extension. - - Parameters - ---------- - path : str, path-like - File-path to detect compression from. - - Raises - ------ - TypeError - If the passed value is not path-like. - ValueError - If the compression can't be detected from the path. - - Returns - ------- - Codec - """ + def detect(cls, path: StrPath) -> Self: ... + @staticmethod - def is_available(compression: Compression) -> bool: - """ - Returns whether the compression support has been built and enabled. - - Parameters - ---------- - compression : str - Type of compression codec, - refer to Codec docstring for a list of supported ones. - - Returns - ------- - bool - """ + def is_available(compression: Compression) -> bool: ... + @staticmethod - def supports_compression_level(compression: Compression) -> int: - """ - Returns true if the compression level parameter is supported - for the given codec. - - Parameters - ---------- - compression : str - Type of compression codec, - refer to Codec docstring for a list of supported ones. 
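For illustration, a minimal sketch of constructing the CacheOptions stubbed above; the numeric values are arbitrary placeholders, and where the options are ultimately consumed (e.g. a dataset scan) is not shown here.

import pyarrow as pa

# Merge byte ranges that are less than 4 KiB apart, cap merged ranges at
# 16 MiB, and prefetch two merged ranges ahead of the current read.
opts = pa.CacheOptions(
    hole_size_limit=4 * 1024,
    range_size_limit=16 * 1024 * 1024,
    lazy=True,
    prefetch_limit=2,
)
print(opts.hole_size_limit, opts.range_size_limit)

# Or derive settings from measured object-store characteristics.
net_opts = pa.CacheOptions.from_network_metrics(
    time_to_first_byte_millis=100,
    transfer_bandwidth_mib_per_sec=200,
)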
- """ + def supports_compression_level(compression: Compression) -> int: ... + @staticmethod - def default_compression_level(compression: Compression) -> int: - """ - Returns the compression level that Arrow will use for the codec if - None is specified. - - Parameters - ---------- - compression : str - Type of compression codec, - refer to Codec docstring for a list of supported ones. - """ + def default_compression_level(compression: Compression) -> int: ... + @staticmethod - def minimum_compression_level(compression: Compression) -> int: - """ - Returns the smallest valid value for the compression level - - Parameters - ---------- - compression : str - Type of compression codec, - refer to Codec docstring for a list of supported ones. - """ + def minimum_compression_level(compression: Compression) -> int: ... + @staticmethod - def maximum_compression_level(compression: Compression) -> int: - """ - Returns the largest valid value for the compression level - - Parameters - ---------- - compression : str - Type of compression codec, - refer to Codec docstring for a list of supported ones. - """ + def maximum_compression_level(compression: Compression) -> int: ... + @property - def name(self) -> Compression: - """ - Returns the name of the codec - """ + def name(self) -> Compression: ... + @property - def compression_level(self) -> int: - """ - Returns the compression level parameter of the codec - """ + def compression_level(self) -> int: ... + def compress( self, buf: Buffer | bytes | SupportPyBuffer, *, asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, - ) -> Buffer | bytes: - """ - Compress data from buffer-like object. - - Parameters - ---------- - buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol - asbytes : bool, default False - Return result as Python bytes object, otherwise Buffer - memory_pool : MemoryPool, default None - Memory pool to use for buffer allocations, if any - - Returns - ------- - compressed : pyarrow.Buffer or bytes (if asbytes=True) - """ + ) -> Buffer | bytes: ... + def decompress( self, buf: Buffer | bytes | SupportPyBuffer, @@ -1263,24 +356,8 @@ class Codec(_Weakrefable): *, asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, - ) -> Buffer | bytes: - """ - Decompress data from buffer-like object. - - Parameters - ---------- - buf : pyarrow.Buffer, bytes, or memoryview-compatible object - decompressed_size : int, default None - Size of the decompressed result - asbytes : boolean, default False - Return result as Python bytes object, otherwise Buffer - memory_pool : MemoryPool, default None - Memory pool to use for buffer allocations, if any. - - Returns - ------- - uncompressed : pyarrow.Buffer or bytes (if asbytes=True) - """ + ) -> Buffer | bytes: ... + def compress( buf: Buffer | bytes | SupportPyBuffer, @@ -1288,25 +365,8 @@ def compress( *, asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, -) -> Buffer | bytes: - """ - Compress data from buffer-like object. - - Parameters - ---------- - buf : pyarrow.Buffer, bytes, or other object supporting buffer protocol - codec : str, default 'lz4' - Compression codec. - Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} - asbytes : bool, default False - Return result as Python bytes object, otherwise Buffer. - memory_pool : MemoryPool, default None - Memory pool to use for buffer allocations, if any. 
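A small usage sketch for the Codec stubs above, assuming a pyarrow build with gzip support; the module-level compress()/decompress() functions below wrap the same functionality.

import pyarrow as pa

# Check availability before constructing the codec.
if pa.Codec.is_available("gzip"):
    codec = pa.Codec("gzip")
    raw = b"some bytes worth compressing" * 100
    compressed = codec.compress(raw, asbytes=True)
    # Pass the uncompressed length explicitly; not every codec can infer it.
    restored = codec.decompress(compressed, decompressed_size=len(raw), asbytes=True)
    assert restored == raw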
- - Returns - ------- - compressed : pyarrow.Buffer or bytes (if asbytes=True) - """ +) -> Buffer | bytes: ... + def decompress( buf: Buffer | bytes | SupportPyBuffer, @@ -1315,148 +375,22 @@ def decompress( *, asbytes: Literal[False] | Literal[True] | None = None, memory_pool: MemoryPool | None = None, -) -> Buffer | bytes: - """ - Decompress data from buffer-like object. - - Parameters - ---------- - buf : pyarrow.Buffer, bytes, or memoryview-compatible object - Input object to decompress data from. - decompressed_size : int, default None - Size of the decompressed result - codec : str, default 'lz4' - Compression codec. - Supported types: {'brotli, 'gzip', 'lz4', 'lz4_raw', 'snappy', 'zstd'} - asbytes : bool, default False - Return result as Python bytes object, otherwise Buffer. - memory_pool : MemoryPool, default None - Memory pool to use for buffer allocations, if any. - - Returns - ------- - uncompressed : pyarrow.Buffer or bytes (if asbytes=True) - """ +) -> Buffer | bytes: ... + def input_stream( source: StrPath | Buffer | IOBase, compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", buffer_size: int | None = None, -) -> BufferReader: - """ - Create an Arrow input stream. - - Parameters - ---------- - source : str, Path, buffer, or file-like object - The source to open for reading. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly decompression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. - Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). - buffer_size : int, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary read buffer. - - Examples - -------- - Create a readable BufferReader (NativeFile) from a Buffer or a memoryview object: - - >>> import pyarrow as pa - >>> buf = memoryview(b"some data") - >>> with pa.input_stream(buf) as stream: - ... stream.read(4) - ... - b'some' - - Create a readable OSFile (NativeFile) from a string or file path: - - >>> import gzip - >>> with gzip.open('example.gz', 'wb') as f: - ... f.write(b'some data') - ... - 9 - >>> with pa.input_stream('example.gz') as stream: - ... stream.read() - ... - b'some data' - - Create a readable PythonFile (NativeFile) from a a Python file object: - - >>> with open('example.txt', mode='w') as f: - ... f.write('some text') - ... - 9 - >>> with pa.input_stream('example.txt') as stream: - ... stream.read(6) - ... - b'some t' - """ +) -> BufferReader: ... + def output_stream( source: StrPath | Buffer | IOBase, compression: Literal["detect", "bz2", "brotli", "gzip", "lz4", "zstd"] = "detect", buffer_size: int | None = None, -) -> NativeFile: - """ - Create an Arrow output stream. - - Parameters - ---------- - source : str, Path, buffer, file-like object - The source to open for writing. - compression : str optional, default 'detect' - The compression algorithm to use for on-the-fly compression. - If "detect" and source is a file path, then compression will be - chosen based on the file extension. - If None, no compression will be applied. - Otherwise, a well-known algorithm name must be supplied (e.g. "gzip"). - buffer_size : int, default None - If None or 0, no buffering will happen. Otherwise the size of the - temporary write buffer. 
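The round trip below, adapted from the docstring examples removed above, shows py_buffer() together with output_stream()/input_stream() on an in-memory buffer.

import pyarrow as pa

# py_buffer() wraps the bytearray zero-copy; the Arrow streams then write
# into and read from that same memory.
data = b"buffer data"
scratch = bytearray(len(data))
buf = pa.py_buffer(scratch)

with pa.output_stream(buf) as out:
    out.write(data)

with pa.input_stream(buf) as stream:
    print(stream.read())  # b'buffer data'

Passing a path ending in ".gz" instead would trigger the compression="detect" behaviour described above.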
- - Examples - -------- - Create a writable NativeFile from a pyarrow Buffer: - - >>> import pyarrow as pa - >>> data = b"buffer data" - >>> empty_obj = bytearray(11) - >>> buf = pa.py_buffer(empty_obj) - >>> with pa.output_stream(buf) as stream: - ... stream.write(data) - ... - 11 - >>> with pa.input_stream(buf) as stream: - ... stream.read(6) - ... - b'buffer' - - or from a memoryview object: - - >>> buf = memoryview(empty_obj) - >>> with pa.output_stream(buf) as stream: - ... stream.write(data) - ... - 11 - >>> with pa.input_stream(buf) as stream: - ... stream.read() - ... - b'buffer data' - - Create a writable NativeFile from a string or file path: - - >>> with pa.output_stream('example_second.txt') as stream: - ... stream.write(b'Write some data') - ... - 15 - >>> with pa.input_stream('example_second.txt') as stream: - ... stream.read() - ... - b'Write some data' - """ +) -> NativeFile: ... + __all__ = [ "have_libhdfs", diff --git a/python/pyarrow-stubs/ipc.pyi b/python/pyarrow-stubs/ipc.pyi index 985cf0678f9..a6e7c71dd12 100644 --- a/python/pyarrow-stubs/ipc.pyi +++ b/python/pyarrow-stubs/ipc.pyi @@ -39,6 +39,7 @@ from pyarrow.lib import ( write_tensor, ) + class RecordBatchStreamReader(lib._RecordBatchStreamReader): def __init__( self, @@ -48,6 +49,7 @@ class RecordBatchStreamReader(lib._RecordBatchStreamReader): memory_pool: lib.MemoryPool | None = None, ) -> None: ... + class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): def __init__( self, @@ -58,6 +60,7 @@ class RecordBatchStreamWriter(lib._RecordBatchStreamWriter): options: IpcWriteOptions | None = None, ) -> None: ... + class RecordBatchFileReader(lib._RecordBatchFileReader): def __init__( self, @@ -68,6 +71,7 @@ class RecordBatchFileReader(lib._RecordBatchFileReader): memory_pool: lib.MemoryPool | None = None, ) -> None: ... + class RecordBatchFileWriter(lib._RecordBatchFileWriter): def __init__( self, @@ -78,6 +82,7 @@ class RecordBatchFileWriter(lib._RecordBatchFileWriter): options: IpcWriteOptions | None = None, ) -> None: ... + def new_stream( sink: str | lib.NativeFile | IOBase, schema: lib.Schema, @@ -85,12 +90,16 @@ def new_stream( use_legacy_format: bool | None = None, options: IpcWriteOptions | None = None, ) -> RecordBatchStreamWriter: ... + + def open_stream( source: bytes | lib.Buffer | lib.NativeFile | IOBase, *, options: IpcReadOptions | None = None, memory_pool: lib.MemoryPool | None = None, ) -> RecordBatchStreamReader: ... + + def new_file( sink: str | lib.NativeFile | IOBase, schema: lib.Schema, @@ -98,6 +107,8 @@ def new_file( use_legacy_format: bool | None = None, options: IpcWriteOptions | None = None, ) -> RecordBatchFileWriter: ... + + def open_file( source: bytes | lib.Buffer | lib.NativeFile | IOBase, footer_offset: int | None = None, @@ -105,10 +116,16 @@ def open_file( options: IpcReadOptions | None = None, memory_pool: lib.MemoryPool | None = None, ) -> RecordBatchFileReader: ... + + def serialize_pandas( df: pd.DataFrame, *, nthreads: int | None = None, preserve_index: bool | None = None ) -> lib.Buffer: ... -def deserialize_pandas(buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... + + +def deserialize_pandas( + buf: lib.Buffer, *, use_threads: bool = True) -> pd.DataFrame: ... 
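As a usage sketch for the ipc.pyi stubs in this hunk, the snippet below writes a record batch to an in-memory IPC stream and reads it back; new_file/open_file work the same way for the file format.

import pyarrow as pa
import pyarrow.ipc as ipc

batch = pa.RecordBatch.from_pydict({"x": [1, 2, 3]})

# Write a stream into an in-memory sink, then read it back as a Table.
sink = pa.BufferOutputStream()
with ipc.new_stream(sink, batch.schema) as writer:
    writer.write_batch(batch)

reader = ipc.open_stream(sink.getvalue())
print(reader.read_all())

serialize_pandas()/deserialize_pandas() wrap the same stream format for a single pandas DataFrame.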
+ __all__ = [ "IpcReadOptions", diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index 565feb4b3db..eea11a2e8f1 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -40,45 +40,15 @@ class MonthDayNano(NamedTuple): months: int nanoseconds: int -def cpu_count() -> int: - """ - Return the number of threads to use in parallel operations. - The number of threads is determined at startup by inspecting the - ``OMP_NUM_THREADS`` and ``OMP_THREAD_LIMIT`` environment variables. - If neither is present, it will default to the number of hardware threads - on the system. It can be modified at runtime by calling - :func:`set_cpu_count()`. +def cpu_count() -> int: ... - See Also - -------- - set_cpu_count : Modify the size of this pool. - io_thread_count : The analogous function for the I/O thread pool. - """ -def set_cpu_count(count: int) -> None: - """ - Set the number of threads to use in parallel operations. +def set_cpu_count(count: int) -> None: ... - Parameters - ---------- - count : int - The number of concurrent threads that should be used. - See Also - -------- - cpu_count : Get the size of this pool. - set_io_thread_count : The analogous function for the I/O thread pool. - """ +def is_threading_enabled() -> bool: ... -def is_threading_enabled() -> bool: - """ - Returns True if threading is enabled in libarrow. - - If it isn't enabled, then python shouldn't create any - threads either, because we're probably on a system where - threading doesn't work (e.g. Emscripten). - """ Type_NA: int Type_BOOL: int diff --git a/python/pyarrow-stubs/memory.pyi b/python/pyarrow-stubs/memory.pyi index 4fc723a1950..ab5db5b1f06 100644 --- a/python/pyarrow-stubs/memory.pyi +++ b/python/pyarrow-stubs/memory.pyi @@ -18,165 +18,60 @@ from pyarrow.lib import _Weakrefable class MemoryPool(_Weakrefable): - """ - Base class for memory allocation. - - Besides tracking its number of allocated bytes, a memory pool also - takes care of the required 64-byte alignment for Arrow data. - """ - - def release_unused(self) -> None: - """ - Attempt to return to the OS any memory being held onto by the pool. - - This function should not be called except potentially for - benchmarking or debugging as it could be expensive and detrimental to - performance. - - This is best effort and may not have any effect on some memory pools - or in some situations (e.g. fragmentation). - """ - def bytes_allocated(self) -> int: - """ - Return the number of bytes that are currently allocated from this - memory pool. - """ - def total_bytes_allocated(self) -> int: - """ - Return the total number of bytes that have been allocated from this - memory pool. - """ - def max_memory(self) -> int | None: - """ - Return the peak memory allocation in this memory pool. - This can be an approximate number in multi-threaded applications. - - None is returned if the pool implementation doesn't know how to - compute this number. - """ - def num_allocations(self) -> int: - """ - Return the number of allocations or reallocations that were made - using this memory pool. - """ - def print_stats(self) -> None: - """ - Print statistics about this memory pool. - - The output format is implementation-specific. Not all memory pools - implement this method. - """ + + + def release_unused(self) -> None: ... + + def bytes_allocated(self) -> int: ... + + def total_bytes_allocated(self) -> int: ... + + def max_memory(self) -> int | None: ... + + def num_allocations(self) -> int: ... + + def print_stats(self) -> None: ... 
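A brief sketch of the thread-pool helpers whose docstrings are stripped in the lib.pyi hunk above.

import pyarrow as pa

# The CPU pool drives parallel compute kernels; the I/O pool is sized
# independently via io_thread_count()/set_io_thread_count().
print(pa.cpu_count(), pa.io_thread_count())
pa.set_cpu_count(max(1, pa.cpu_count() - 1))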
+ @property - def backend_name(self) -> str: - """ - The name of the backend used by this MemoryPool (e.g. "jemalloc"). - """ + def backend_name(self) -> str: ... + class LoggingMemoryPool(MemoryPool): ... -class ProxyMemoryPool(MemoryPool): - """ - Memory pool implementation that tracks the number of bytes and - maximum memory allocated through its direct calls, while redirecting - to another memory pool. - """ - -def default_memory_pool() -> MemoryPool: - """ - Return the process-global memory pool. - - Examples - -------- - >>> default_memory_pool() - - """ - -def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: - """ - Create and return a MemoryPool instance that redirects to the - *parent*, but with separate allocation statistics. - - Parameters - ---------- - parent : MemoryPool - The real memory pool that should be used for allocations. - """ - -def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: - """ - Create and return a MemoryPool instance that redirects to the - *parent*, but also dumps allocation logs on stderr. - - Parameters - ---------- - parent : MemoryPool - The real memory pool that should be used for allocations. - """ - -def system_memory_pool() -> MemoryPool: - """ - Return a memory pool based on the C malloc heap. - """ - -def jemalloc_memory_pool() -> MemoryPool: - """ - Return a memory pool based on the jemalloc heap. - - NotImplementedError is raised if jemalloc support is not enabled. - """ - -def mimalloc_memory_pool() -> MemoryPool: - """ - Return a memory pool based on the mimalloc heap. - - NotImplementedError is raised if mimalloc support is not enabled. - """ - -def set_memory_pool(pool: MemoryPool) -> None: - """ - Set the default memory pool. - - Parameters - ---------- - pool : MemoryPool - The memory pool that should be used by default. - """ - -def log_memory_allocations(enable: bool = True) -> None: - """ - Enable or disable memory allocator logging for debugging purposes - - Parameters - ---------- - enable : bool, default True - Pass False to disable logging - """ - -def total_allocated_bytes() -> int: - """ - Return the currently allocated bytes from the default memory pool. - Other memory pools may not be accounted for. - """ - -def jemalloc_set_decay_ms(decay_ms: int) -> None: - """ - Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to indicated number of - milliseconds. A value of 0 (the default) results in dirty / muzzy memory - pages being released right away to the OS, while a higher value will result - in a time-based decay. See the jemalloc docs for more information - - It's best to set this at the start of your application. - - Parameters - ---------- - decay_ms : int - Number of milliseconds to set for jemalloc decay conf parameters. Note - that this change will only affect future memory arenas - """ - -def supported_memory_backends() -> list[str]: - """ - Return a list of available memory pool backends - """ +class ProxyMemoryPool(MemoryPool): ... + + +def default_memory_pool() -> MemoryPool: ... + + +def proxy_memory_pool(parent: MemoryPool) -> ProxyMemoryPool: ... + + +def logging_memory_pool(parent: MemoryPool) -> LoggingMemoryPool: ... + + +def system_memory_pool() -> MemoryPool: ... + + +def jemalloc_memory_pool() -> MemoryPool: ... + + +def mimalloc_memory_pool() -> MemoryPool: ... + + +def set_memory_pool(pool: MemoryPool) -> None: ... + + +def log_memory_allocations(enable: bool = True) -> None: ... + + +def total_allocated_bytes() -> int: ... + + +def jemalloc_set_decay_ms(decay_ms: int) -> None: ... 
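For the memory-pool stubs above, a minimal sketch that tracks allocations through a proxy pool; which backends (jemalloc, mimalloc, system) are available depends on the build.

import pyarrow as pa

# A proxy pool forwards to the default pool but keeps its own statistics.
base = pa.default_memory_pool()
tracked = pa.proxy_memory_pool(base)

arr = pa.array(range(100_000), memory_pool=tracked)
print(tracked.bytes_allocated(), tracked.max_memory())
print(base.backend_name, pa.total_allocated_bytes())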
+ + +def supported_memory_backends() -> list[str]: ... + __all__ = [ "MemoryPool", diff --git a/python/pyarrow-stubs/orc.pyi b/python/pyarrow-stubs/orc.pyi index 557f38a2b9e..5e0289e61f7 100644 --- a/python/pyarrow-stubs/orc.pyi +++ b/python/pyarrow-stubs/orc.pyi @@ -29,152 +29,73 @@ from . import _orc from ._fs import SupportedFileSystem from .lib import KeyValueMetadata, NativeFile, RecordBatch, Schema, Table -class ORCFile: - """ - Reader interface for a single ORC file - Parameters - ---------- - source : str or pyarrow.NativeFile - Readable source. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader. - """ +class ORCFile: reader: _orc.ORCReader def __init__(self, source: StrPath | NativeFile | IO) -> None: ... @property - def metadata(self) -> KeyValueMetadata: - """The file metadata, as an arrow KeyValueMetadata""" + def metadata(self) -> KeyValueMetadata: ... + @property - def schema(self) -> Schema: - """The file schema, as an arrow schema""" + def schema(self) -> Schema: ... + @property - def nrows(self) -> int: - """The number of rows in the file""" + def nrows(self) -> int: ... + @property - def nstripes(self) -> int: - """The number of stripes in the file""" + def nstripes(self) -> int: ... + @property - def file_version(self) -> str: - """Format version of the ORC file, must be 0.11 or 0.12""" + def file_version(self) -> str: ... + @property - def software_version(self) -> str: - """Software instance and version that wrote this file""" + def software_version(self) -> str: ... + @property - def compression(self) -> Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: - """Compression codec of the file""" + def compression(self) -> Literal["UNCOMPRESSED", + "ZLIB", "SNAPPY", "LZ4", "ZSTD"]: ... + @property - def compression_size(self) -> int: - """Number of bytes to buffer for the compression codec in the file""" + def compression_size(self) -> int: ... + @property - def writer(self) -> str: - """Name of the writer that wrote this file. - If the writer is unknown then its Writer ID - (a number) is returned""" + def writer(self) -> str: ... + @property - def writer_version(self) -> str: - """Version of the writer""" + def writer_version(self) -> str: ... + @property - def row_index_stride(self) -> int: - """Number of rows per an entry in the row index or 0 - if there is no row index""" + def row_index_stride(self) -> int: ... + @property - def nstripe_statistics(self) -> int: - """Number of stripe statistics""" + def nstripe_statistics(self) -> int: ... + @property - def content_length(self) -> int: - """Length of the data stripes in the file in bytes""" + def content_length(self) -> int: ... + @property - def stripe_statistics_length(self) -> int: - """The number of compressed bytes in the file stripe statistics""" + def stripe_statistics_length(self) -> int: ... + @property - def file_footer_length(self) -> int: - """The number of compressed bytes in the file footer""" + def file_footer_length(self) -> int: ... + @property - def file_postscript_length(self) -> int: - """The number of bytes in the file postscript""" + def file_postscript_length(self) -> int: ... + @property - def file_length(self) -> int: - """The number of bytes in the file""" - def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: - """Read a single stripe from the file. - - Parameters - ---------- - n : int - The stripe index - columns : list - If not None, only these columns will be read from the stripe. 
A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e' - - Returns - ------- - pyarrow.RecordBatch - Content of the stripe as a RecordBatch. - """ - def read(self, columns: list[str] | None = None) -> Table: - """Read the whole file. - - Parameters - ---------- - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. Output always follows the - ordering of the file and not the `columns` list. - - Returns - ------- - pyarrow.Table - Content of the file as a Table. - """ + def file_length(self) -> int: ... + + def read_stripe(self, n: int, columns: list[str] | None = None) -> RecordBatch: ... + + def read(self, columns: list[str] | None = None) -> Table: ... + class ORCWriter: - """ - Writer interface for a single ORC file - - Parameters - ---------- - where : str or pyarrow.io.NativeFile - Writable target. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream - or pyarrow.io.FixedSizeBufferWriter. - file_version : {"0.11", "0.12"}, default "0.12" - Determine which ORC file version to use. - `Hive 0.11 / ORC v0 `_ - is the older version - while `Hive 0.12 / ORC v1 `_ - is the newer one. - batch_size : int, default 1024 - Number of rows the ORC writer writes at a time. - stripe_size : int, default 64 * 1024 * 1024 - Size of each ORC stripe in bytes. - compression : string, default 'uncompressed' - The compression codec. - Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} - Note that LZ0 is currently not supported. - compression_block_size : int, default 64 * 1024 - Size of each compression block in bytes. - compression_strategy : string, default 'speed' - The compression strategy i.e. speed vs size reduction. - Valid values: {'SPEED', 'COMPRESSION'} - row_index_stride : int, default 10000 - The row index stride i.e. the number of rows per - an entry in the row index. - padding_tolerance : double, default 0.0 - The padding tolerance. - dictionary_key_size_threshold : double, default 0.0 - The dictionary key size threshold. 0 to disable dictionary encoding. - 1 to always enable dictionary encoding. - bloom_filter_columns : None, set-like or list-like, default None - Columns that use the bloom filter. - bloom_filter_fpp : double, default 0.05 - Upper limit of the false-positive rate of the bloom filter. - """ writer: _orc.ORCWriter is_open: bool + def __init__( self, where: StrPath | NativeFile | IO, @@ -182,7 +103,8 @@ class ORCWriter: file_version: str = "0.12", batch_size: int = 1024, stripe_size: int = 64 * 1024 * 1024, - compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression: Literal["UNCOMPRESSED", "ZLIB", + "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", compression_block_size: int = 65536, compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", row_index_stride: int = 10000, @@ -193,47 +115,17 @@ class ORCWriter: ): ... def __enter__(self) -> Self: ... def __exit__(self, *args, **kwargs) -> None: ... - def write(self, table: Table) -> None: - """ - Write the table into an ORC file. The schema of the table must - be equal to the schema used when opening the ORC file. - - Parameters - ---------- - table : pyarrow.Table - The table to be written into the ORC file - """ - def close(self) -> None: - """ - Close the ORC file - """ + def write(self, table: Table) -> None: ... 
+ + def close(self) -> None: ... + def read_table( source: StrPath | NativeFile | IO, columns: list[str] | None = None, filesystem: SupportedFileSystem | None = None, -) -> Table: - """ - Read a Table from an ORC file. - - Parameters - ---------- - source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name. For file-like objects, - only read a single file. Use pyarrow.BufferReader to read a file - contained in a bytes or buffer-like object. - columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. Output always follows the ordering of the file and - not the `columns` list. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - """ +) -> Table: ... + def write_table( table: Table, @@ -242,7 +134,8 @@ def write_table( file_version: str = "0.12", batch_size: int = 1024, stripe_size: int = 64 * 1024 * 1024, - compression: Literal["UNCOMPRESSED", "ZLIB", "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", + compression: Literal["UNCOMPRESSED", "ZLIB", + "SNAPPY", "LZ4", "ZSTD"] = "UNCOMPRESSED", compression_block_size: int = 65536, compression_strategy: Literal["COMPRESSION", "SPEED"] = "SPEED", row_index_stride: int = 10000, @@ -250,47 +143,4 @@ def write_table( dictionary_key_size_threshold: float = 0.0, bloom_filter_columns: list[int] | None = None, bloom_filter_fpp: float = 0.05, -) -> None: - """ - Write a table into an ORC file. - - Parameters - ---------- - table : pyarrow.lib.Table - The table to be written into the ORC file - where : str or pyarrow.io.NativeFile - Writable target. For passing Python file objects or byte buffers, - see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream - or pyarrow.io.FixedSizeBufferWriter. - file_version : {"0.11", "0.12"}, default "0.12" - Determine which ORC file version to use. - `Hive 0.11 / ORC v0 `_ - is the older version - while `Hive 0.12 / ORC v1 `_ - is the newer one. - batch_size : int, default 1024 - Number of rows the ORC writer writes at a time. - stripe_size : int, default 64 * 1024 * 1024 - Size of each ORC stripe in bytes. - compression : string, default 'uncompressed' - The compression codec. - Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'} - Note that LZ0 is currently not supported. - compression_block_size : int, default 64 * 1024 - Size of each compression block in bytes. - compression_strategy : string, default 'speed' - The compression strategy i.e. speed vs size reduction. - Valid values: {'SPEED', 'COMPRESSION'} - row_index_stride : int, default 10000 - The row index stride i.e. the number of rows per - an entry in the row index. - padding_tolerance : double, default 0.0 - The padding tolerance. - dictionary_key_size_threshold : double, default 0.0 - The dictionary key size threshold. 0 to disable dictionary encoding. - 1 to always enable dictionary encoding. - bloom_filter_columns : None, set-like or list-like, default None - Columns that use the bloom filter. - bloom_filter_fpp : double, default 0.05 - Upper limit of the false-positive rate of the bloom filter. - """ +) -> None: ... 
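A round-trip sketch for the orc.pyi stubs above, assuming a pyarrow build with ORC enabled; "example.orc" is a placeholder path.

import pyarrow as pa
from pyarrow import orc

table = pa.table({"n_legs": [2, 4, 100], "animal": ["Flamingo", "Horse", "Centipede"]})

# write_table() accepts the tuning knobs listed above (stripe_size,
# compression, ...); the defaults are used here.
orc.write_table(table, "example.orc")

f = orc.ORCFile("example.orc")
print(f.nrows, f.nstripes, f.compression)
print(orc.read_table("example.orc", columns=["animal"]))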
diff --git a/python/pyarrow-stubs/pandas_compat.pyi b/python/pyarrow-stubs/pandas_compat.pyi index 82fcb19ad97..f25d1ad24a6 100644 --- a/python/pyarrow-stubs/pandas_compat.pyi +++ b/python/pyarrow-stubs/pandas_compat.pyi @@ -26,12 +26,14 @@ from .lib import Array, DataType, Schema, Table _T = TypeVar("_T") + def get_logical_type_map() -> dict[int, str]: ... def get_logical_type(arrow_type: DataType) -> str: ... def get_numpy_logical_type_map() -> dict[type[np.generic], str]: ... def get_logical_type_from_numpy(pandas_collection) -> str: ... def get_extension_dtype_info(column) -> tuple[str, dict[str, Any]]: ... + class _ColumnMetadata(TypedDict): name: str field_name: str @@ -39,9 +41,12 @@ class _ColumnMetadata(TypedDict): numpy_type: str metadata: dict | None + def get_column_metadata( column: pd.Series | pd.Index, name: str, arrow_type: DataType, field_name: str ) -> _ColumnMetadata: ... + + def construct_metadata( columns_to_convert: list[pd.Series], df: pd.DataFrame, @@ -52,9 +57,13 @@ def construct_metadata( types: list[DataType], column_field_names: list[str] = ..., ) -> dict[bytes, bytes]: ... + + def dataframe_to_types( df: pd.DataFrame, preserve_index: bool | None, columns: list[str] | None = None ) -> tuple[list[str], list[DataType], dict[bytes, bytes]]: ... + + def dataframe_to_arrays( df: pd.DataFrame, schema: Schema, @@ -65,6 +74,8 @@ def dataframe_to_arrays( ) -> tuple[Array, Schema, int]: ... def get_datetimetz_type(values: _T, dtype, type_) -> tuple[_T, DataType]: ... def make_datetimetz(unit: str, tz: str) -> DatetimeTZDtype: ... + + def table_to_dataframe( options, table: Table, categories=None, ignore_metadata: bool = False, types_mapper=None ) -> pd.DataFrame: ... diff --git a/python/pyarrow-stubs/parquet/core.pyi b/python/pyarrow-stubs/parquet/core.pyi index 67882f3a747..8cb4f152ff7 100644 --- a/python/pyarrow-stubs/parquet/core.pyi +++ b/python/pyarrow-stubs/parquet/core.pyi @@ -77,12 +77,19 @@ __all__ = ( "filters_to_expression", ) -def filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + +def filters_to_expression( + filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + @deprecated("use filters_to_expression") -def _filters_to_expression(filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... +def _filters_to_expression( + filters: list[FilterTuple | list[FilterTuple]]) -> Expression: ... + + +_Compression: TypeAlias = Literal["gzip", "bz2", + "brotli", "lz4", "zstd", "snappy", "none"] -_Compression: TypeAlias = Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"] class ParquetFile: reader: ParquetReader @@ -118,6 +125,7 @@ class ParquetFile: def close(self, force: bool = False) -> None: ... @property def closed(self) -> bool: ... + def read_row_group( self, i: int, @@ -125,6 +133,7 @@ class ParquetFile: use_threads: bool = True, use_pandas_metadata: bool = False, ) -> Table: ... + def read_row_groups( self, row_groups: list, @@ -132,6 +141,7 @@ class ParquetFile: use_threads: bool = True, use_pandas_metadata: bool = False, ) -> Table: ... + def iter_batches( self, batch_size: int = 65536, @@ -140,13 +150,16 @@ class ParquetFile: use_threads: bool = True, use_pandas_metadata: bool = False, ) -> Iterator[RecordBatch]: ... + def read( self, columns: list | None = None, use_threads: bool = True, use_pandas_metadata: bool = False, ) -> Table: ... - def scan_contents(self, columns: list | None = None, batch_size: int = 65536) -> int: ... 
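As a usage sketch for the ParquetFile and read_table/write_table stubs in this hunk; "example.parquet" is a placeholder path.

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"x": [1, 2, 3], "y": ["a", "b", "c"]})
pq.write_table(table, "example.parquet")

# ParquetFile exposes row-group level access and batched iteration.
pf = pq.ParquetFile("example.parquet")
print(pf.metadata.num_row_groups)
for batch in pf.iter_batches(batch_size=2, columns=["x"]):
    print(batch.num_rows)

# Or read everything back in one call.
print(pq.read_table("example.parquet", columns=["y"]))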
+ def scan_contents(self, columns: list | None = None, + batch_size: int = 65536) -> int: ... + class ParquetWriter: flavor: str @@ -186,14 +199,18 @@ class ParquetWriter: ) -> None: ... def __enter__(self) -> Self: ... def __exit__(self, *args, **kwargs) -> Literal[False]: ... + def write( self, table_or_batch: RecordBatch | Table, row_group_size: int | None = None ) -> None: ... - def write_batch(self, batch: RecordBatch, row_group_size: int | None = None) -> None: ... + def write_batch(self, batch: RecordBatch, + row_group_size: int | None = None) -> None: ... + def write_table(self, table: Table, row_group_size: int | None = None) -> None: ... def close(self) -> None: ... def add_key_value_metadata(self, key_value_metadata: dict[str, str]) -> None: ... + class ParquetDataset: def __init__( self, @@ -220,6 +237,7 @@ class ParquetDataset: def equals(self, other: ParquetDataset) -> bool: ... @property def schema(self) -> Schema: ... + def read( self, columns: list[str] | None = None, @@ -236,6 +254,7 @@ class ParquetDataset: @property def partitioning(self) -> Partitioning: ... + def read_table( source: SingleOrList[str] | SingleOrList[Path] | SingleOrList[NativeFile] | SingleOrList[IO], *, @@ -258,10 +277,12 @@ def read_table( page_checksum_verification: bool = False, ) -> Table: ... + def read_pandas( source: str | Path | NativeFile | IO, columns: list | None = None, **kwargs ) -> Table: ... + def write_table( table: Table, where: str | Path | NativeFile | IO, @@ -292,6 +313,7 @@ def write_table( **kwargs, ) -> None: ... + def write_to_dataset( table: Table, root_path: str | Path, @@ -307,6 +329,7 @@ def write_to_dataset( **kwargs, ) -> None: ... + def write_metadata( schema: Schema, where: str | NativeFile, @@ -315,6 +338,7 @@ def write_metadata( **kwargs, ) -> None: ... + def read_metadata( where: str | Path | IO | NativeFile, memory_map: bool = False, @@ -322,6 +346,7 @@ def read_metadata( filesystem: SupportedFileSystem | None = None, ) -> FileMetaData: ... + def read_schema( where: str | Path | IO | NativeFile, memory_map: bool = False, diff --git a/python/pyarrow-stubs/scalar.pyi b/python/pyarrow-stubs/scalar.pyi index 0bcd97dd038..4563b97fef7 100644 --- a/python/pyarrow-stubs/scalar.pyi +++ b/python/pyarrow-stubs/scalar.pyi @@ -65,567 +65,267 @@ _AsPyTypeV = TypeVar("_AsPyTypeV") _DataType_co = TypeVar("_DataType_co", bound=DataType, covariant=True) class Scalar(_Weakrefable, Generic[_DataType_co]): - """ - The base class for scalars. - """ + @property - def type(self) -> _DataType_co: - """ - Data type of the Scalar object. - """ + def type(self) -> _DataType_co: ... + @property - def is_valid(self) -> bool: - """ - Holds a valid (non-null) value. - """ + def is_valid(self) -> bool: ... + def cast( self, target_type: None | _DataTypeT, safe: bool = True, options: CastOptions | None = None, memory_pool: MemoryPool | None = None, - ) -> Self | Scalar[_DataTypeT]: - """ - Cast scalar value to another data type. - - See :func:`pyarrow.compute.cast` for usage. - - Parameters - ---------- - target_type : DataType, default None - Type to cast scalar to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - memory_pool : MemoryPool, optional - memory pool to use for allocations during function execution. - - Returns - ------- - scalar : A Scalar of the given target data type. - """ - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. 
An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ - def equals(self, other: Scalar) -> bool: - """ - Parameters - ---------- - other : pyarrow.Scalar - - Returns - ------- - bool - """ - def __hash__(self) -> int: - """ - Return hash(self). - """ - def as_py(self: Scalar[Any], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None) -> Any: - """ - Return this value as a Python representation. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - """ + ) -> Self | Scalar[_DataTypeT]: ... + + def validate(self, *, full: bool = False) -> None: ... + + def equals(self, other: Scalar) -> bool: ... + + def __hash__(self) -> int: ... + + def as_py(self: Scalar[Any], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None) -> Any: ... + _NULL: TypeAlias = None NA = _NULL -class NullScalar(Scalar[NullType]): - """ - Concrete class for null scalars. - """ -class BooleanScalar(Scalar[BoolType]): - """ - Concrete class for boolean scalars. - """ -class UInt8Scalar(Scalar[UInt8Type]): - """ - Concrete class for uint8 scalars. - """ -class Int8Scalar(Scalar[Int8Type]): - """ - Concrete class for int8 scalars. - """ -class UInt16Scalar(Scalar[UInt16Type]): - """ - Concrete class for uint16 scalars. - """ -class Int16Scalar(Scalar[Int16Type]): - """ - Concrete class for int16 scalars. - """ -class UInt32Scalar(Scalar[Uint32Type]): - """ - Concrete class for uint32 scalars. - """ -class Int32Scalar(Scalar[Int32Type]): - """ - Concrete class for int32 scalars. - """ -class UInt64Scalar(Scalar[UInt64Type]): - """ - Concrete class for uint64 scalars. - """ -class Int64Scalar(Scalar[Int64Type]): - """ - Concrete class for int64 scalars. - """ -class HalfFloatScalar(Scalar[Float16Type]): - """ - Concrete class for float scalars. - """ -class FloatScalar(Scalar[Float32Type]): - """ - Concrete class for float scalars. - """ -class DoubleScalar(Scalar[Float64Type]): - """ - Concrete class for double scalars. - """ -class Decimal32Scalar(Scalar[Decimal32Type[_Precision, _Scale]]): - """ - Concrete class for decimal32 scalars. - """ -class Decimal64Scalar(Scalar[Decimal64Type[_Precision, _Scale]]): - """ - Concrete class for decimal64 scalars. - """ -class Decimal128Scalar(Scalar[Decimal128Type[_Precision, _Scale]]): - """ - Concrete class for decimal128 scalars. - """ -class Decimal256Scalar(Scalar[Decimal256Type[_Precision, _Scale]]): - """ - Concrete class for decimal256 scalars. - """ -class Date32Scalar(Scalar[Date32Type]): - """ - Concrete class for date32 scalars. - """ +class NullScalar(Scalar[NullType]): ... + +class BooleanScalar(Scalar[BoolType]): ... + +class UInt8Scalar(Scalar[UInt8Type]): ... + +class Int8Scalar(Scalar[Int8Type]): ... 
+ +class UInt16Scalar(Scalar[UInt16Type]): ... + +class Int16Scalar(Scalar[Int16Type]): ... + +class UInt32Scalar(Scalar[Uint32Type]): ... + +class Int32Scalar(Scalar[Int32Type]): ... + +class UInt64Scalar(Scalar[UInt64Type]): ... + +class Int64Scalar(Scalar[Int64Type]): ... + +class HalfFloatScalar(Scalar[Float16Type]): ... + +class FloatScalar(Scalar[Float32Type]): ... + +class DoubleScalar(Scalar[Float64Type]): ... + +class Decimal32Scalar(Scalar[Decimal32Type[_Precision, _Scale]]): ... + +class Decimal64Scalar(Scalar[Decimal64Type[_Precision, _Scale]]): ... + +class Decimal128Scalar(Scalar[Decimal128Type[_Precision, _Scale]]): ... + +class Decimal256Scalar(Scalar[Decimal256Type[_Precision, _Scale]]): ... + +class Date32Scalar(Scalar[Date32Type]): ... + class Date64Scalar(Scalar[Date64Type]): - """ - Concrete class for date64 scalars. - """ + @property def value(self) -> dt.date | None: ... class Time32Scalar(Scalar[Time32Type[_Time32Unit]]): - """ - Concrete class for time32 scalars. - """ + @property def value(self) -> dt.time | None: ... class Time64Scalar(Scalar[Time64Type[_Time64Unit]]): - """ - Concrete class for time64 scalars. - """ + @property def value(self) -> dt.time | None: ... class TimestampScalar(Scalar[TimestampType[_Unit, _Tz]]): - """ - Concrete class for timestamp scalars. - """ + @property def value(self) -> int | None: ... class DurationScalar(Scalar[DurationType[_Unit]]): - """ - Concrete class for duration scalars. - """ + @property def value(self) -> dt.timedelta | None: ... class MonthDayNanoIntervalScalar(Scalar[MonthDayNanoIntervalType]): - """ - Concrete class for month, day, nanosecond interval scalars. - """ + @property - def value(self) -> MonthDayNano | None: - """ - Same as self.as_py() - """ + def value(self) -> MonthDayNano | None: ... + class BinaryScalar(Scalar[BinaryType]): - """ - Concrete class for binary-like scalars. - """ - def as_buffer(self) -> Buffer: - """ - Return a view over this value as a Buffer object. - """ + + def as_buffer(self) -> Buffer: ... + class LargeBinaryScalar(Scalar[LargeBinaryType]): - """ - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class FixedSizeBinaryScalar(Scalar[FixedSizeBinaryType]): - """ - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class StringScalar(Scalar[StringType]): - """ - Concrete class for string-like (utf8) scalars. - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class LargeStringScalar(Scalar[LargeStringType]): - """ - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class BinaryViewScalar(Scalar[BinaryViewType]): - """ - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class StringViewScalar(Scalar[StringViewType]): - """ - """ - def as_buffer(self) -> Buffer: - """ - BinaryScalar.as_buffer(self) - Return a view over this value as a Buffer object. - """ + def as_buffer(self) -> Buffer: ... + class ListScalar(Scalar[ListType[_DataTypeT]]): - """ - Concrete class for list-like scalars. 
- """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - Return the number of values. - """ - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: - """ - Return the value at the given index. - """ - def __iter__(self) -> Iterator[Array]: - """ - Iterate over this element's values. - """ + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... + + def __iter__(self) -> Iterator[Array]: ... + class FixedSizeListScalar(Scalar[FixedSizeListType[_DataTypeT, _Size]]): - """ - """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - ListScalar.__len__(self) + def __len__(self) -> int: ... - Return the number of values. - """ - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: - """ - ListScalar.__getitem__(self, i) + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - Return the value at the given index. - """ - def __iter__(self) -> Iterator[Array]: - """ - ListScalar.__iter__(self) + def __iter__(self) -> Iterator[Array]: ... - Iterate over this element's values. - """ class LargeListScalar(Scalar[LargeListType[_DataTypeT]]): - """ - """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - ListScalar.__len__(self) + def __len__(self) -> int: ... - Return the number of values. - """ - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: - """ - ListScalar.__getitem__(self, i) + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - Return the value at the given index. - """ - def __iter__(self) -> Iterator[Array]: - """ - ListScalar.__iter__(self) + def __iter__(self) -> Iterator[Array]: ... - Iterate over this element's values. - """ class ListViewScalar(Scalar[ListViewType[_DataTypeT]]): - """ - """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - ListScalar.__len__(self) + def __len__(self) -> int: ... - Return the number of values. - """ - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: - """ - ListScalar.__getitem__(self, i) + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - Return the value at the given index. - """ - def __iter__(self) -> Iterator[Array]: - """ - ListScalar.__iter__(self) + def __iter__(self) -> Iterator[Array]: ... - Iterate over this element's values. - """ class LargeListViewScalar(Scalar[LargeListViewType[_DataTypeT]]): - """ - """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - ListScalar.__len__(self) + def __len__(self) -> int: ... - Return the number of values. - """ - def __getitem__(self, i: int) -> Scalar[_DataTypeT]: - """ - ListScalar.__getitem__(self, i) + def __getitem__(self, i: int) -> Scalar[_DataTypeT]: ... - Return the value at the given index. - """ - def __iter__(self) -> Iterator[Array]: - """ - ListScalar.__iter__(self) + def __iter__(self) -> Iterator[Array]: ... - Iterate over this element's values. - """ class StructScalar(Scalar[StructType], collections.abc.Mapping[str, Scalar]): - """ - Concrete class for struct scalars. - """ - def __len__(self) -> int: - """ - Return len(self). - """ - def __iter__(self) -> Iterator[str]: - """ - Implement iter(self). - """ - def __getitem__(self, key: int | str) -> Scalar[Any]: - """ - Return the child value for the given field. - - Parameters - ---------- - key : Union[int, str] - Index / position or name of the field. - - Returns - ------- - result : Scalar - """ + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[str]: ... 
+ + def __getitem__(self, key: int | str) -> Scalar[Any]: ... + def _as_py_tuple(self) -> list[tuple[str, Any]]: ... class MapScalar(Scalar[MapType[_K, _ValueT]]): - """ - Concrete class for map scalars. - """ + @property def values(self) -> Array | None: ... - def __len__(self) -> int: - """ - ListScalar.__len__(self) - - Return the number of values. - """ - def __getitem__(self, i: int) -> tuple[Scalar[_K], _ValueT, Any]: - """ - Return the value at the given index or key. - """ + def __len__(self) -> int: ... + + def __getitem__(self, i: int) -> tuple[Scalar[_K], _ValueT, Any]: ... + def __iter__( self: Scalar[ MapType[_BasicDataType[_AsPyTypeK], _BasicDataType[_AsPyTypeV]],] | Scalar[MapType[Any, _BasicDataType[_AsPyTypeV]]] | Scalar[MapType[_BasicDataType[_AsPyTypeK], Any]] - ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]] | Iterator[tuple[Any, _AsPyTypeV]] | Iterator[tuple[_AsPyTypeK, Any]]: - """ - Iterate over this element's values. - """ + ) -> Iterator[tuple[_AsPyTypeK, _AsPyTypeV]] | Iterator[tuple[Any, _AsPyTypeV]] | Iterator[tuple[_AsPyTypeK, Any]]: ... + class DictionaryScalar(Scalar[DictionaryType[_IndexT, _BasicValueT]]): - """ - Concrete class for dictionary-encoded scalars. - """ + @property - def index(self) -> Scalar[_IndexT]: - """ - Return this value's underlying index as a scalar. - """ + def index(self) -> Scalar[_IndexT]: ... + @property - def value(self) -> Scalar[_BasicValueT]: - """ - Return the encoded value as a scalar. - """ + def value(self) -> Scalar[_BasicValueT]: ... + @property def dictionary(self) -> Array: ... class RunEndEncodedScalar(Scalar[RunEndEncodedType[_RunEndType, _BasicValueT]]): - """ - Concrete class for RunEndEncoded scalars. - """ + @property - def value(self) -> tuple[int, _BasicValueT] | None: - """ - Return underlying value as a scalar. - """ + def value(self) -> tuple[int, _BasicValueT] | None: ... + class UnionScalar(Scalar[UnionType]): - """ - Concrete class for Union scalars. - """ + @property - def value(self) -> Any | None: - """ - Return underlying value as a scalar. - """ + def value(self) -> Any | None: ... + @property - def type_code(self) -> str: - """ - Return the union type code for this scalar. - """ + def type_code(self) -> str: ... + class ExtensionScalar(Scalar[ExtensionType]): - """ - Concrete class for Extension scalars. - """ + @property - def value(self) -> Any | None: - """ - Return storage value as a scalar. - """ + def value(self) -> Any | None: ... + @staticmethod - def from_storage(typ: BaseExtensionType, value) -> ExtensionScalar: - """ - Construct ExtensionScalar from type and storage value. - - Parameters - ---------- - typ : DataType - The extension type for the result scalar. - value : object - The storage value for the result scalar. - - Returns - ------- - ext_scalar : ExtensionScalar - """ - -class Bool8Scalar(Scalar[Bool8Type]): - """ - Concrete class for bool8 extension scalar. - """ -class UuidScalar(Scalar[UuidType]): - """ - Concrete class for Uuid extension scalar. - """ -class JsonScalar(Scalar[JsonType]): - """ - Concrete class for JSON extension scalar. - """ -class OpaqueScalar(Scalar[OpaqueType]): - """ - Concrete class for opaque extension scalar. - """ + def from_storage(typ: BaseExtensionType, value) -> ExtensionScalar: ... + + +class Bool8Scalar(Scalar[Bool8Type]): ... + +class UuidScalar(Scalar[UuidType]): ... + +class JsonScalar(Scalar[JsonType]): ... + +class OpaqueScalar(Scalar[OpaqueType]): ... 
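The scalar() factory and the concrete Scalar subclasses above are exercised by a short sketch, adapted from the docstring examples removed in this hunk.

import pyarrow as pa

# pa.scalar() picks the concrete Scalar subclass from the value, or from an
# explicit type; as_py() converts back to a plain Python object.
s = pa.scalar(42)                                  # Int64Scalar
print(type(s).__name__, s.type, s.as_py())

t = pa.scalar([1, 2], type=pa.list_(pa.int16()))   # ListScalar
print(len(t), t[0].as_py())

print(s.cast(pa.float64()).as_py())                # 42.0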
+ class FixedShapeTensorScalar(ExtensionScalar): - """ - Concrete class for fixed shape tensor extension scalar. - """ - def to_numpy(self) -> np.ndarray: - """ - Convert fixed shape tensor scalar to a numpy.ndarray. - - The resulting ndarray's shape matches the permuted shape of the - fixed shape tensor scalar. - The conversion is zero-copy. - - Returns - ------- - numpy.ndarray - """ - def to_tensor(self) -> Tensor: - """ - Convert fixed shape tensor extension scalar to a pyarrow.Tensor, using shape - and strides derived from corresponding FixedShapeTensorType. - - The conversion is zero-copy. - - Returns - ------- - pyarrow.Tensor - Tensor represented stored in FixedShapeTensorScalar. - """ + + def to_numpy(self) -> np.ndarray: ... + + def to_tensor(self) -> Tensor: ... + def scalar( value: Any, @@ -633,45 +333,8 @@ def scalar( *, from_pandas: bool | None = None, memory_pool: MemoryPool | None = None, -) -> Scalar[_DataTypeT]: - """ - Create a pyarrow.Scalar instance from a Python object. - - Parameters - ---------- - value : Any - Python object coercible to arrow's type system. - type : pyarrow.DataType - Explicit type to attempt to coerce to, otherwise will be inferred from - the value. - from_pandas : bool, default None - Use pandas's semantics for inferring nulls from values in - ndarray-like data. Defaults to False if not passed explicitly by user, - or True if a pandas object is passed in. - memory_pool : pyarrow.MemoryPool, optional - If not passed, will allocate memory from the currently-set default - memory pool. - - Returns - ------- - scalar : pyarrow.Scalar - - Examples - -------- - >>> import pyarrow as pa - - >>> pa.scalar(42) - - - >>> pa.scalar("string") - - - >>> pa.scalar([1, 2]) - - - >>> pa.scalar([1, 2], type=pa.list_(pa.int16())) - - """ +) -> Scalar[_DataTypeT]: ... + __all__ = [ "Scalar", diff --git a/python/pyarrow-stubs/table.pyi b/python/pyarrow-stubs/table.pyi index a9b861e2b78..29784d274df 100644 --- a/python/pyarrow-stubs/table.pyi +++ b/python/pyarrow-stubs/table.pyi @@ -137,67 +137,15 @@ NarySelector: TypeAlias = list[str] | tuple[str, ...] ColumnSelector: TypeAlias = UnarySelector | NullarySelector | NarySelector class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): - """ - An array-like composed from a (possibly empty) collection of pyarrow.Arrays - - Warnings - -------- - Do not call this class's constructor directly. - - Examples - -------- - To construct a ChunkedArray object use :func:`pyarrow.chunked_array`: - - >>> import pyarrow as pa - >>> pa.chunked_array([], type=pa.int8()) - - [ - ... - ] - - >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> isinstance(pa.chunked_array([[2, 2, 4], [4, 5, 100]]), pa.ChunkedArray) - True - """ + @property def data(self) -> Self: ... @property - def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: - """ - Return data type of a ChunkedArray. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.type - DataType(int64) - """ - def length(self) -> int: - """ - Return length of a ChunkedArray. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.length() - 6 - """ + def type(self: ChunkedArray[Scalar[_DataTypeT]]) -> _DataTypeT: ... + + def length(self) -> int: ... 
+ __len__ = length def to_string( self, @@ -206,666 +154,57 @@ class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): window: int = 5, container_window: int = 2, skip_new_lines: bool = False, - ) -> str: - """ - Render a "pretty-printed" string representation of the ChunkedArray - - Parameters - ---------- - indent : int - How much to indent right the content of the array, - by default ``0``. - window : int - How many items to preview within each chunk at the begin and end - of the chunk when the chunk is bigger than the window. - The other elements will be ellipsed. - container_window : int - How many chunks to preview at the begin and end - of the array when the array is bigger than the window. - The other elements will be ellipsed. - This setting also applies to list columns. - skip_new_lines : bool - If the array should be rendered as a single line of text - or if each element should be on its own line. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.to_string(skip_new_lines=True) - '[[2,2,4],[4,5,100]]' - """ + ) -> str: ... + format = to_string - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ + def validate(self, *, full: bool = False) -> None: ... + @property - def null_count(self) -> int: - """ - Number of null entries - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.null_count - 1 - """ + def null_count(self) -> int: ... + @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the chunked array. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.nbytes - 49 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the chunked array. - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.get_total_buffer_size() - 49 - """ + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + def __sizeof__(self) -> int: ... 
- def __getitem__(self, key: int | builtins.slice) -> Self | _Scalar_co: - """ - Slice or return value at given index - - Parameters - ---------- - key : integer or slice - Slices with step not equal to 1 (or None) will produce a copy - rather than a zero-copy view - - Returns - ------- - value : Scalar (index) or ChunkedArray (slice) - """ + def __getitem__(self, key: int | builtins.slice) -> Self | _Scalar_co: ... + def getitem(self, i: int) -> Scalar: ... - def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: - """ - Return boolean array indicating the null values. - - Parameters - ---------- - nan_is_null : bool (optional, default False) - Whether floating-point NaN values should also be considered null. - - Returns - ------- - array : boolean Array or ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.is_null() - - [ - [ - false, - false, - false, - false, - true, - false - ] - ] - """ - def is_nan(self) -> ChunkedArray[BooleanScalar]: - """ - Return boolean array indicating the NaN values. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) - >>> arr.is_nan() - - [ - [ - false, - true, - false, - false, - null, - false - ] - ] - """ - def is_valid(self) -> ChunkedArray[BooleanScalar]: - """ - Return boolean array indicating the non-null values. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.is_valid() - - [ - [ - true, - true, - true - ], - [ - true, - false, - true - ] - ] - """ - def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: - """ - Replace each null element in values with fill_value. - - See :func:`pyarrow.compute.fill_null` for full usage. - - Parameters - ---------- - fill_value : any - The replacement value for null entries. - - Returns - ------- - result : Array or ChunkedArray - A new array with nulls replaced by the given value. - - Examples - -------- - >>> import pyarrow as pa - >>> fill_value = pa.scalar(5, type=pa.int8()) - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.fill_null(fill_value) - - [ - [ - 2, - 2, - 4, - 4, - 5, - 100 - ] - ] - """ - def equals(self, other: Self) -> bool: - """ - Return whether the contents of two chunked arrays are equal. - - Parameters - ---------- - other : pyarrow.ChunkedArray - Chunked array to compare against. - - Returns - ------- - are_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) - ... ) - >>> n_legs.equals(n_legs) - True - >>> n_legs.equals(animals) - False - """ - def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: - """ - Return a NumPy copy of this array (experimental). - - Parameters - ---------- - zero_copy_only : bool, default False - Introduced for signature consistence with pyarrow.Array.to_numpy. - This must be False here since NumPy arrays' buffer must be contiguous. - - Returns - ------- - array : numpy.ndarray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.to_numpy() - array([ 2, 2, 4, 4, 5, 100]) - """ + def is_null(self, *, nan_is_null: bool = False) -> ChunkedArray[BooleanScalar]: ... 
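For the null-handling methods above, a condensed sketch taken from the removed doctests:

import pyarrow as pa

n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]])
n_legs.is_null()                                # boolean ChunkedArray marking the null slot
n_legs.fill_null(pa.scalar(5, type=pa.int8()))  # [2, 2, 4, 4, 5, 100]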
+ + def is_nan(self) -> ChunkedArray[BooleanScalar]: ... + + def is_valid(self) -> ChunkedArray[BooleanScalar]: ... + + def fill_null(self, fill_value: Scalar[_DataTypeT]) -> Self: ... + + def equals(self, other: Self) -> bool: ... + + def to_numpy(self, zero_copy_only: bool = False) -> np.ndarray: ... + def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... def cast( self, target_type: None | _CastAs = None, safe: bool | None = None, options: CastOptions | None = None, - ) -> Self | ChunkedArray[Scalar[_CastAs]]: - """ - Cast array values to another data type - - See :func:`pyarrow.compute.cast` for usage. - - Parameters - ---------- - target_type : DataType, None - Type to cast array to. - safe : boolean, default True - Whether to check for conversion errors such as overflow. - options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - cast : Array or ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs.type - DataType(int64) - - Change the data type of an array: - - >>> n_legs_seconds = n_legs.cast(pa.duration("s")) - >>> n_legs_seconds.type - DurationType(duration[s]) - """ - def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: - """ - Compute dictionary-encoded representation of array. - - See :func:`pyarrow.compute.dictionary_encode` for full usage. - - Parameters - ---------- - null_encoding : str, default "mask" - How to handle null entries. - - Returns - ------- - encoded : ChunkedArray - A dictionary-encoded version of this array. - - Examples - -------- - >>> import pyarrow as pa - >>> animals = pa.chunked_array( - ... (["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]) - ... ) - >>> animals.dictionary_encode() - - [ - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 3, - 4, - 5 - ] - ] - """ - def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: - """ - Flatten this ChunkedArray. If it has a struct type, the column is - flattened into one array per struct field. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : list of ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> c_arr = pa.chunked_array(n_legs.value_counts()) - >>> c_arr - - [ - -- is_valid: all not null - -- child 0 type: int64 - [ - 2, - 4, - 5, - 100 - ] - -- child 1 type: int64 - [ - 2, - 2, - 1, - 1 - ] - ] - >>> c_arr.flatten() - [ - [ - [ - 2, - 4, - 5, - 100 - ] - ], - [ - [ - 2, - 2, - 1, - 1 - ] - ]] - >>> c_arr.type - StructType(struct) - >>> n_legs.type - DataType(int64) - """ - def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: - """ - Flatten this ChunkedArray into a single non-chunked array. 
- - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.combine_chunks() - - [ - 2, - 2, - 4, - 4, - 5, - 100 - ] - """ - def unique(self) -> ChunkedArray[_Scalar_co]: - """ - Compute distinct elements in array - - Returns - ------- - pyarrow.Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.unique() - - [ - 2, - 4, - 5, - 100 - ] - """ - def value_counts(self) -> StructArray: - """ - Compute counts of unique elements in array. - - Returns - ------- - An array of structs - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.value_counts() - - -- is_valid: all not null - -- child 0 type: int64 - [ - 2, - 4, - 5, - 100 - ] - -- child 1 type: int64 - [ - 2, - 2, - 1, - 1 - ] - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this ChunkedArray - - Parameters - ---------- - offset : int, default 0 - Offset from start of array to slice - length : int, default None - Length of slice (default is until end of batch starting from - offset) - - Returns - ------- - sliced : ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.slice(2, 2) - - [ - [ - 4 - ], - [ - 4 - ] - ] - """ - def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: - """ - Select values from the chunked array. - - See :func:`pyarrow.compute.filter` for full usage. - - Parameters - ---------- - mask : Array or array-like - The boolean mask to filter the chunked array with. - null_selection_behavior : str, default "drop" - How nulls in the mask should be handled. - - Returns - ------- - filtered : Array or ChunkedArray - An array of the same type, with only the elements selected by - the boolean mask. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> mask = pa.array([True, False, None, True, False, True]) - >>> n_legs.filter(mask) - - [ - [ - 2 - ], - [ - 4, - 100 - ] - ] - >>> n_legs.filter(mask, null_selection_behavior="emit_null") - - [ - [ - 2, - null - ], - [ - 4, - 100 - ] - ] - """ + ) -> Self | ChunkedArray[Scalar[_CastAs]]: ... + + def dictionary_encode(self, null_encoding: NullEncoding = "mask") -> Self: ... + + def flatten(self, memory_pool: MemoryPool | None = None) -> list[ChunkedArray[Any]]: ... + + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Array[_Scalar_co]: ... + + def unique(self) -> ChunkedArray[_Scalar_co]: ... + + def value_counts(self) -> StructArray: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def filter(self, mask: Mask, null_selection_behavior: NullSelectionBehavior = "drop") -> Self: ... 
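The selection methods stubbed above keep the behaviour shown in the removed doctests; a condensed sketch:

import pyarrow as pa

n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
n_legs.unique()     # [2, 4, 5, 100]
n_legs.slice(2, 2)  # zero-copy slice: [4, 4]
mask = pa.array([True, False, None, True, False, True])
n_legs.filter(mask)                                       # [2, 4, 100]; mask nulls dropped
n_legs.filter(mask, null_selection_behavior="emit_null")  # [2, null, 4, 100]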
+ def index( self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], value: Scalar[_DataTypeT] | _AsPyType, @@ -873,444 +212,49 @@ class ChunkedArray(_PandasConvertible[pd.Series], Generic[_Scalar_co]): end: int | None = None, *, memory_pool: MemoryPool | None = None, - ) -> Int64Scalar: - """ - Find the first index of a value. - - See :func:`pyarrow.compute.index` for full usage. - - Parameters - ---------- - value : Scalar or object - The value to look for in the array. - start : int, optional - The start index where to look for `value`. - end : int, optional - The end index where to look for `value`. - memory_pool : MemoryPool, optional - A memory pool for potential memory allocations. - - Returns - ------- - index : Int64Scalar - The index of the value in the array (-1 if not found). - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.index(4) - - >>> n_legs.index(4, start=3) - - """ - def take(self, indices: Indices) -> Self: - """ - Select values from the chunked array. - - See :func:`pyarrow.compute.take` for full usage. - - Parameters - ---------- - indices : Array or array-like - The indices in the array whose values will be returned. - - Returns - ------- - taken : Array or ChunkedArray - An array with the same datatype, containing the taken values. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.take([1, 4, 5]) - - [ - [ - 2, - 5, - 100 - ] - ] - """ - def drop_null(self) -> Self: - """ - Remove missing values from a chunked array. - See :func:`pyarrow.compute.drop_null` for full description. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.drop_null() - - [ - [ - 2, - 2 - ], - [ - 4, - 5, - 100 - ] - ] - """ - def sort(self, order: Order = "ascending", **kwargs) -> Self: - """ - Sort the ChunkedArray - - Parameters - ---------- - order : str, default "ascending" - Which order to sort values in. - Accepted values are "ascending", "descending". - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - result : ChunkedArray - """ - def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Unify dictionaries across all chunks. - - This method returns an equivalent chunked array, but where all - chunks share the same dictionary values. Dictionary indices are - transposed accordingly. - - If there are no dictionaries in the chunked array, it is returned - unchanged. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() - >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() - >>> c_arr = pa.chunked_array([arr_1, arr_2]) - >>> c_arr - - [ - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ] - ] - >>> c_arr.unify_dictionaries() - - [ - ... 
- -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 0, - 1, - 2 - ], - ... - -- dictionary: - [ - "Flamingo", - "Parrot", - "Dog", - "Horse", - "Brittle stars", - "Centipede" - ] - -- indices: - [ - 3, - 4, - 5 - ] - ] - """ + ) -> Int64Scalar: ... + + def take(self, indices: Indices) -> Self: ... + + def drop_null(self) -> Self: ... + + def sort(self, order: Order = "ascending", **kwargs) -> Self: ... + + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ... + @property - def num_chunks(self) -> int: - """ - Number of underlying chunks. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs.num_chunks - 2 - """ - def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: - """ - Select a chunk by its index. - - Parameters - ---------- - i : int - - Returns - ------- - pyarrow.Array - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs.chunk(1) - - [ - 4, - 5, - 100 - ] - """ + def num_chunks(self) -> int: ... + + def chunk(self, i: int) -> ChunkedArray[_Scalar_co]: ... + @property - def chunks(self) -> list[Array[_Scalar_co]]: - """ - Convert to a list of single-chunked arrays. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, None], [4, 5, 100]]) - >>> n_legs - - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ] - ] - >>> n_legs.chunks - [ - [ - 2, - 2, - null - ], - [ - 4, - 5, - 100 - ]] - """ + def chunks(self) -> list[Array[_Scalar_co]]: ... + def iterchunks( self: ArrayOrChunkedArray[_ScalarT], - ) -> Generator[Array, None, None]: - """ - Convert to an iterator of ChunkArrays. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> for i in n_legs.iterchunks(): - ... print(i.null_count) - 0 - 1 - - """ + ) -> Generator[Array, None, None]: ... + def __iter__(self) -> Iterator[_Scalar_co]: ... def to_pylist( self: ChunkedArray[Scalar[_BasicDataType[_AsPyType]]], *, maps_as_pydicts: Literal["lossy", "strict"] | None = None, - ) -> list[_AsPyType | None]: - """ - Convert to a list of native Python objects. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, None, 100]]) - >>> n_legs.to_pylist() - [2, 2, 4, 4, None, 100] - """ - def __arrow_c_stream__(self, requested_schema=None) -> Any: - """ - Export to a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - - Returns - ------- - PyCapsule - A capsule containing a C ArrowArrayStream struct. 
- """ + ) -> list[_AsPyType | None]: ... + + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + @classmethod - def _import_from_c_capsule(cls, stream) -> Self: - """ - Import ChunkedArray from a C ArrowArrayStream PyCapsule. - - Parameters - ---------- - stream: PyCapsule - A capsule containing a C ArrowArrayStream PyCapsule. - - Returns - ------- - ChunkedArray - """ + def _import_from_c_capsule(cls, stream) -> Self: ... + @property - def is_cpu(self) -> bool: - """ - Whether all chunks in the ChunkedArray are CPU-accessible. - """ + def is_cpu(self) -> bool: ... + def chunked_array( arrays: Iterable[NullableCollection[Any]] | Iterable[Iterable[Any] | SupportArrowStream | SupportArrowArray] | Iterable[Array[_ScalarT]], type: DataType | str | None = None, -) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: - """ - Construct chunked array from list of array-like objects - - Parameters - ---------- - arrays : Array, list of Array, or array-like - Must all be the same data type. Can be empty only if type also passed. - Any Arrow-compatible array that implements the Arrow PyCapsule Protocol - (has an ``__arrow_c_array__`` or ``__arrow_c_stream__`` method) can be - passed as well. - type : DataType or string coercible to DataType - - Returns - ------- - ChunkedArray - - Examples - -------- - >>> import pyarrow as pa - >>> pa.chunked_array([], type=pa.int8()) - - [ - ... - ] - - >>> pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - - [ - [ - 2, - 2, - 4 - ], - [ - 4, - 5, - 100 - ] - ] - """ +) -> ChunkedArray[Scalar[Any]] | ChunkedArray[_ScalarT]: ... + _ColumnT = TypeVar("_ColumnT", bound=ArrayOrChunkedArray[Any]) @@ -1318,1408 +262,129 @@ class _Tabular(_PandasConvertible[pd.DataFrame], Generic[_ColumnT]): def __array__(self, dtype: np.dtype | None = None, copy: bool | None = None) -> np.ndarray: ... def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True - ) -> _PyArrowDataFrame: - """ - Return the dataframe interchange object implementing the interchange protocol. - - Parameters - ---------- - nan_as_null : bool, default False - Whether to tell the DataFrame to overwrite null values in the data - with ``NaN`` (or ``NaT``). - allow_copy : bool, default True - Whether to allow memory copying when exporting. If set to False - it would cause non-zero-copy exports to fail. - - Returns - ------- - DataFrame interchange object - The object which consuming library can use to ingress the dataframe. - - Notes - ----- - Details on the interchange protocol: - https://data-apis.org/dataframe-protocol/latest/index.html - `nan_as_null` currently has no effect; once support for nullable extension - dtypes is added, this value should be propagated to columns. - """ - def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: - """ - Slice or return column at given index or column name - - Parameters - ---------- - key : integer, str, or slice - Slices with step not equal to 1 (or None) will produce a copy - rather than a zero-copy view - - Returns - ------- - Array (from RecordBatch) or ChunkedArray (from Table) for column input. - RecordBatch or Table for slice input. - """ + ) -> _PyArrowDataFrame: ... + + def __getitem__(self, key: int | str | slice) -> _ColumnT | Self: ... + def __len__(self) -> int: ... - def column(self, i: int | str) -> _ColumnT: - """ - Select single column from Table or RecordBatch. - - Parameters - ---------- - i : int or string - The index or name of the column to retrieve. 
- - Returns - ------- - column : Array (for RecordBatch) or ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Select a column by numeric index: - - >>> table.column(0) - - [ - [ - 2, - 4, - 5, - 100 - ] - ] - - Select a column by its name: - - >>> table.column("animals") - - [ - [ - "Flamingo", - "Horse", - "Brittle stars", - "Centipede" - ] - ] - """ + def column(self, i: int | str) -> _ColumnT: ... + @property - def column_names(self) -> list[str]: - """ - Names of the Table or RecordBatch columns. - - Returns - ------- - list of str - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> table = pa.Table.from_arrays( - ... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]], - ... names=["n_legs", "animals"], - ... ) - >>> table.column_names - ['n_legs', 'animals'] - """ + def column_names(self) -> list[str]: ... + @property - def columns(self) -> list[_ColumnT]: - """ - List of all columns in numerical order. - - Returns - ------- - columns : list of Array (for RecordBatch) or list of ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.columns - [ - [ - [ - null, - 4, - 5, - null - ] - ], - [ - [ - "Flamingo", - "Horse", - null, - "Centipede" - ] - ]] - """ - def drop_null(self) -> Self: - """ - Remove rows that contain missing values from a Table or RecordBatch. - - See :func:`pyarrow.compute.drop_null` for full usage. - - Returns - ------- - Table or RecordBatch - A tabular object with the same schema, with rows containing - no missing values. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [None, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", None, "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.drop_null() - pyarrow.Table - year: double - n_legs: int64 - animals: string - ---- - year: [[2022,2021]] - n_legs: [[4,100]] - animals: [["Horse","Centipede"]] - """ - def field(self, i: int | str) -> Field: - """ - Select a schema field by its column name or numeric index. - - Parameters - ---------- - i : int or string - The index or name of the field to retrieve. - - Returns - ------- - Field - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.field(0) - pyarrow.Field - >>> table.field(1) - pyarrow.Field - """ + def columns(self) -> list[_ColumnT]: ... + + def drop_null(self) -> Self: ... + + def field(self, i: int | str) -> Field: ... 
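A sketch of the shared _Tabular column accessors, using `pa.table` purely for illustration (any Table or RecordBatch constructor works the same way):

import pyarrow as pa

table = pa.table({
    "n_legs": [2, 4, 5, 100],
    "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"],
})
table.column_names       # ['n_legs', 'animals']
table.column("animals")  # ChunkedArray for a Table, Array for a RecordBatch
table.field(0)           # pyarrow.Field<n_legs: int64>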
+ @classmethod def from_pydict( cls, mapping: Mapping[str, ArrayOrChunkedArray[Any] | list[Any] | np.ndarray], schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table or RecordBatch from Arrow arrays or columns. - - Parameters - ---------- - mapping : dict or Mapping - A mapping of strings to Arrays or Python lists. - schema : Schema, default None - If not passed, will be inferred from the Mapping values. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - Table or RecordBatch - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> pydict = {"n_legs": n_legs, "animals": animals} - - Construct a Table from a dictionary of arrays: - - >>> pa.Table.from_pydict(pydict) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_pydict(pydict).schema - n_legs: int64 - animals: string - - Construct a Table from a dictionary of arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a dictionary of arrays with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.Table.from_pydict(pydict, schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ + ) -> Self: ... + @classmethod def from_pylist( cls, mapping: Sequence[Mapping[str, Any]], schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table or RecordBatch from list of rows / dictionaries. - - Parameters - ---------- - mapping : list of dicts of rows - A mapping of strings to row values. - schema : Schema, default None - If not passed, will be inferred from the first row of the - mapping values. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - Table or RecordBatch - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] - - Construct a Table from a list of rows: - - >>> pa.Table.from_pylist(pylist) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4]] - animals: [["Flamingo","Dog"]] - - Construct a Table from a list of rows with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pylist(pylist, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a list of rows with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... 
) - >>> pa.Table.from_pylist(pylist, schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ - def itercolumns(self) -> Generator[_ColumnT, None, None]: - """ - Iterator over all columns in their numerical order. - - Yields - ------ - Array (for RecordBatch) or ChunkedArray (for Table) - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> for i in table.itercolumns(): - ... print(i.null_count) - 2 - 1 - """ + ) -> Self: ... + + def itercolumns(self) -> Generator[_ColumnT, None, None]: ... + @property def num_columns(self) -> int: ... @property def num_rows(self) -> int: ... @property - def shape(self) -> tuple[int, int]: - """ - Dimensions of the table or record batch: (#rows, #columns). - - Returns - ------- - (int, int) - Number of rows and number of columns. - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table.shape - (4, 2) - """ + def shape(self) -> tuple[int, int]: ... + @property def schema(self) -> Schema: ... @property def nbytes(self) -> int: ... - def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: - """ - Sort the Table or RecordBatch by one or multiple columns. - - Parameters - ---------- - sorting : str or list[tuple(name, order)] - Name of the column to use to sort (ascending), or - a list of multiple sorting conditions where - each entry is a tuple with column name - and sorting order ("ascending" or "descending") - **kwargs : dict, optional - Additional sorting options. - As allowed by :class:`SortOptions` - - Returns - ------- - Table or RecordBatch - A new tabular object sorted according to the sort keys. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.sort_by("animal") - pyarrow.Table - year: int64 - n_legs: int64 - animal: string - ---- - year: [[2019,2021,2021,2020,2022,2022]] - n_legs: [[5,100,4,2,4,2]] - animal: [["Brittle stars","Centipede","Dog","Flamingo","Horse","Parrot"]] - """ - def take(self, indices: Indices) -> Self: - """ - Select rows from a Table or RecordBatch. - - See :func:`pyarrow.compute.take` for full usage. - - Parameters - ---------- - indices : Array or array-like - The indices in the tabular object whose rows will be returned. - - Returns - ------- - Table or RecordBatch - A tabular object with the same schema, containing the taken rows. - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... 
) - >>> table = pa.Table.from_pandas(df) - >>> table.take([1, 3]) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2022,2021]] - n_legs: [[4,100]] - animals: [["Horse","Centipede"]] - """ + def sort_by(self, sorting: str | list[tuple[str, Order]], **kwargs) -> Self: ... + + def take(self, indices: Indices) -> Self: ... + def filter( self, mask: Mask | Expression, null_selection_behavior: NullSelectionBehavior = "drop" - ) -> Self: - """ - Select rows from the table or record batch based on a boolean mask. - - The Table can be filtered based on a mask, which will be passed to - :func:`pyarrow.compute.filter` to perform the filtering, or it can - be filtered through a boolean :class:`.Expression` - - Parameters - ---------- - mask : Array or array-like or .Expression - The boolean mask or the :class:`.Expression` to filter the table with. - null_selection_behavior : str, default "drop" - How nulls in the mask should be handled, does nothing if - an :class:`.Expression` is used. - - Returns - ------- - filtered : Table or RecordBatch - A tabular object of the same schema, with only the rows selected - by applied filtering - - Examples - -------- - Using a Table (works similarly for RecordBatch): - - >>> import pyarrow as pa - >>> table = pa.table( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Define an expression and select rows: - - >>> import pyarrow.compute as pc - >>> expr = pc.field("year") <= 2020 - >>> table.filter(expr) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2019]] - n_legs: [[2,5]] - animals: [["Flamingo","Brittle stars"]] - - Define a mask and select rows: - - >>> mask = [True, True, False, None] - >>> table.filter(mask) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022]] - n_legs: [[2,4]] - animals: [["Flamingo","Horse"]] - >>> table.filter(mask, null_selection_behavior="emit_null") - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,null]] - n_legs: [[2,4,null]] - animals: [["Flamingo","Horse",null]] - """ + ) -> Self: ... + def to_pydict( self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None - ) -> dict[str, list[Any]]: - """ - Convert the Table or RecordBatch to a dict or OrderedDict. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Returns - ------- - dict - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... 
) - >>> table = pa.Table.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> table.to_pydict() - {'n_legs': [2, 2, 4, 4, 5, 100], 'animals': ['Flamingo', 'Parrot', ..., 'Centipede']} - """ + ) -> dict[str, list[Any]]: ... + def to_pylist( self, *, maps_as_pydicts: Literal["lossy", "strict"] | None = None - ) -> list[dict[str, Any]]: - """ - Convert the Table or RecordBatch to a list of rows / dictionaries. - - Parameters - ---------- - maps_as_pydicts : str, optional, default `None` - Valid values are `None`, 'lossy', or 'strict'. - The default behavior (`None`), is to convert Arrow Map arrays to - Python association lists (list-of-tuples) in the same order as the - Arrow Map, as in [(key1, value1), (key2, value2), ...]. - - If 'lossy' or 'strict', convert Arrow Map arrays to native Python dicts. - - If 'lossy', whenever duplicate keys are detected, a warning will be printed. - The last seen value of a duplicate key will be in the Python dictionary. - If 'strict', this instead results in an exception being raised when detected. - - Returns - ------- - list - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> data = [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]] - >>> table = pa.table(data, names=["n_legs", "animals"]) - >>> table.to_pylist() - [{'n_legs': 2, 'animals': 'Flamingo'}, {'n_legs': 4, 'animals': 'Horse'}, ... - """ - def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: - """ - Return human-readable string representation of Table or RecordBatch. - - Parameters - ---------- - show_metadata : bool, default False - Display Field-level and Schema-level KeyValueMetadata. - preview_cols : int, default 0 - Display values of the columns for the first N columns. - - Returns - ------- - str - """ + ) -> list[dict[str, Any]]: ... + + def to_string(self, *, show_metadata: bool = False, preview_cols: int = 0) -> str: ... + def remove_column(self, i: int) -> Self: ... - def drop_columns(self, columns: str | list[str]) -> Self: - """ - Drop one or more columns and return a new Table or RecordBatch. - - Parameters - ---------- - columns : str or list[str] - Field name(s) referencing existing column(s). - - Raises - ------ - KeyError - If any of the passed column names do not exist. - - Returns - ------- - Table or RecordBatch - A tabular object without the column(s). - - Examples - -------- - Table (works similarly for RecordBatch) - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Drop one column: - - >>> table.drop_columns("animals") - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,4,5,100]] - - Drop one or more columns: - - >>> table.drop_columns(["n_legs", "animals"]) - pyarrow.Table - ... - ---- - """ + def drop_columns(self, columns: str | list[str]) -> Self: ... + def add_column( self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] ) -> Self: ... def append_column( self, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Append column at end of columns. - - Parameters - ---------- - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. 
- - Returns - ------- - Table or RecordBatch - New table or record batch with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Append column at the end: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.append_column("year", [year]) - pyarrow.Table - n_legs: int64 - animals: string - year: int64 - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - year: [[2021,2022,2019,2021]] - """ + ) -> Self: ... + class RecordBatch(_Tabular[Array]): - """ - Batch of rows of columns of equal length - - Warnings - -------- - Do not call this class's constructor directly, use one of the - ``RecordBatch.from_*`` functions instead. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Constructing a RecordBatch from arrays: - - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Constructing a RecordBatch from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.RecordBatch.from_pandas(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_pandas(df).to_pandas() - year month day n_legs animals - 0 2020 3 1 2 Flamingo - 1 2022 5 5 4 Horse - 2 2021 7 9 5 Brittle stars - 3 2022 9 13 100 Centipede - - Constructing a RecordBatch from pylist: - - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"n_legs": 4, "animals": "Dog"}] - >>> pa.RecordBatch.from_pylist(pylist).to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Dog - - You can also construct a RecordBatch using :func:`pyarrow.record_batch`: - - >>> pa.record_batch([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - >>> pa.record_batch(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. 
- - Raises - ------ - ArrowInvalid - """ + + + def validate(self, *, full: bool = False) -> None: ... + def replace_schema_metadata( self, metadata: dict[str | bytes, str | bytes] | None = None - ) -> Self: - """ - Create shallow copy of record batch by replacing schema - key-value metadata with the indicated new metadata (which may be None, - which deletes any existing metadata - - Parameters - ---------- - metadata : dict, default None - - Returns - ------- - shallow_copy : RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - - Constructing a RecordBatch with schema and metadata: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64())], metadata={"n_legs": "Number of legs per animal"} - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs], schema=my_schema) - >>> batch.schema - n_legs: int64 - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Shallow copy of a RecordBatch with deleted schema metadata: - - >>> batch.replace_schema_metadata().schema - n_legs: int64 - """ + ) -> Self: ... + @property - def num_columns(self) -> int: - """ - Number of columns - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.num_columns - 2 - """ + def num_columns(self) -> int: ... + @property - def num_rows(self) -> int: - """ - Number of rows - - Due to the definition of a RecordBatch, all columns have the same - number of rows. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.num_rows - 6 - """ + def num_rows(self) -> int: ... + @property - def schema(self) -> Schema: - """ - Schema of the RecordBatch and its columns - - Returns - ------- - pyarrow.Schema - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.schema - n_legs: int64 - animals: string - """ + def schema(self) -> Schema: ... + @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the record batch. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... 
) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.nbytes - 116 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the record batch - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.get_total_buffer_size() - 120 - """ + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + def __sizeof__(self) -> int: ... def add_column( self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list - ) -> Self: - """ - Add column to RecordBatch at position i. - - A new record batch is returned with the column added, the original record batch - object is left unchanged. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. - - Returns - ------- - RecordBatch - New record batch with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - - Add column: - - >>> year = [2021, 2022, 2019, 2021] - >>> batch.add_column(0, "year", year) - pyarrow.RecordBatch - year: int64 - n_legs: int64 - animals: string - ---- - year: [2021,2022,2019,2021] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Original record batch is left unchanged: - - >>> batch - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - def remove_column(self, i: int) -> Self: - """ - Create new RecordBatch with the indicated column removed. - - Parameters - ---------- - i : int - Index of column to remove. - - Returns - ------- - Table - New record batch without the column. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> batch.remove_column(1) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,4,5,100] - """ - def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: - """ - Replace column in RecordBatch at position. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array or value coercible to array - Column data. - - Returns - ------- - RecordBatch - New record batch with the passed column set. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... 
} - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - - Replace a column: - - >>> year = [2021, 2022, 2019, 2021] - >>> batch.set_column(1, "year", year) - pyarrow.RecordBatch - n_legs: int64 - year: int64 - ---- - n_legs: [2,4,5,100] - year: [2021,2022,2019,2021] - """ - def rename_columns(self, names: list[str] | dict[str, str]) -> Self: - """ - Create new record batch with columns renamed to provided names. - - Parameters - ---------- - names : list[str] or dict[str, str] - List of new column names or mapping of old column names to new column names. - - If a mapping of old to new column names is passed, then all columns which are - found to match a provided old column name will be renamed to the new column name. - If any column names are not found in the mapping, a KeyError will be raised. - - Raises - ------ - KeyError - If any of the column names passed in the names mapping do not exist. - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> new_names = ["n", "name"] - >>> batch.rename_columns(new_names) - pyarrow.RecordBatch - n: int64 - name: string - ---- - n: [2,4,5,100] - name: ["Flamingo","Horse","Brittle stars","Centipede"] - >>> new_names = {"n_legs": "n", "animals": "name"} - >>> batch.rename_columns(new_names) - pyarrow.RecordBatch - n: int64 - name: string - ---- - n: [2,4,5,100] - name: ["Flamingo","Horse","Brittle stars","Centipede"] - """ - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: - """ - Write RecordBatch to Buffer as encapsulated IPC message, which does not - include a Schema. - - To reconstruct a RecordBatch from the encapsulated IPC message Buffer - returned by this function, a Schema must be passed separately. See - Examples. - - Parameters - ---------- - memory_pool : MemoryPool, default None - Uses default memory pool if not specified - - Returns - ------- - serialized : Buffer - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> buf = batch.serialize() - >>> buf - - - Reconstruct RecordBatch from IPC message Buffer and original Schema - - >>> pa.ipc.read_record_batch(buf, batch.schema) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this RecordBatch - - Parameters - ---------- - offset : int, default 0 - Offset from start of record batch to slice - length : int, default None - Length of slice (default is until end of batch starting from - offset) - - Returns - ------- - sliced : RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... 
) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch.to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - >>> batch.slice(offset=3).to_pandas() - n_legs animals - 0 4 Horse - 1 5 Brittle stars - 2 100 Centipede - >>> batch.slice(length=2).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - >>> batch.slice(offset=3, length=1).to_pandas() - n_legs animals - 0 4 Horse - """ - def equals(self, other: Self, check_metadata: bool = False) -> bool: - """ - Check if contents of two record batches are equal. - - Parameters - ---------- - other : pyarrow.RecordBatch - RecordBatch to compare against. - check_metadata : bool, default False - Whether schema metadata equality should be checked as well. - - Returns - ------- - are_equal : bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"]) - >>> batch_0 = pa.record_batch([]) - >>> batch_1 = pa.RecordBatch.from_arrays( - ... [n_legs, animals], - ... names=["n_legs", "animals"], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> batch.equals(batch) - True - >>> batch.equals(batch_0) - False - >>> batch.equals(batch_1) - True - >>> batch.equals(batch_1, check_metadata=True) - False - """ - def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: - """ - Select columns of the RecordBatch. - - Returns a new RecordBatch with the specified columns, and metadata - preserved. - - Parameters - ---------- - columns : list-like - The column names or integer indices to select. - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> batch = pa.record_batch([n_legs, animals], names=["n_legs", "animals"]) - - Select columns my indices: - - >>> batch.select([1]) - pyarrow.RecordBatch - animals: string - ---- - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - - Select columns by names: - - >>> batch.select(["n_legs"]) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,2,4,4,5,100] - """ + ) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + + def set_column(self, i: int, field_: str | Field, column: Array | list) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def equals(self, other: Self, check_metadata: bool = False) -> bool: ... + + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: ... + def cast( self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None - ) -> Self: - """ - Cast record batch values to another schema. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - safe : bool, default True - Check for overflows or other unsafe conversions. 
- options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> batch = pa.RecordBatch.from_pandas(df) - >>> batch.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... - - Define new schema and cast batch values: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] - ... ) - >>> batch.cast(target_schema=my_schema) - pyarrow.RecordBatch - n_legs: duration[s] - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - """ + ) -> Self: ... + @classmethod def from_arrays( cls, @@ -2727,72 +392,8 @@ class RecordBatch(_Tabular[Array]): names: list[str] | None = None, schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a RecordBatch from multiple pyarrow.Arrays - - Parameters - ---------- - arrays : list of pyarrow.Array - One for each field in RecordBatch - names : list of str, optional - Names for the batch fields. If not passed, schema must be passed - schema : Schema, default None - Schema for the created batch. If not passed, names must be passed - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - pyarrow.RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> names = ["n_legs", "animals"] - - Construct a RecordBatch from pyarrow Arrays using names: - - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Construct a RecordBatch from pyarrow Arrays using schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - >>> pa.RecordBatch.from_arrays([n_legs, animals], schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - """ + ) -> Self: ... + @classmethod def from_pandas( cls, @@ -2801,387 +402,52 @@ class RecordBatch(_Tabular[Array]): preserve_index: bool | None = None, nthreads: int | None = None, columns: list[str] | None = None, - ) -> Self: - """ - Convert pandas.DataFrame to an Arrow RecordBatch - - Parameters - ---------- - df : pandas.DataFrame - schema : pyarrow.Schema, optional - The expected schema of the RecordBatch. This can be used to - indicate the type of columns if we cannot infer it automatically. - If passed, the output will have exactly this schema. 
Columns - specified in the schema that are not found in the DataFrame columns - or its index will raise an error. Additional columns or index - levels in the DataFrame which are not specified in the schema will - be ignored. - preserve_index : bool, optional - Whether to store the index as an additional column in the resulting - ``RecordBatch``. The default of None will store the index as a - column, except for RangeIndex which is stored as metadata only. Use - ``preserve_index=True`` to force it to be stored as a column. - nthreads : int, default None - If greater than 1, convert columns to Arrow in parallel using - indicated number of threads. By default, this follows - :func:`pyarrow.cpu_count` (may use up to system CPU count threads). - columns : list, optional - List of column to be converted. If None, use all columns. - - Returns - ------- - pyarrow.RecordBatch - - - Examples - -------- - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - - Convert pandas DataFrame to RecordBatch: - - >>> import pyarrow as pa - >>> pa.RecordBatch.from_pandas(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Convert pandas DataFrame to RecordBatch using schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.RecordBatch.from_pandas(df, schema=my_schema) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - Convert pandas DataFrame to RecordBatch specifying columns: - - >>> pa.RecordBatch.from_pandas(df, columns=["n_legs"]) - pyarrow.RecordBatch - n_legs: int64 - ---- - n_legs: [2,4,5,100] - """ + ) -> Self: ... + @classmethod def from_struct_array( cls, struct_array: StructArray | ChunkedArray[StructScalar] - ) -> Self: - """ - Construct a RecordBatch from a StructArray. - - Each field in the StructArray will become a column in the resulting - ``RecordBatch``. - - Parameters - ---------- - struct_array : StructArray - Array to construct the record batch from. - - Returns - ------- - pyarrow.RecordBatch - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> pa.RecordBatch.from_struct_array(struct).to_pandas() - animals n_legs year - 0 Parrot 2 NaN - 1 None 4 2022.0 - """ - def to_struct_array(self) -> StructArray: - """ - Convert to a struct array. - """ + ) -> Self: ... + + def to_struct_array(self) -> StructArray: ... + def to_tensor( self, null_to_nan: bool = False, row_major: bool = True, memory_pool: MemoryPool | None = None, - ) -> Tensor: - """ - Convert to a :class:`~pyarrow.Tensor`. - - RecordBatches that can be converted have fields of type signed or unsigned - integer or float, including all bit-widths. - - ``null_to_nan`` is ``False`` by default and this method will raise an error in case - any nulls are present. RecordBatches with nulls can be converted with ``null_to_nan`` - set to ``True``. 
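An illustrative round trip through the from_pandas / from_struct_array / to_struct_array stubs above, assuming pandas is installed:

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"n_legs": [2, 4, 5, 100],
                   "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
batch = pa.RecordBatch.from_pandas(df, columns=["n_legs"], preserve_index=False)

struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}])
from_struct = pa.RecordBatch.from_struct_array(struct)   # one column per struct field
round_trip = from_struct.to_struct_array()               # back to a StructArray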
In this case null values are converted to ``NaN`` and integer type - arrays are promoted to the appropriate float type. - - Parameters - ---------- - null_to_nan : bool, default False - Whether to write null values in the result as ``NaN``. - row_major : bool, default True - Whether resulting Tensor is row-major or column-major - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Examples - -------- - >>> import pyarrow as pa - >>> batch = pa.record_batch( - ... [ - ... pa.array([1, 2, 3, 4, None], type=pa.int32()), - ... pa.array([10, 20, 30, 40, None], type=pa.float32()), - ... ], - ... names=["a", "b"], - ... ) - - >>> batch - pyarrow.RecordBatch - a: int32 - b: float - ---- - a: [1,2,3,4,null] - b: [10,20,30,40,null] - - Convert a RecordBatch to row-major Tensor with null values - written as ``NaN``s - - >>> batch.to_tensor(null_to_nan=True) - - type: double - shape: (5, 2) - strides: (16, 8) - >>> batch.to_tensor(null_to_nan=True).to_numpy() - array([[ 1., 10.], - [ 2., 20.], - [ 3., 30.], - [ 4., 40.], - [nan, nan]]) - - Convert a RecordBatch to column-major Tensor - - >>> batch.to_tensor(null_to_nan=True, row_major=False) - - type: double - shape: (5, 2) - strides: (8, 40) - >>> batch.to_tensor(null_to_nan=True, row_major=False).to_numpy() - array([[ 1., 10.], - [ 2., 20.], - [ 3., 30.], - [ 4., 40.], - [nan, nan]]) - """ - def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): - """ - Export to a C ArrowArray struct, given its pointer. - - If a C ArrowSchema struct pointer is also given, the record batch - schema is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ + ) -> Tensor: ... + + def _export_to_c(self, out_ptr: int, out_schema_ptr: int = 0): ... + @classmethod - def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: - """ - Import RecordBatch from a C ArrowArray struct, given its pointer - and the imported schema. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowArray struct. - type: Schema or int - Either a Schema object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_array__(self, requested_schema=None): - """ - Get a pair of PyCapsules containing a C ArrowArray representation of the object. - - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the batch to this schema. - If None, the batch will be returned as-is, with a schema matching the - one returned by :meth:`__arrow_c_schema__()`. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowArray, - respectively. - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export the batch as an Arrow C stream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - Currently, this is not supported and will raise a - NotImplementedError if the schema doesn't match the current schema. 
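A short sketch of the to_tensor stub above, following the stripped example in which nulls become NaN and integer columns are promoted to float:

import pyarrow as pa

batch = pa.record_batch(
    [pa.array([1, 2, 3, 4, None], type=pa.int32()),
     pa.array([10, 20, 30, 40, None], type=pa.float32())],
    names=["a", "b"],
)

tensor = batch.to_tensor(null_to_nan=True, row_major=True)
matrix = tensor.to_numpy()    # 5 x 2 float64 array; the last row is NaN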
- - Returns - ------- - PyCapsule - """ + def _import_from_c(cls, in_ptr: int, schema: Schema) -> Self: ... + + def __arrow_c_array__(self, requested_schema=None): ... + + def __arrow_c_stream__(self, requested_schema=None): ... + @classmethod - def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: - """ - Import RecordBatch from a pair of PyCapsules containing a C ArrowSchema - and ArrowArray, respectively. - - Parameters - ---------- - schema_capsule : PyCapsule - A PyCapsule containing a C ArrowSchema representation of the schema. - array_capsule : PyCapsule - A PyCapsule containing a C ArrowArray representation of the array. - - Returns - ------- - pyarrow.RecordBatch - """ - def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: - """ - Export to a C ArrowDeviceArray struct, given its pointer. - - If a C ArrowSchema struct pointer is also given, the record batch - schema is exported to it at the same time. - - Parameters - ---------- - out_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - out_schema_ptr: int (optional) - The raw pointer to a C ArrowSchema struct. - - Be careful: if you don't pass the ArrowDeviceArray struct to a consumer, - array memory will leak. This is a low-level function intended for - expert users. - """ + def _import_from_c_capsule(cls, schema_capsule, array_capsule) -> Self: ... + + def _export_to_c_device(self, out_ptr: int, out_schema_ptr: int = 0) -> None: ... + @classmethod - def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: - """ - Import RecordBatch from a C ArrowDeviceArray struct, given its pointer - and the imported schema. - - Parameters - ---------- - in_ptr: int - The raw pointer to a C ArrowDeviceArray struct. - type: Schema or int - Either a Schema object, or the raw pointer to a C ArrowSchema - struct. - - This is a low-level function intended for expert users. - """ - def __arrow_c_device_array__(self, requested_schema=None, **kwargs): - """ - Get a pair of PyCapsules containing a C ArrowDeviceArray representation - of the object. - - Parameters - ---------- - requested_schema : PyCapsule | None - A PyCapsule containing a C ArrowSchema representation of a requested - schema. PyArrow will attempt to cast the batch to this data type. - If None, the batch will be returned as-is, with a type matching the - one returned by :meth:`__arrow_c_schema__()`. - kwargs - Currently no additional keyword arguments are supported, but - this method will accept any keyword with a value of ``None`` - for compatibility with future keywords. - - Returns - ------- - Tuple[PyCapsule, PyCapsule] - A pair of PyCapsules containing a C ArrowSchema and ArrowDeviceArray, - respectively. - """ + def _import_from_c_device(cls, in_ptr: int, schema: Schema) -> Self: ... + + def __arrow_c_device_array__(self, requested_schema=None, **kwargs): ... + @classmethod - def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: - """ - Import RecordBatch from a pair of PyCapsules containing a - C ArrowSchema and ArrowDeviceArray, respectively. - - Parameters - ---------- - schema_capsule : PyCapsule - A PyCapsule containing a C ArrowSchema representation of the schema. - array_capsule : PyCapsule - A PyCapsule containing a C ArrowDeviceArray representation of the array. - - Returns - ------- - pyarrow.RecordBatch - """ + def _import_from_c_device_capsule(cls, schema_capsule, array_capsule) -> Self: ... 
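A hedged sketch of the PyCapsule export/import pair typed above; these are low-level, expert-only entry points per the docstrings being removed, and the round trip below is only an illustration:

import pyarrow as pa

batch = pa.record_batch({"n_legs": [2, 4], "animals": ["Parrot", "Dog"]})

# Export as (ArrowSchema, ArrowArray) capsules, then import them back.
schema_capsule, array_capsule = batch.__arrow_c_array__()
copy = pa.RecordBatch._import_from_c_capsule(schema_capsule, array_capsule)
assert copy.equals(batch)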
+ @property - def device_type(self) -> DeviceAllocationType: - """ - The device type where the arrays in the RecordBatch reside. - - Returns - ------- - DeviceAllocationType - """ + def device_type(self) -> DeviceAllocationType: ... + @property - def is_cpu(self) -> bool: - """ - Whether the RecordBatch's arrays are CPU-accessible. - """ - def copy_to(self, destination: MemoryManager | Device) -> Self: - """ - Copy the entire RecordBatch to destination device. - - This copies each column of the record batch to create - a new record batch where all underlying buffers for the columns have - been copied to the destination MemoryManager. - - Parameters - ---------- - destination : pyarrow.MemoryManager or pyarrow.Device - The destination device to copy the array to. - - Returns - ------- - RecordBatch - """ + def is_cpu(self) -> bool: ... + + def copy_to(self, destination: MemoryManager | Device) -> Self: ... + def table_to_blocks(options, table: Table, categories, extension_columns): ... @@ -3197,523 +463,30 @@ JoinType: TypeAlias = Literal[ ] class Table(_Tabular[ChunkedArray[Any]]): - """ - A collection of top-level named, equal length Arrow arrays. - - Warnings - -------- - Do not call this class's constructor directly, use one of the ``from_*`` - methods instead. - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from arrays: - - >>> pa.Table.from_arrays([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a RecordBatch: - - >>> batch = pa.record_batch([n_legs, animals], names=names) - >>> pa.Table.from_batches([batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.Table.from_pandas(df) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a dictionary of arrays: - - >>> pydict = {"n_legs": n_legs, "animals": animals} - >>> pa.Table.from_pydict(pydict) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_pydict(pydict).schema - n_legs: int64 - animals: string - - Construct a Table from a dictionary of arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_pydict(pydict, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from a list of rows: - - >>> pylist = [{"n_legs": 2, "animals": "Flamingo"}, {"year": 2021, "animals": "Centipede"}] - >>> pa.Table.from_pylist(pylist) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,null]] - animals: [["Flamingo","Centipede"]] - - Construct a Table from a list of rows with pyarrow schema: - - >>> my_schema = pa.schema( - ... [ - ... 
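For the Table constructors whose examples are stripped above (from_arrays, from_batches, from_pydict, from_pylist), a compact usage sketch:

import pyarrow as pa

n_legs = pa.array([2, 4, 5, 100])
animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"])
names = ["n_legs", "animals"]

t1 = pa.Table.from_arrays([n_legs, animals], names=names)
t2 = pa.Table.from_batches([pa.record_batch([n_legs, animals], names=names)])
t3 = pa.Table.from_pydict({"n_legs": n_legs, "animals": animals},
                          metadata={"n_legs": "Number of legs per animal"})
t4 = pa.Table.from_pylist([{"n_legs": 2, "animals": "Flamingo"},
                           {"year": 2021, "animals": "Centipede"}])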
pa.field("year", pa.int64()), - ... pa.field("n_legs", pa.int64()), - ... pa.field("animals", pa.string()), - ... ], - ... metadata={"year": "Year of entry"}, - ... ) - >>> pa.Table.from_pylist(pylist, schema=my_schema).schema - year: int64 - n_legs: int64 - animals: string - -- schema metadata -- - year: 'Year of entry' - - Construct a Table with :func:`pyarrow.table`: - - >>> pa.table([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - - def validate(self, *, full: bool = False) -> None: - """ - Perform validation checks. An exception is raised if validation fails. - - By default only cheap validation checks are run. Pass `full=True` - for thorough validation checks (potentially O(n)). - - Parameters - ---------- - full : bool, default False - If True, run expensive checks, otherwise cheap checks only. - - Raises - ------ - ArrowInvalid - """ - def slice(self, offset: int = 0, length: int | None = None) -> Self: - """ - Compute zero-copy slice of this Table. - - Parameters - ---------- - offset : int, default 0 - Offset from start of table to slice. - length : int, default None - Length of slice (default is until end of table starting from - offset). - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.slice(length=3) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019]] - n_legs: [[2,4,5]] - animals: [["Flamingo","Horse","Brittle stars"]] - >>> table.slice(offset=2) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2019,2021]] - n_legs: [[5,100]] - animals: [["Brittle stars","Centipede"]] - >>> table.slice(offset=2, length=1) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2019]] - n_legs: [[5]] - animals: [["Brittle stars"]] - """ - def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: - """ - Select columns of the Table. - - Returns a new Table with the specified columns, and metadata - preserved. - - Parameters - ---------- - columns : list-like - The column names or integer indices to select. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.select([0, 1]) - pyarrow.Table - year: int64 - n_legs: int64 - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - >>> table.select(["year"]) - pyarrow.Table - year: int64 - ---- - year: [[2020,2022,2019,2021]] - """ + + + def validate(self, *, full: bool = False) -> None: ... + + def slice(self, offset: int = 0, length: int | None = None) -> Self: ... + + def select(self, columns: Iterable[str] | Iterable[int] | NDArray[np.str_]) -> Self: ... 
+ def replace_schema_metadata( self, metadata: dict[str | bytes, str | bytes] | None = None - ) -> Self: - """ - Create shallow copy of table by replacing schema - key-value metadata with the indicated new metadata (which may be None), - which deletes any existing metadata. - - Parameters - ---------- - metadata : dict, default None - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Constructing a Table with pyarrow schema and metadata: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> table = pa.table(df, my_schema) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: ... - - Create a shallow copy of a Table with deleted schema metadata: - - >>> table.replace_schema_metadata().schema - n_legs: int64 - animals: string - - Create a shallow copy of a Table with new schema metadata: - - >>> metadata = {"animals": "Which animal"} - >>> table.replace_schema_metadata(metadata=metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - animals: 'Which animal' - """ - def flatten(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Flatten this Table. - - Each column with a struct type is flattened - into one column per struct field. Other columns are left unchanged. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> month = pa.array([4, 6]) - >>> table = pa.Table.from_arrays([struct, month], names=["a", "month"]) - >>> table - pyarrow.Table - a: struct - child 0, animals: string - child 1, n_legs: int64 - child 2, year: int64 - month: int64 - ---- - a: [ - -- is_valid: all not null - -- child 0 type: string - ["Parrot",null] - -- child 1 type: int64 - [2,4] - -- child 2 type: int64 - [null,2022]] - month: [[4,6]] - - Flatten the columns with struct field: - - >>> table.flatten() - pyarrow.Table - a.animals: string - a.n_legs: int64 - a.year: int64 - month: int64 - ---- - a.animals: [["Parrot",null]] - a.n_legs: [[2,4]] - a.year: [[null,2022]] - month: [[4,6]] - """ - def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Make a new table by combining the chunks this table has. - - All the underlying chunks in the ChunkedArray of each column are - concatenated into zero or one chunk. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] - ... 
) - >>> names = ["n_legs", "animals"] - >>> table = pa.table([n_legs, animals], names=names) - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4],[4,5,100]] - animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] - >>> table.combine_chunks() - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4,4,5,100]] - animals: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] - """ - def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: - """ - Unify dictionaries across all chunks. - - This method returns an equivalent table, but where all chunks of - each column share the same dictionary values. Dictionary indices - are transposed accordingly. - - Columns without dictionaries are returned unchanged. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode() - >>> arr_2 = pa.array(["Horse", "Brittle stars", "Centipede"]).dictionary_encode() - >>> c_arr = pa.chunked_array([arr_1, arr_2]) - >>> table = pa.table([c_arr], names=["animals"]) - >>> table - pyarrow.Table - animals: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Parrot","Dog"] -- indices: - [0,1,2], -- dictionary: - ["Horse","Brittle stars","Centipede"] -- indices: - [0,1,2]] - - Unify dictionaries across both chunks: - - >>> table.unify_dictionaries() - pyarrow.Table - animals: dictionary - ---- - animals: [ -- dictionary: - ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: - [0,1,2], -- dictionary: - ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] -- indices: - [3,4,5]] - """ - def equals(self, other: Self, check_metadata: bool = False) -> Self: - """ - Check if contents of two tables are equal. - - Parameters - ---------- - other : pyarrow.Table - Table to compare against. - check_metadata : bool, default False - Whether schema metadata equality should be checked as well. - - Returns - ------- - bool - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array( - ... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"] - ... ) - >>> names = ["n_legs", "animals"] - >>> table = pa.Table.from_arrays([n_legs, animals], names=names) - >>> table_0 = pa.Table.from_arrays([]) - >>> table_1 = pa.Table.from_arrays( - ... [n_legs, animals], names=names, metadata={"n_legs": "Number of legs per animal"} - ... ) - >>> table.equals(table) - True - >>> table.equals(table_0) - False - >>> table.equals(table_1) - True - >>> table.equals(table_1, check_metadata=True) - False - """ + ) -> Self: ... + + def flatten(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def combine_chunks(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def unify_dictionaries(self, memory_pool: MemoryPool | None = None) -> Self: ... + + def equals(self, other: Self, check_metadata: bool = False) -> Self: ... + def cast( self, target_schema: Schema, safe: bool | None = None, options: CastOptions | None = None - ) -> Self: - """ - Cast table values to another schema. - - Parameters - ---------- - target_schema : Schema - Schema to cast to, the names and order of fields must match. - safe : bool, default True - Check for overflows or other unsafe conversions. 
- options : CastOptions, default None - Additional checks pass by CastOptions - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... - - Define new schema and cast table values: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.duration("s")), pa.field("animals", pa.string())] - ... ) - >>> table.cast(target_schema=my_schema) - pyarrow.Table - n_legs: duration[s] - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ + ) -> Self: ... + @classmethod def from_pandas( cls, @@ -3723,70 +496,8 @@ class Table(_Tabular[ChunkedArray[Any]]): nthreads: int | None = None, columns: list[str] | None = None, safe: bool = True, - ) -> Self: - """ - Convert pandas.DataFrame to an Arrow Table. - - The column types in the resulting Arrow Table are inferred from the - dtypes of the pandas.Series in the DataFrame. In the case of non-object - Series, the NumPy dtype is translated to its Arrow equivalent. In the - case of `object`, we need to guess the datatype by looking at the - Python objects in this Series. - - Be aware that Series of the `object` dtype don't carry enough - information to always lead to a meaningful Arrow type. In the case that - we cannot infer a type, e.g. because the DataFrame is of length 0 or - the Series only contains None/nan objects, the type is set to - null. This behavior can be avoided by constructing an explicit schema - and passing it to this function. - - Parameters - ---------- - df : pandas.DataFrame - schema : pyarrow.Schema, optional - The expected schema of the Arrow Table. This can be used to - indicate the type of columns if we cannot infer it automatically. - If passed, the output will have exactly this schema. Columns - specified in the schema that are not found in the DataFrame columns - or its index will raise an error. Additional columns or index - levels in the DataFrame which are not specified in the schema will - be ignored. - preserve_index : bool, optional - Whether to store the index as an additional column in the resulting - ``Table``. The default of None will store the index as a column, - except for RangeIndex which is stored as metadata only. Use - ``preserve_index=True`` to force it to be stored as a column. - nthreads : int, default None - If greater than 1, convert columns to Arrow in parallel using - indicated number of threads. By default, this follows - :func:`pyarrow.cpu_count` (may use up to system CPU count threads). - columns : list, optional - List of column to be converted. If None, use all columns. - safe : bool, default True - Check for overflows or other unsafe conversions. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.Table.from_pandas(df) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ + ) -> Self: ... 
+ @classmethod def from_arrays( cls, @@ -3794,630 +505,55 @@ class Table(_Tabular[ChunkedArray[Any]]): names: list[str] | None = None, schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, - ) -> Self: - """ - Construct a Table from Arrow arrays. - - Parameters - ---------- - arrays : list of pyarrow.Array or pyarrow.ChunkedArray - Equal-length arrays that should form the table. - names : list of str, optional - Names for the table columns. If not passed, schema must be passed. - schema : Schema, default None - Schema for the created table. If not passed, names must be passed. - metadata : dict or Mapping, default None - Optional metadata for the schema (if inferred). - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from arrays: - - >>> pa.Table.from_arrays([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_arrays([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from arrays with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"animals": "Name of the animal species"}, - ... ) - >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> pa.Table.from_arrays([n_legs, animals], schema=my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - animals: 'Name of the animal species' - """ + ) -> Self: ... + @classmethod def from_struct_array( cls, struct_array: StructArray | ChunkedArray[StructScalar] - ) -> Self: - """ - Construct a Table from a StructArray. - - Each field in the StructArray will become a column in the resulting - ``Table``. - - Parameters - ---------- - struct_array : StructArray or ChunkedArray - Array to construct the table from. - - Returns - ------- - pyarrow.Table - - Examples - -------- - >>> import pyarrow as pa - >>> struct = pa.array([{"n_legs": 2, "animals": "Parrot"}, {"year": 2022, "n_legs": 4}]) - >>> pa.Table.from_struct_array(struct).to_pandas() - animals n_legs year - 0 Parrot 2 NaN - 1 None 4 2022.0 - """ + ) -> Self: ... + def to_struct_array( self, max_chunksize: int | None = None - ) -> ChunkedArray[StructScalar]: - """ - Convert to a chunked array of struct type. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for ChunkedArray chunks. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - ChunkedArray - """ + ) -> ChunkedArray[StructScalar]: ... 
+ @classmethod - def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: - """ - Construct a Table from a sequence or iterator of Arrow RecordBatches. - - Parameters - ---------- - batches : sequence or iterator of RecordBatch - Sequence of RecordBatch to be converted, all schemas must be equal. - schema : Schema, default None - If not passed, will be inferred from the first RecordBatch. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - >>> batch = pa.record_batch([n_legs, animals], names=names) - >>> batch.to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - - Construct a Table from a RecordBatch: - - >>> pa.Table.from_batches([batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from a sequence of RecordBatches: - - >>> pa.Table.from_batches([batch, batch]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100],[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: - """ - Convert Table to a list of RecordBatch objects. - - Note that this method is zero-copy, it merely exposes the same data - under a different API. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for each RecordBatch chunk. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - list[RecordBatch] - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Convert a Table to a RecordBatch: - - >>> table.to_batches()[0].to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - - Convert a Table to a list of RecordBatches: - - >>> table.to_batches(max_chunksize=2)[0].to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - >>> table.to_batches(max_chunksize=2)[1].to_pandas() - n_legs animals - 0 5 Brittle stars - 1 100 Centipede - """ - def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: - """ - Convert the Table to a RecordBatchReader. - - Note that this method is zero-copy, it merely exposes the same data - under a different API. - - Parameters - ---------- - max_chunksize : int, default None - Maximum number of rows for each RecordBatch chunk. Individual chunks - may be smaller depending on the chunk layout of individual columns. - - Returns - ------- - RecordBatchReader - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Convert a Table to a RecordBatchReader: - - >>> table.to_reader() - - - >>> reader = table.to_reader() - >>> reader.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, ... 
- >>> reader.read_all() - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ + def from_batches(cls, batches: Iterable[RecordBatch], schema: Schema | None = None) -> Self: ... + + def to_batches(self, max_chunksize: int | None = None) -> list[RecordBatch]: ... + + def to_reader(self, max_chunksize: int | None = None) -> RecordBatchReader: ... + @property - def schema(self) -> Schema: - """ - Schema of the table and its columns. - - Returns - ------- - Schema - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.schema - n_legs: int64 - animals: string - -- schema metadata -- - pandas: '{"index_columns": [{"kind": "range", "name": null, "start": 0, "' ... - """ + def schema(self) -> Schema: ... + @property - def num_columns(self) -> int: - """ - Number of columns in this table. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.num_columns - 2 - """ + def num_columns(self) -> int: ... + @property - def num_rows(self) -> int: - """ - Number of rows in this table. - - Due to the definition of a table, all columns have the same number of - rows. - - Returns - ------- - int - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.num_rows - 4 - """ + def num_rows(self) -> int: ... + @property - def nbytes(self) -> int: - """ - Total number of bytes consumed by the elements of the table. - - In other words, the sum of bytes from all buffer ranges referenced. - - Unlike `get_total_buffer_size` this method will account for array - offsets. - - If buffers are shared between arrays then the shared - portion will only be counted multiple times. - - The dictionary of dictionary arrays will always be counted in their - entirety even if the array only references a portion of the dictionary. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.nbytes - 72 - """ - def get_total_buffer_size(self) -> int: - """ - The sum of bytes in each buffer referenced by the table. - - An array may only reference a portion of a buffer. - This method will overestimate in this case and return the - byte size of the entire buffer. - - If a buffer is referenced multiple times then it will - only be counted once. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... {"n_legs": [None, 4, 5, None], "animals": ["Flamingo", "Horse", None, "Centipede"]} - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.get_total_buffer_size() - 76 - """ + def nbytes(self) -> int: ... + + def get_total_buffer_size(self) -> int: ... + def __sizeof__(self) -> int: ... 
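A usage sketch for the from_batches / to_batches / to_reader stubs and the size properties typed above:

import pyarrow as pa

table = pa.table({"n_legs": [2, 4, 5, 100],
                  "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]})

batches = table.to_batches(max_chunksize=2)        # zero-copy list of RecordBatch
reader = table.to_reader(max_chunksize=2)          # RecordBatchReader over the same data
rebuilt = pa.Table.from_batches(batches, schema=table.schema)

n_cols, n_rows = table.num_columns, table.num_rows # 2, 4
sizes = (table.nbytes, table.get_total_buffer_size())  # see the notes on offsets above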
def add_column( self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Add column to Table at position. - - A new table is returned with the column added, the original table - object is left unchanged. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array, list of Array, or values coercible to arrays - Column data. - - Returns - ------- - Table - New table with the passed column added. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Add column: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.add_column(0, "year", [year]) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2021,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Original table is left unchanged: - - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def remove_column(self, i: int) -> Self: - """ - Create new Table with the indicated column removed. - - Parameters - ---------- - i : int - Index of column to remove. - - Returns - ------- - Table - New table without the column. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.remove_column(1) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,4,5,100]] - """ + ) -> Self: ... + + def remove_column(self, i: int) -> Self: ... + def set_column( self, i: int, field_: str | Field, column: ArrayOrChunkedArray[Any] | list[list[Any]] - ) -> Self: - """ - Replace column in Table at position. - - Parameters - ---------- - i : int - Index to place the column at. - field_ : str or Field - If a string is passed then the type is deduced from the column - data. - column : Array, list of Array, or values coercible to arrays - Column data. - - Returns - ------- - Table - New table with the passed column set. - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - - Replace a column: - - >>> year = [2021, 2022, 2019, 2021] - >>> table.set_column(1, "year", [year]) - pyarrow.Table - n_legs: int64 - year: int64 - ---- - n_legs: [[2,4,5,100]] - year: [[2021,2022,2019,2021]] - """ - def rename_columns(self, names: list[str] | dict[str, str]) -> Self: - """ - Create new table with columns renamed to provided names. - - Parameters - ---------- - names : list[str] or dict[str, str] - List of new column names or mapping of old column names to new column names. - - If a mapping of old to new column names is passed, then all columns which are - found to match a provided old column name will be renamed to the new column name. - If any column names are not found in the mapping, a KeyError will be raised. 
- - Raises - ------ - KeyError - If any of the column names passed in the names mapping do not exist. - - Returns - ------- - Table - - Examples - -------- - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> new_names = ["n", "name"] - >>> table.rename_columns(new_names) - pyarrow.Table - n: int64 - name: string - ---- - n: [[2,4,5,100]] - name: [["Flamingo","Horse","Brittle stars","Centipede"]] - >>> new_names = {"n_legs": "n", "animals": "name"} - >>> table.rename_columns(new_names) - pyarrow.Table - n: int64 - name: string - ---- - n: [[2,4,5,100]] - name: [["Flamingo","Horse","Brittle stars","Centipede"]] - """ - def drop(self, columns: str | list[str]) -> Self: - """ - Drop one or more columns and return a new table. - - Alias of Table.drop_columns, but kept for backwards compatibility. - - Parameters - ---------- - columns : str or list[str] - Field name(s) referencing existing column(s). - - Returns - ------- - Table - New table without the column(s). - """ - def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: - """ - Declare a grouping over the columns of the table. - - Resulting grouping can then be used to perform aggregations - with a subsequent ``aggregate()`` method. - - Parameters - ---------- - keys : str or list[str] - Name of the columns that should be used as the grouping key. - use_threads : bool, default True - Whether to use multithreading or not. When set to True (the - default), no stable ordering of the output is guaranteed. - - Returns - ------- - TableGroupBy - - See Also - -------- - TableGroupBy.aggregate - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022, 2019, 2021], - ... "n_legs": [2, 2, 4, 4, 5, 100], - ... "animal": ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> table = pa.Table.from_pandas(df) - >>> table.group_by("year").aggregate([("n_legs", "sum")]) - pyarrow.Table - year: int64 - n_legs_sum: int64 - ---- - year: [[2020,2022,2021,2019]] - n_legs_sum: [[2,6,104,5]] - """ + ) -> Self: ... + + def rename_columns(self, names: list[str] | dict[str, str]) -> Self: ... + + def drop(self, columns: str | list[str]) -> Self: ... + + def group_by(self, keys: str | list[str], use_threads: bool = True) -> TableGroupBy: ... + def join( self, right_table: Self, @@ -4428,110 +564,8 @@ class Table(_Tabular[ChunkedArray[Any]]): right_suffix: str | None = None, coalesce_keys: bool = True, use_threads: bool = True, - ) -> Self: - """ - Perform a join between this table and another one. - - Result of the join will be a new Table, where further - operations can be applied. - - Parameters - ---------- - right_table : Table - The table to join to the current one, acting as the right table - in the join operation. - keys : str or list[str] - The columns from current table that should be used as keys - of the join operation left side. - right_keys : str or list[str], default None - The columns from the right_table that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left table. 
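A sketch of the column-manipulation and group_by stubs above (add_column, set_column, rename_columns, drop, group_by); the ("animals", "count") aggregation pair is illustrative:

import pyarrow as pa

table = pa.table({"n_legs": [2, 4, 5, 100],
                  "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"]})

with_year = table.add_column(0, "year", [[2021, 2022, 2019, 2021]])
replaced = table.set_column(1, "year", [[2021, 2022, 2019, 2021]])
renamed = table.rename_columns({"n_legs": "n", "animals": "name"})   # dict or list form
dropped = with_year.drop(["year"])
counts = table.group_by("n_legs").aggregate([("animals", "count")])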
- join_type : str, default "left outer" - The kind of join that should be performed, one of - ("left semi", "right semi", "left anti", "right anti", - "inner", "left outer", "right outer", "full outer") - left_suffix : str, default None - Which suffix to add to left column names. This prevents confusion - when the columns in left and right tables have colliding names. - right_suffix : str, default None - Which suffix to add to the right column names. This prevents confusion - when the columns in left and right tables have colliding names. - coalesce_keys : bool, default True - If the duplicated keys should be omitted from one of the sides - in the join result. - use_threads : bool, default True - Whether to use multithreading or not. - - Returns - ------- - Table - - Examples - -------- - >>> import pandas as pd - >>> import pyarrow as pa - >>> df1 = pd.DataFrame({"id": [1, 2, 3], "year": [2020, 2022, 2019]}) - >>> df2 = pd.DataFrame( - ... {"id": [3, 4], "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]} - ... ) - >>> t1 = pa.Table.from_pandas(df1) - >>> t2 = pa.Table.from_pandas(df2) - - Left outer join: - - >>> t1.join(t2, "id").combine_chunks().sort_by("year") - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[3,1,2]] - year: [[2019,2020,2022]] - n_legs: [[5,null,null]] - animal: [["Brittle stars",null,null]] - - Full outer join: - - >>> t1.join(t2, "id", join_type="full outer").combine_chunks().sort_by("year") - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[3,1,2,4]] - year: [[2019,2020,2022,null]] - n_legs: [[5,null,null,100]] - animal: [["Brittle stars",null,null,"Centipede"]] - - Right outer join: - - >>> t1.join(t2, "id", join_type="right outer").combine_chunks().sort_by("year") - pyarrow.Table - year: int64 - id: int64 - n_legs: int64 - animal: string - ---- - year: [[2019,null]] - id: [[3,4]] - n_legs: [[5,100]] - animal: [["Brittle stars","Centipede"]] - - Right anti join - - >>> t1.join(t2, "id", join_type="right anti") - pyarrow.Table - id: int64 - n_legs: int64 - animal: string - ---- - id: [[4]] - n_legs: [[100]] - animal: [["Centipede"]] - """ + ) -> Self: ... + def join_asof( self, right_table: Self, @@ -4540,109 +574,13 @@ class Table(_Tabular[ChunkedArray[Any]]): tolerance: int, right_on: str | list[str] | None = None, right_by: str | list[str] | None = None, - ) -> Self: - """ - Perform an asof join between this table and another one. - - This is similar to a left-join except that we match on nearest key rather - than equal keys. Both tables must be sorted by the key. This type of join - is most useful for time series data that are not perfectly aligned. - - Optionally match on equivalent keys with "by" before searching with "on". - - Result of the join will be a new Table, where further - operations can be applied. - - Parameters - ---------- - right_table : Table - The table to join to the current one, acting as the right table - in the join operation. - on : str - The column from current table that should be used as the "on" key - of the join operation left side. - - An inexact match is used on the "on" key, i.e. a row is considered a - match if and only if left_on - tolerance <= right_on <= left_on. - - The input dataset must be sorted by the "on" key. Must be a single - field of a common type. - - Currently, the "on" key must be an integer, date, or timestamp type. 
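An illustration of Table.join as typed above, using join types listed in the removed docstring:

import pyarrow as pa

t1 = pa.table({"id": [1, 2, 3], "year": [2020, 2022, 2019]})
t2 = pa.table({"id": [3, 4], "n_legs": [5, 100],
               "animal": ["Brittle stars", "Centipede"]})

left = t1.join(t2, keys="id")                            # default "left outer"
full = t1.join(t2, keys="id", join_type="full outer")
anti = t1.join(t2, keys="id", join_type="right anti")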
- by : str or list[str] - The columns from current table that should be used as the keys - of the join operation left side. The join operation is then done - only for the matches in these columns. - tolerance : int - The tolerance for inexact "on" key matching. A right row is considered - a match with the left row ``right.on - left.on <= tolerance``. The - ``tolerance`` may be: - - - negative, in which case a past-as-of-join occurs; - - or positive, in which case a future-as-of-join occurs; - - or zero, in which case an exact-as-of-join occurs. - - The tolerance is interpreted in the same units as the "on" key. - right_on : str or list[str], default None - The columns from the right_table that should be used as the on key - on the join operation right side. - When ``None`` use the same key name as the left table. - right_by : str or list[str], default None - The columns from the right_table that should be used as keys - on the join operation right side. - When ``None`` use the same key names as the left table. - - Returns - ------- - Table - - Example - -------- - >>> import pyarrow as pa - >>> t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]}) - >>> t2 = pa.table( - ... { - ... "id": [3, 4], - ... "year": [2020, 2021], - ... "n_legs": [5, 100], - ... "animal": ["Brittle stars", "Centipede"], - ... } - ... ) - - >>> t1.join_asof(t2, on="year", by="id", tolerance=-2) - pyarrow.Table - id: int64 - year: int64 - n_legs: int64 - animal: string - ---- - id: [[1,3,2,3,3]] - year: [[2020,2021,2022,2022,2023]] - n_legs: [[null,5,null,5,null]] - animal: [[null,"Brittle stars",null,"Brittle stars",null]] - """ - def __arrow_c_stream__(self, requested_schema=None): - """ - Export the table as an Arrow C stream PyCapsule. - - Parameters - ---------- - requested_schema : PyCapsule, default None - The schema to which the stream should be casted, passed as a - PyCapsule containing a C ArrowSchema representation of the - requested schema. - Currently, this is not supported and will raise a - NotImplementedError if the schema doesn't match the current schema. - - Returns - ------- - PyCapsule - """ + ) -> Self: ... + + def __arrow_c_stream__(self, requested_schema=None): ... + @property - def is_cpu(self) -> bool: - """ - Whether all ChunkedArrays are CPU-accessible. - """ + def is_cpu(self) -> bool: ... + def record_batch( data: dict[str, list[Any] | Array[Any]] @@ -4653,138 +591,8 @@ def record_batch( names: list[str] | None = None, schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, -) -> RecordBatch: - """ - Create a pyarrow.RecordBatch from another Python data structure or sequence - of arrays. - - Parameters - ---------- - data : dict, list, pandas.DataFrame, Arrow-compatible table - A mapping of strings to Arrays or Python lists, a list of Arrays, - a pandas DataFame, or any tabular object implementing the - Arrow PyCapsule Protocol (has an ``__arrow_c_array__`` or - ``__arrow_c_device_array__`` method). - names : list, default None - Column names if list of arrays passed as data. Mutually exclusive with - 'schema' argument. - schema : Schema, default None - The expected schema of the RecordBatch. If not passed, will be inferred - from the data. Mutually exclusive with 'names' argument. - metadata : dict or Mapping, default None - Optional metadata for the schema (if schema not passed). 
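A sketch of Table.join_asof based on the example stripped above; both tables must be sorted by the "on" key, and the tolerance is interpreted in the same units as that key:

import pyarrow as pa

t1 = pa.table({"id": [1, 3, 2, 3, 3], "year": [2020, 2021, 2022, 2022, 2023]})
t2 = pa.table({"id": [3, 4], "year": [2020, 2021],
               "n_legs": [5, 100], "animal": ["Brittle stars", "Centipede"]})

# Negative tolerance: match right rows whose "year" is at most 2 behind the left row.
joined = t1.join_asof(t2, on="year", by="id", tolerance=-2)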
- - Returns - ------- - RecordBatch - - See Also - -------- - RecordBatch.from_arrays, RecordBatch.from_pandas, table - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 2, 4, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a RecordBatch from a python dictionary: - - >>> pa.record_batch({"n_legs": n_legs, "animals": animals}) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.record_batch({"n_legs": n_legs, "animals": animals}).to_pandas() - n_legs animals - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - - Creating a RecordBatch from a list of arrays with names: - - >>> pa.record_batch([n_legs, animals], names=names) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - - Creating a RecordBatch from a list of arrays with names and metadata: - - >>> my_metadata = {"n_legs": "How many legs does an animal have?"} - >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,2,4,4,5,100] - animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] - >>> pa.record_batch([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'How many legs does an animal have?' - - Creating a RecordBatch from a pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2021, 2022], - ... "month": [3, 5, 7, 9], - ... "day": [1, 5, 9, 13], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.record_batch(df) - pyarrow.RecordBatch - year: int64 - month: int64 - day: int64 - n_legs: int64 - animals: string - ---- - year: [2020,2022,2021,2022] - month: [3,5,7,9] - day: [1,5,9,13] - n_legs: [2,4,5,100] - animals: ["Flamingo","Horse","Brittle stars","Centipede"] - - >>> pa.record_batch(df).to_pandas() - year month day n_legs animals - 0 2020 3 1 2 Flamingo - 1 2022 5 5 4 Horse - 2 2021 7 9 5 Brittle stars - 3 2022 9 13 100 Centipede - - Creating a RecordBatch from a pandas DataFrame with schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.record_batch(df, my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: ... - >>> pa.record_batch(df, my_schema).to_pandas() - n_legs animals - 0 2 Flamingo - 1 4 Horse - 2 5 Brittle stars - 3 100 Centipede - """ +) -> RecordBatch: ... + def table( data: dict[str, list[Any] | Array[Any]] @@ -4797,223 +605,19 @@ def table( schema: Schema | None = None, metadata: Mapping[str | bytes, str | bytes] | None = None, nthreads: int | None = None, -) -> Table: - """ - Create a pyarrow.Table from a Python data structure or sequence of arrays. 
- - Parameters - ---------- - data : dict, list, pandas.DataFrame, Arrow-compatible table - A mapping of strings to Arrays or Python lists, a list of arrays or - chunked arrays, a pandas DataFame, or any tabular object implementing - the Arrow PyCapsule Protocol (has an ``__arrow_c_array__``, - ``__arrow_c_device_array__`` or ``__arrow_c_stream__`` method). - names : list, default None - Column names if list of arrays passed as data. Mutually exclusive with - 'schema' argument. - schema : Schema, default None - The expected schema of the Arrow Table. If not passed, will be inferred - from the data. Mutually exclusive with 'names' argument. - If passed, the output will have exactly this schema (raising an error - when columns are not found in the data and ignoring additional data not - specified in the schema, when data is a dict or DataFrame). - metadata : dict or Mapping, default None - Optional metadata for the schema (if schema not passed). - nthreads : int, default None - For pandas.DataFrame inputs: if greater than 1, convert columns to - Arrow in parallel using indicated number of threads. By default, - this follows :func:`pyarrow.cpu_count` (may use up to system CPU count - threads). - - Returns - ------- - Table - - See Also - -------- - Table.from_arrays, Table.from_pandas, Table.from_pydict - - Examples - -------- - >>> import pyarrow as pa - >>> n_legs = pa.array([2, 4, 5, 100]) - >>> animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]) - >>> names = ["n_legs", "animals"] - - Construct a Table from a python dictionary: - - >>> pa.table({"n_legs": n_legs, "animals": animals}) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays: - - >>> pa.table([n_legs, animals], names=names) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from arrays with metadata: - - >>> my_metadata = {"n_legs": "Number of legs per animal"} - >>> pa.table([n_legs, animals], names=names, metadata=my_metadata).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - - Construct a Table from pandas DataFrame: - - >>> import pandas as pd - >>> df = pd.DataFrame( - ... { - ... "year": [2020, 2022, 2019, 2021], - ... "n_legs": [2, 4, 5, 100], - ... "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], - ... } - ... ) - >>> pa.table(df) - pyarrow.Table - year: int64 - n_legs: int64 - animals: string - ---- - year: [[2020,2022,2019,2021]] - n_legs: [[2,4,5,100]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"]] - - Construct a Table from pandas DataFrame with pyarrow schema: - - >>> my_schema = pa.schema( - ... [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())], - ... metadata={"n_legs": "Number of legs per animal"}, - ... ) - >>> pa.table(df, my_schema).schema - n_legs: int64 - animals: string - -- schema metadata -- - n_legs: 'Number of legs per animal' - pandas: '{"index_columns": [], "column_indexes": [{"name": null, ... - - Construct a Table from chunked arrays: - - >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]]) - >>> animals = pa.chunked_array( - ... [["Flamingo", "Parrot", "Dog"], ["Horse", "Brittle stars", "Centipede"]] - ... 
) - >>> table = pa.table([n_legs, animals], names=names) - >>> table - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,2,4],[4,5,100]] - animals: [["Flamingo","Parrot","Dog"],["Horse","Brittle stars","Centipede"]] - """ +) -> Table: ... + def concat_tables( tables: Iterable[Table], memory_pool: MemoryPool | None = None, promote_options: Literal["none", "default", "permissive"] = "none", **kwargs: Any, -) -> Table: - """ - Concatenate pyarrow.Table objects. - - If promote_options="none", a zero-copy concatenation will be performed. The schemas - of all the Tables must be the same (except the metadata), otherwise an - exception will be raised. The result Table will share the metadata with the - first table. - - If promote_options="default", any null type arrays will be casted to the type of other - arrays in the column of the same name. If a table is missing a particular - field, null values of the appropriate type will be generated to take the - place of the missing field. The new schema will share the metadata with the - first table. Each field in the new schema will share the metadata with the - first table which has the field defined. Note that type promotions may - involve additional allocations on the given ``memory_pool``. - - If promote_options="permissive", the behavior of default plus types will be promoted - to the common denominator that fits all the fields. - - Parameters - ---------- - tables : iterable of pyarrow.Table objects - Pyarrow tables to concatenate into a single Table. - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - promote_options : str, default none - Accepts strings "none", "default" and "permissive". - **kwargs : dict, optional - - Examples - -------- - >>> import pyarrow as pa - >>> t1 = pa.table( - ... [ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), - ... ], - ... names=["n_legs", "animals"], - ... ) - >>> t2 = pa.table([pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"]) - >>> pa.concat_tables([t1, t2]) - pyarrow.Table - n_legs: int64 - animals: string - ---- - n_legs: [[2,4,5,100],[2,4]] - animals: [["Flamingo","Horse","Brittle stars","Centipede"],["Parrot","Dog"]] - - """ +) -> Table: ... + class TableGroupBy: - """ - A grouping of columns in a table on which to perform aggregations. - - Parameters - ---------- - table : pyarrow.Table - Input table to execute the aggregation on. - keys : str or list[str] - Name of the grouped columns. - use_threads : bool, default True - Whether to use multithreading or not. When set to True (the default), - no stable ordering of the output is guaranteed. - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.table( - ... [ - ... pa.array(["a", "a", "b", "b", "c"]), - ... pa.array([1, 2, 3, 4, 5]), - ... ], - ... names=["keys", "values"], - ... ) - - Grouping of columns: - - >>> pa.TableGroupBy(t, "keys") - - - Perform aggregations: - - >>> pa.TableGroupBy(t, "keys").aggregate([("values", "sum")]) - pyarrow.Table - keys: string - values_sum: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - """ + keys: str | list[str] def __init__(self, table: Table, keys: str | list[str], use_threads: bool = True): ... @@ -5023,138 +627,16 @@ class TableGroupBy: tuple[ColumnSelector, Aggregation] | tuple[ColumnSelector, Aggregation, AggregateOptions | None] ], - ) -> Table: - """ - Perform an aggregation over the grouped columns of the table. 
- - Parameters - ---------- - aggregations : list[tuple(str, str)] or \ -list[tuple(str, str, FunctionOptions)] - List of tuples, where each tuple is one aggregation specification - and consists of: aggregation column name followed - by function name and optionally aggregation function option. - Pass empty list to get a single row for each group. - The column name can be a string, an empty list or a list of - column names, for unary, nullary and n-ary aggregation functions - respectively. - - For the list of function names and respective aggregation - function options see :ref:`py-grouped-aggrs`. - - Returns - ------- - Table - Results of the aggregation functions. - - Examples - -------- - >>> import pyarrow as pa - >>> t = pa.table([ - ... pa.array(["a", "a", "b", "b", "c"]), - ... pa.array([1, 2, 3, 4, 5]), - ... ], names=["keys", "values"]) - - Sum the column "values" over the grouped column "keys": - - >>> t.group_by("keys").aggregate([("values", "sum")]) - pyarrow.Table - keys: string - values_sum: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - - Count the rows over the grouped column "keys": - - >>> t.group_by("keys").aggregate([([], "count_all")]) - pyarrow.Table - keys: string - count_all: int64 - ---- - keys: [["a","b","c"]] - count_all: [[2,2,1]] - - Do multiple aggregations: - - >>> t.group_by("keys").aggregate([ - ... ("values", "sum"), - ... ("keys", "count") - ... ]) - pyarrow.Table - keys: string - values_sum: int64 - keys_count: int64 - ---- - keys: [["a","b","c"]] - values_sum: [[3,7,5]] - keys_count: [[2,2,1]] - - Count the number of non-null values for column "values" - over the grouped column "keys": - - >>> import pyarrow.compute as pc - >>> t.group_by(["keys"]).aggregate([ - ... ("values", "count", pc.CountOptions(mode="only_valid")) - ... ]) - pyarrow.Table - keys: string - values_count: int64 - ---- - keys: [["a","b","c"]] - values_count: [[2,2,1]] - - Get a single row for each group in column "keys": - - >>> t.group_by("keys").aggregate([]) - pyarrow.Table - keys: string - ---- - keys: [["a","b","c"]] - """ + ) -> Table: ... + def _table(self) -> Table: ... @property def _use_threads(self) -> bool: ... def concat_batches( recordbatches: Iterable[RecordBatch], memory_pool: MemoryPool | None = None -) -> RecordBatch: - """ - Concatenate pyarrow.RecordBatch objects. - - All recordbatches must share the same Schema, - the operation implies a copy of the data to merge - the arrays of the different RecordBatches. - - Parameters - ---------- - recordbatches : iterable of pyarrow.RecordBatch objects - Pyarrow record batches to concatenate into a single RecordBatch. - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool. - - Examples - -------- - >>> import pyarrow as pa - >>> t1 = pa.record_batch( - ... [ - ... pa.array([2, 4, 5, 100]), - ... pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"]), - ... ], - ... names=["n_legs", "animals"], - ... ) - >>> t2 = pa.record_batch( - ... [pa.array([2, 4]), pa.array(["Parrot", "Dog"])], names=["n_legs", "animals"] - ... ) - >>> pa.concat_batches([t1, t2]) - pyarrow.RecordBatch - n_legs: int64 - animals: string - ---- - n_legs: [2,4,5,100,2,4] - animals: ["Flamingo","Horse","Brittle stars","Centipede","Parrot","Dog"] - - """ +) -> RecordBatch: ... 
+ __all__ = [ "ChunkedArray", diff --git a/python/pyarrow-stubs/tensor.pyi b/python/pyarrow-stubs/tensor.pyi index 7e9b86ea1cd..471f0ec1e98 100644 --- a/python/pyarrow-stubs/tensor.pyi +++ b/python/pyarrow-stubs/tensor.pyi @@ -29,219 +29,44 @@ from scipy.sparse import coo_matrix, csr_matrix from sparse import COO # type: ignore class Tensor(_Weakrefable): - """ - A n-dimensional array a.k.a Tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - - type: int32 - shape: (2, 3) - strides: (12, 4) - """ + @classmethod - def from_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: - """ - Create a Tensor from a numpy array. - - Parameters - ---------- - obj : numpy.ndarray - The source numpy array - dim_names : list, optional - Names of each dimension of the Tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - - type: int32 - shape: (2, 3) - strides: (12, 4) - """ - def to_numpy(self) -> np.ndarray: - """ - Convert arrow::Tensor to numpy.ndarray with zero copy - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.to_numpy() - array([[ 2, 2, 4], - [ 4, 5, 100]], dtype=int32) - """ - def equals(self, other: Tensor) -> bool: - """ - Return true if the tensors contains exactly equal data. - - Parameters - ---------- - other : Tensor - The other tensor to compare for equality. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> y = np.array([[2, 2, 4], [4, 5, 10]], np.int32) - >>> tensor2 = pa.Tensor.from_numpy(y, dim_names=["a","b"]) - >>> tensor.equals(tensor) - True - >>> tensor.equals(tensor2) - False - """ - def dim_name(self, i: int) -> str: - """ - Returns the name of the i-th tensor dimension. - - Parameters - ---------- - i : int - The physical index of the tensor dimension. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.dim_name(0) - 'dim1' - >>> tensor.dim_name(1) - 'dim2' - """ - @property - def dim_names(self) -> list[str]: - """ - Names of this tensor dimensions. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.dim_names - ['dim1', 'dim2'] - """ - @property - def is_mutable(self) -> bool: - """ - Is this tensor mutable or immutable. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.is_mutable - True - """ - @property - def is_contiguous(self) -> bool: - """ - Is this tensor contiguous in memory. 
- - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.is_contiguous - True - """ - @property - def ndim(self) -> int: - """ - The dimension (n) of this tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.ndim - 2 - """ - @property - def size(self) -> str: - """ - The size of this tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.size - 6 - """ - @property - def shape(self) -> tuple[int, ...]: - """ - The shape of this tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.shape - (2, 3) - """ - @property - def strides(self) -> tuple[int, ...]: - """ - Strides of this tensor. - - Examples - -------- - >>> import pyarrow as pa - >>> import numpy as np - >>> x = np.array([[2, 2, 4], [4, 5, 100]], np.int32) - >>> tensor = pa.Tensor.from_numpy(x, dim_names=["dim1","dim2"]) - >>> tensor.strides - (12, 4) - """ + def from_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ... + + def to_numpy(self) -> np.ndarray: ... + + def equals(self, other: Tensor) -> bool: ... + + def dim_name(self, i: int) -> str: ... + + @property + def dim_names(self) -> list[str]: ... + + @property + def is_mutable(self) -> bool: ... + + @property + def is_contiguous(self) -> bool: ... + + @property + def ndim(self) -> int: ... + + @property + def size(self) -> str: ... + + @property + def shape(self) -> tuple[int, ...]: ... + + @property + def strides(self) -> tuple[int, ...]: ... + class SparseCOOTensor(_Weakrefable): - """ - A sparse COO tensor. - """ + @classmethod - def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: - """ - Convert numpy.ndarray to arrow::SparseCOOTensor - - Parameters - ---------- - obj : numpy.ndarray - Data used to populate the rows. - dim_names : list[str], optional - Names of the dimensions. - - Returns - ------- - pyarrow.SparseCOOTensor - """ + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ... + @classmethod def from_numpy( @@ -250,80 +75,27 @@ class SparseCOOTensor(_Weakrefable): coords: np.ndarray, shape: tuple[int, ...], dim_names: list[str] | None = None, - ) -> Self: - """ - Create arrow::SparseCOOTensor from numpy.ndarrays - - Parameters - ---------- - data : numpy.ndarray - Data used to populate the rows. - coords : numpy.ndarray - Coordinates of the data. - shape : tuple - Shape of the tensor. - dim_names : list, optional - Names of the dimensions. - """ + ) -> Self: ... + @classmethod - def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: - """ - Convert scipy.sparse.coo_array or scipy.sparse.coo_matrix to arrow::SparseCOOTensor - - Parameters - ---------- - obj : scipy.sparse.coo_array or scipy.sparse.coo_matrix - The scipy array or matrix that should be converted. - dim_names : list, optional - Names of the dimensions. - """ + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: ... 
+ @classmethod - def from_pydata_sparse(cls, obj: COO, dim_names: list[str] | None = None) -> Self: - """ - Convert pydata/sparse.COO to arrow::SparseCOOTensor. - - Parameters - ---------- - obj : pydata.sparse.COO - The sparse multidimensional array that should be converted. - dim_names : list, optional - Names of the dimensions. - """ + def from_pydata_sparse(cls, obj: COO, dim_names: list[str] | None = None) -> Self: ... + @classmethod - def from_tensor(cls, obj: Tensor) -> Self: - """ - Convert arrow::Tensor to arrow::SparseCOOTensor. - - Parameters - ---------- - obj : Tensor - The tensor that should be converted. - """ - def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: - """ - Convert arrow::SparseCOOTensor to numpy.ndarrays with zero copy. - """ - def to_scipy(self) -> coo_matrix: - """ - Convert arrow::SparseCOOTensor to scipy.sparse.coo_array. - """ - def to_pydata_sparse(self) -> COO: - """ - Convert arrow::SparseCOOTensor to pydata/sparse.COO. - """ - def to_tensor(self) -> Tensor: - """ - Convert arrow::SparseCOOTensor to arrow::Tensor. - """ - def equals(self, other: Self) -> bool: - """ - Return true if sparse tensors contains exactly equal data. - - Parameters - ---------- - other : SparseCOOTensor - The other tensor to compare for equality. - """ + def from_tensor(cls, obj: Tensor) -> Self: ... + + def to_numpy(self) -> tuple[np.ndarray, np.ndarray]: ... + + def to_scipy(self) -> coo_matrix: ... + + def to_pydata_sparse(self) -> COO: ... + + def to_tensor(self) -> Tensor: ... + + def equals(self, other: Self) -> bool: ... + @property def is_mutable(self) -> bool: ... @property @@ -332,19 +104,8 @@ class SparseCOOTensor(_Weakrefable): def size(self) -> str: ... @property def shape(self) -> tuple[int, ...]: ... - def dim_name(self, i: int) -> str: - """ - Returns the name of the i-th tensor dimension. - - Parameters - ---------- - i : int - The physical index of the tensor dimension. + def dim_name(self, i: int) -> str: ... - Returns - ------- - str - """ @property def dim_names(self) -> list[str]: ... @property @@ -353,26 +114,11 @@ class SparseCOOTensor(_Weakrefable): def has_canonical_format(self) -> bool: ... class SparseCSRMatrix(_Weakrefable): - """ - A sparse CSR matrix. - """ + @classmethod - def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: - """ - Convert numpy.ndarray to arrow::SparseCSRMatrix - - Parameters - ---------- - obj : numpy.ndarray - The dense numpy array that should be converted. - dim_names : list, optional - The names of the dimensions. - - Returns - ------- - pyarrow.SparseCSRMatrix - """ + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ... + @classmethod def from_numpy( cls, @@ -381,67 +127,22 @@ class SparseCSRMatrix(_Weakrefable): indices: np.ndarray, shape: tuple[int, ...], dim_names: list[str] | None = None, - ) -> Self: - """ - Create arrow::SparseCSRMatrix from numpy.ndarrays. - - Parameters - ---------- - data : numpy.ndarray - Data used to populate the sparse matrix. - indptr : numpy.ndarray - Range of the rows, - The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. - indices : numpy.ndarray - Column indices of the corresponding non-zero values. - shape : tuple - Shape of the matrix. - dim_names : list, optional - Names of the dimensions. - """ + ) -> Self: ... 
+ @classmethod - def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: - """ - Convert scipy.sparse.csr_array or scipy.sparse.csr_matrix to arrow::SparseCSRMatrix. - - Parameters - ---------- - obj : scipy.sparse.csr_array or scipy.sparse.csr_matrix - The scipy matrix that should be converted. - dim_names : list, optional - Names of the dimensions. - """ + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: ... + @classmethod - def from_tensor(cls, obj: Tensor) -> Self: - """ - Convert arrow::Tensor to arrow::SparseCSRMatrix. - - Parameters - ---------- - obj : Tensor - The dense tensor that should be converted. - """ - def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Convert arrow::SparseCSRMatrix to numpy.ndarrays with zero copy. - """ - def to_scipy(self) -> csr_matrix: - """ - Convert arrow::SparseCSRMatrix to scipy.sparse.csr_array. - """ - def to_tensor(self) -> Tensor: - """ - Convert arrow::SparseCSRMatrix to arrow::Tensor. - """ - def equals(self, other: Self) -> bool: - """ - Return true if sparse tensors contains exactly equal data. - - Parameters - ---------- - other : SparseCSRMatrix - The other tensor to compare for equality. - """ + def from_tensor(cls, obj: Tensor) -> Self: ... + + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ... + + def to_scipy(self) -> csr_matrix: ... + + def to_tensor(self) -> Tensor: ... + + def equals(self, other: Self) -> bool: ... + @property def is_mutable(self) -> bool: ... @property @@ -450,45 +151,19 @@ class SparseCSRMatrix(_Weakrefable): def size(self) -> str: ... @property def shape(self) -> tuple[int, ...]: ... - def dim_name(self, i: int) -> str: - """ - Returns the name of the i-th tensor dimension. - - Parameters - ---------- - i : int - The physical index of the tensor dimension. + def dim_name(self, i: int) -> str: ... - Returns - ------- - str - """ @property def dim_names(self) -> list[str]: ... @property def non_zero_length(self) -> int: ... class SparseCSCMatrix(_Weakrefable): - """ - A sparse CSC matrix. - """ + @classmethod - def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: - """ - Convert numpy.ndarray to arrow::SparseCSCMatrix - - Parameters - ---------- - obj : numpy.ndarray - Data used to populate the rows. - dim_names : list[str], optional - Names of the dimensions. - - Returns - ------- - pyarrow.SparseCSCMatrix - """ + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ... + @classmethod def from_numpy( cls, @@ -497,67 +172,22 @@ class SparseCSCMatrix(_Weakrefable): indices: np.ndarray, shape: tuple[int, ...], dim_names: list[str] | None = None, - ) -> Self: - """ - Create arrow::SparseCSCMatrix from numpy.ndarrays - - Parameters - ---------- - data : numpy.ndarray - Data used to populate the sparse matrix. - indptr : numpy.ndarray - Range of the rows, - The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. - indices : numpy.ndarray - Column indices of the corresponding non-zero values. - shape : tuple - Shape of the matrix. - dim_names : list, optional - Names of the dimensions. - """ + ) -> Self: ... + @classmethod - def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: - """ - Convert scipy.sparse.csc_array or scipy.sparse.csc_matrix to arrow::SparseCSCMatrix - - Parameters - ---------- - obj : scipy.sparse.csc_array or scipy.sparse.csc_matrix - The scipy matrix that should be converted. 
- dim_names : list, optional - Names of the dimensions. - """ + def from_scipy(cls, obj: csr_matrix, dim_names: list[str] | None = None) -> Self: ... + @classmethod - def from_tensor(cls, obj: Tensor) -> Self: - """ - Convert arrow::Tensor to arrow::SparseCSCMatrix - - Parameters - ---------- - obj : Tensor - The dense tensor that should be converted. - """ - def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Convert arrow::SparseCSCMatrix to numpy.ndarrays with zero copy - """ - def to_scipy(self) -> csr_matrix: - """ - Convert arrow::SparseCSCMatrix to scipy.sparse.csc_array - """ - def to_tensor(self) -> Tensor: - """ - Convert arrow::SparseCSCMatrix to arrow::Tensor - """ - def equals(self, other: Self) -> bool: - """ - Return true if sparse tensors contains exactly equal data - - Parameters - ---------- - other : SparseCSCMatrix - The other tensor to compare for equality. - """ + def from_tensor(cls, obj: Tensor) -> Self: ... + + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ... + + def to_scipy(self) -> csr_matrix: ... + + def to_tensor(self) -> Tensor: ... + + def equals(self, other: Self) -> bool: ... + @property def is_mutable(self) -> bool: ... @property @@ -566,52 +196,19 @@ class SparseCSCMatrix(_Weakrefable): def size(self) -> str: ... @property def shape(self) -> tuple[int, ...]: ... - def dim_name(self, i: int) -> str: - """ - Returns the name of the i-th tensor dimension. - - Parameters - ---------- - i : int - The physical index of the tensor dimension. + def dim_name(self, i: int) -> str: ... - Returns - ------- - str - """ @property def dim_names(self) -> list[str]: ... @property def non_zero_length(self) -> int: ... class SparseCSFTensor(_Weakrefable): - """ - A sparse CSF tensor. - CSF is a generalization of compressed sparse row (CSR) index. - - CSF index recursively compresses each dimension of a tensor into a set - of prefix trees. Each path from a root to leaf forms one tensor - non-zero index. CSF is implemented with two arrays of buffers and one - arrays of integers. - """ @classmethod - def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: - """ - Convert numpy.ndarray to arrow::SparseCSFTensor - - Parameters - ---------- - obj : numpy.ndarray - Data used to populate the rows. - dim_names : list[str], optional - Names of the dimensions. - - Returns - ------- - pyarrow.SparseCSFTensor - """ + def from_dense_numpy(cls, obj: np.ndarray, dim_names: list[str] | None = None) -> Self: ... + @classmethod def from_numpy( cls, @@ -621,59 +218,17 @@ class SparseCSFTensor(_Weakrefable): shape: tuple[int, ...], axis_order: list[int] | None = None, dim_names: list[str] | None = None, - ) -> Self: - """ - Create arrow::SparseCSFTensor from numpy.ndarrays - - Parameters - ---------- - data : numpy.ndarray - Data used to populate the sparse tensor. - indptr : numpy.ndarray - The sparsity structure. - Each two consecutive dimensions in a tensor correspond to - a buffer in indices. - A pair of consecutive values at `indptr[dim][i]` - `indptr[dim][i + 1]` signify a range of nodes in - `indices[dim + 1]` who are children of `indices[dim][i]` node. - indices : numpy.ndarray - Stores values of nodes. - Each tensor dimension corresponds to a buffer in indptr. - shape : tuple - Shape of the matrix. - axis_order : list, optional - the sequence in which dimensions were traversed to - produce the prefix tree. - dim_names : list, optional - Names of the dimensions. - """ + ) -> Self: ... 
+ @classmethod - def from_tensor(cls, obj: Tensor) -> Self: - """ - Convert arrow::Tensor to arrow::SparseCSFTensor - - Parameters - ---------- - obj : Tensor - The dense tensor that should be converted. - """ - def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Convert arrow::SparseCSFTensor to numpy.ndarrays with zero copy - """ - def to_tensor(self) -> Tensor: - """ - Convert arrow::SparseCSFTensor to arrow::Tensor - """ - def equals(self, other: Self) -> bool: - """ - Return true if sparse tensors contains exactly equal data - - Parameters - ---------- - other : SparseCSFTensor - The other tensor to compare for equality. - """ + def from_tensor(cls, obj: Tensor) -> Self: ... + + def to_numpy(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]: ... + + def to_tensor(self) -> Tensor: ... + + def equals(self, other: Self) -> bool: ... + @property def is_mutable(self) -> bool: ... @property @@ -682,19 +237,8 @@ class SparseCSFTensor(_Weakrefable): def size(self) -> str: ... @property def shape(self) -> tuple[int, ...]: ... - def dim_name(self, i: int) -> str: - """ - Returns the name of the i-th tensor dimension. - - Parameters - ---------- - i : int - The physical index of the tensor dimension. - - Returns - ------- - str - """ + def dim_name(self, i: int) -> str: ... + @property def dim_names(self) -> list[str]: ... @property diff --git a/python/pyarrow-stubs/types.pyi b/python/pyarrow-stubs/types.pyi index 98181f6acc2..def5e3771ab 100644 --- a/python/pyarrow-stubs/types.pyi +++ b/python/pyarrow-stubs/types.pyi @@ -87,7 +87,8 @@ _Decimal: TypeAlias = ( _Date: TypeAlias = Date32Type | Date64Type _Time: TypeAlias = Time32Type[Any] | Time64Type[Any] _Interval: TypeAlias = MonthDayNanoIntervalType -_Temporal: TypeAlias = TimestampType[Any, Any] | DurationType[Any] | _Time | _Date | _Interval +_Temporal: TypeAlias = TimestampType[Any, + Any] | DurationType[Any] | _Time | _Date | _Interval _Union: TypeAlias = SparseUnionType | DenseUnionType _Nested: TypeAlias = ( ListType[Any] @@ -100,6 +101,7 @@ _Nested: TypeAlias = ( | _Union ) + def is_null(t: DataType) -> TypeIs[NullType]: ... def is_boolean(t: DataType) -> TypeIs[BoolType]: ... def is_integer(t: DataType) -> TypeIs[_Integer]: ... @@ -157,6 +159,7 @@ def is_boolean_value(obj: Any) -> bool: ... def is_integer_value(obj: Any) -> bool: ... def is_float_value(obj: Any) -> bool: ... + __all__ = [ "is_binary", "is_binary_view", diff --git a/python/pyarrow-stubs/util.pyi b/python/pyarrow-stubs/util.pyi index 5c9687bb83f..db74524d77d 100644 --- a/python/pyarrow-stubs/util.pyi +++ b/python/pyarrow-stubs/util.pyi @@ -22,9 +22,11 @@ from typing import Any, Protocol, Sequence, TypeVar _F = TypeVar("_F", bound=Callable) _N = TypeVar("_N") + class _DocStringComponents(Protocol): _docstring_components: list[str] + def doc( *docstrings: str | _DocStringComponents | Callable | None, **params: Any ) -> Callable[[_F], _F]: ... @@ -32,6 +34,8 @@ def _is_iterable(obj) -> bool: ... def _is_path_like(path) -> bool: ... def _stringify_path(path: str | PathLike) -> str: ... def product(seq: Sequence[_N]) -> _N: ... + + def get_contiguous_span( shape: tuple[int, ...], strides: tuple[int, ...], itemsize: int ) -> tuple[int, int]: ... 
From b1f43b2f72747a9881060439cbf503723537662e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 17 Sep 2025 21:48:08 +0200 Subject: [PATCH 20/26] ReplaceEllipsis to replace ellipsis with docstrings --- dev/update_stub_docstrings.py | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py index 17f7e8e1aa1..2ad7a877f34 100644 --- a/dev/update_stub_docstrings.py +++ b/dev/update_stub_docstrings.py @@ -117,6 +117,46 @@ def leave_SimpleString(self, original_node, updated_node): return updated_node +class ReplaceEllipsis(libcst.CSTTransformer): + def __init__(self, package, namespace): + self.stack = [namespace] if namespace else [] + self.indentation = 0 + self.package = package + + def _get_docstring(self, name, indentation): + # print(name) + try: + obj = self.package.get_member(name) + if obj.has_docstring: + indentation_prefix = indentation * " " + docstring = indent(obj.docstring.value, indentation_prefix) + docstring = f'"""\n{docstring}\n{indentation_prefix}"""' + # print(f"{name} has {len(docstring)} long docstring.") + return docstring + except KeyError: + print(f"{name} has no docstring.") + return "" + + def visit_FunctionDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + + def leave_FunctionDef(self, original_node, updated_node): + node_name = ".".join(self.stack) + indentation = self.indentation + self.stack.pop() + self.indentation -= 1 + + if isinstance(updated_node.body.body[0].value, libcst.Ellipsis): + print(node_name) + docstring = self._get_docstring(node_name, indentation) + if docstring and len(docstring) > 0: + new_docstring = libcst.SimpleString(value=docstring) + new_body = updated_node.body.with_changes(body=[libcst.Expr(value=new_docstring)]) + return updated_node.with_changes(body=new_body) + return updated_node + + @click.command() @click.option('--pyarrow_folder', '-f', type=click.Path(resolve_path=True)) def update_stub_files(pyarrow_folder): From 9db449971741b81c0734d5900fd7f8640fe56edc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 19 Sep 2025 12:52:57 +0200 Subject: [PATCH 21/26] minor fixes --- python/pyarrow-stubs/_parquet.pyi | 2 +- python/pyarrow-stubs/lib.pyi | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyarrow-stubs/_parquet.pyi b/python/pyarrow-stubs/_parquet.pyi index ce499fd1c16..35ee2b41fde 100644 --- a/python/pyarrow-stubs/_parquet.pyi +++ b/python/pyarrow-stubs/_parquet.pyi @@ -127,7 +127,7 @@ class Statistics(_Weakrefable): @property def has_min_max(self) -> bool: ... @property - def hash_null_count(self) -> bool: ... + def has_null_count(self) -> bool: ... @property def has_distinct_count(self) -> bool: ... 
@property diff --git a/python/pyarrow-stubs/lib.pyi b/python/pyarrow-stubs/lib.pyi index eea11a2e8f1..43c40b61cf8 100644 --- a/python/pyarrow-stubs/lib.pyi +++ b/python/pyarrow-stubs/lib.pyi @@ -19,7 +19,6 @@ from typing import NamedTuple from .array import * -# from .benchmark import * from .builder import * from .compat import * from .config import * From 33fbbb90013ff449c09476d519a7511599a95a0c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 19 Sep 2025 19:27:30 +0200 Subject: [PATCH 22/26] add ellipsis to _ipc.pyi --- python/pyarrow-stubs/_ipc.pyi | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/python/pyarrow-stubs/_ipc.pyi b/python/pyarrow-stubs/_ipc.pyi index 23d770070e7..6e83541bf5c 100644 --- a/python/pyarrow-stubs/_ipc.pyi +++ b/python/pyarrow-stubs/_ipc.pyi @@ -36,6 +36,7 @@ from ._types import DictionaryMemo, KeyValueMetadata class MetadataVersion(enum.IntEnum): + ... V1 = enum.auto() V2 = enum.auto() V3 = enum.auto() @@ -44,7 +45,7 @@ class MetadataVersion(enum.IntEnum): class WriteStats(NamedTuple): - + ... num_messages: int num_record_batches: int num_dictionary_batches: int @@ -53,7 +54,7 @@ class WriteStats(NamedTuple): class ReadStats(NamedTuple): - + ... num_messages: int num_record_batches: int num_dictionary_batches: int @@ -62,7 +63,7 @@ class ReadStats(NamedTuple): class IpcReadOptions(_Weakrefable): - + ... ensure_native_endian: bool use_threads: bool included_fields: list[int] @@ -77,7 +78,7 @@ class IpcReadOptions(_Weakrefable): class IpcWriteOptions(_Weakrefable): - + ... metadata_version: MetadataVersion allow_64bit: bool use_legacy_format: bool @@ -100,7 +101,7 @@ class IpcWriteOptions(_Weakrefable): class Message(_Weakrefable): - + ... @property def type(self) -> str: ... @property @@ -120,7 +121,7 @@ class Message(_Weakrefable): class MessageReader(_Weakrefable): - + ... @classmethod def open_stream(cls, source: bytes | NativeFile | IOBase | SupportPyBuffer) -> Self: ... @@ -135,7 +136,7 @@ class MessageReader(_Weakrefable): class _CRecordBatchWriter(_Weakrefable): - + ... def write(self, table_or_batch: Table | RecordBatch): ... def write_batch( @@ -155,6 +156,7 @@ class _CRecordBatchWriter(_Weakrefable): class _RecordBatchStreamWriter(_CRecordBatchWriter): + ... @property def _use_legacy_format(self) -> bool: ... @property @@ -164,11 +166,12 @@ class _RecordBatchStreamWriter(_CRecordBatchWriter): class _ReadPandasMixin: + ... def read_pandas(self, **options) -> pd.DataFrame: ... class RecordBatchReader(_Weakrefable): - + ... def __iter__(self) -> Self: ... def read_next_batch(self) -> RecordBatch: ... @@ -211,6 +214,7 @@ class RecordBatchReader(_Weakrefable): class _RecordBatchStreamReader(RecordBatchReader): + ... @property def stats(self) -> ReadStats: ... @@ -220,12 +224,13 @@ class _RecordBatchFileWriter(_RecordBatchStreamWriter): class RecordBatchWithMetadata(NamedTuple): - + ... batch: RecordBatch custom_metadata: KeyValueMetadata class _RecordBatchFileReader(_Weakrefable): + ... @property def num_record_batches(self) -> int: ... 
From 86e7ba4a76db025ebae5ec9212578867365b7fb0 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 19 Sep 2025 20:10:29 +0200 Subject: [PATCH 23/26] docstring update script --- dev/update_stub_docstrings.py | 188 +++++++++++++--------------------- 1 file changed, 74 insertions(+), 114 deletions(-) diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py index 2ad7a877f34..dceda807545 100644 --- a/dev/update_stub_docstrings.py +++ b/dev/update_stub_docstrings.py @@ -30,157 +30,117 @@ import click # TODO: perhaps replace griffe with importlib import griffe +from griffe import AliasResolutionError import libcst -class DocUpdater(libcst.CSTTransformer): - def __init__(self, package, namespace): - self.stack = [namespace] if namespace else [] - self._docstring = None - self.indentation = 0 - self.package = package +def _get_docstring(name, package, indentation): + # print("extract_docstrings", name) + try: + obj = package.get_member(name) + except (KeyError, ValueError, AliasResolutionError): + # Some cython __init__ symbols can't be found + # e.g. pyarrow.lib.OSFile.__init__ + stack = name.split(".") + parent_name = ".".join(stack[:-1]) - def _get_docstring(self, name): - # print("extract_docstrings", name) try: - obj = self.package.get_member(name) - except KeyError: - # Some cython __init__ symbols can't be found - # e.g. pyarrow.lib.OSFile.__init__ - parent_name = ".".join(self.stack[:-1]) - - try: - obj = self.package.get_member(parent_name).all_members[self.stack[-1]] - except KeyError: - # print(f"{name} not found in {self.package.name}, it's probably ok.") - return None - - if obj.has_docstring: - docstring = obj.docstring.value - # remove signature if present in docstring - if docstring.startswith(obj.name) or ( - (hasattr(obj.parent, "name") and - docstring.startswith(f"{obj.parent.name}.{obj.name}"))): - return "\n".join(docstring.splitlines()[2:]) - else: - return docstring - return None + obj = package.get_member(parent_name).all_members[stack[-1]] + except (KeyError, ValueError, AliasResolutionError): + print(f"{name} not found in {package.name}, it's probably ok.") + return None + + if obj.has_docstring: + docstring = obj.docstring.value + # remove signature if present in docstring + if docstring.startswith(obj.name) or ( + (hasattr(obj.parent, "name") and + docstring.startswith(f"{obj.parent.name}.{obj.name}"))): + docstring = "\n".join(docstring.splitlines()[2:]) + indentation_prefix = indentation * " " + docstring = indent(docstring + '\n"""', indentation_prefix) + docstring = '"""\n' + docstring + + return docstring + return None + +def _has_ellipsis(node): + if hasattr(node.body.body[0], "value") and isinstance(node.body.body[0].value, libcst.Ellipsis): + return True + return False - def visit_ClassDef(self, node): - # TODO: class docstrings? 
- self.stack.append(node.name.value) - self.indentation += 1 - node_name = ".".join(self.stack) - docstring = self._get_docstring(node_name) - - if docstring: - if not node.get_docstring(clean=False): - print("Missing docstring (in annotations) for:", node_name) - return False - self._docstring = f'"""{node.get_docstring(clean=False)}"""' - return True - return False - - def visit_FunctionDef(self, node): - self.stack.append(node.name.value) - self.indentation += 1 - node_name = ".".join(self.stack) - docstring = self._get_docstring(node_name) - if docstring: - if not node.get_docstring(clean=False): - print("Missing docstring (in annotations) for:", node_name) - return False - self._docstring = f'"""{node.get_docstring(clean=False)}"""' - return True - return False +class ReplaceEllipsis(libcst.CSTTransformer): + def __init__(self, package, namespace): + self.package = package + self.base_namespace = namespace + self.stack = [] + self.indentation = 0 - def leave_ClassDef(self, original_node, updated_node): - self.stack.pop() - self.indentation -= 1 - return updated_node + def _replace_ellipsis(self, original_node, updated_node): + name = ".".join(self.stack) + if self.base_namespace: + name = self.base_namespace + "." + name - def leave_FunctionDef(self, original_node, updated_node): + if _has_ellipsis(updated_node): + docstring = _get_docstring(name, self.package, self.indentation) + if docstring is not None and len(docstring) > 0: + new_docstring = libcst.SimpleString(value=docstring) + new_body = [ + libcst.SimpleWhitespace(self.indentation * " "), + libcst.Expr(value=new_docstring), + libcst.Newline() + ] + new_body = libcst.IndentedBlock(body=new_body) + updated_node = updated_node.with_changes(body=new_body) self.stack.pop() self.indentation -= 1 return updated_node - def leave_SimpleString(self, original_node, updated_node): - node_name = ".".join(self.stack) - - if original_node.value == self._docstring: - indentation = self.indentation * " " - indented_docstring = indent(self._get_docstring(node_name), indentation) - docstring = f'"""\n{indented_docstring}\n{indentation}"""' - return updated_node.with_changes(value=docstring) - - return updated_node - - -class ReplaceEllipsis(libcst.CSTTransformer): - def __init__(self, package, namespace): - self.stack = [namespace] if namespace else [] - self.indentation = 0 - self.package = package - - def _get_docstring(self, name, indentation): - # print(name) - try: - obj = self.package.get_member(name) - if obj.has_docstring: - indentation_prefix = indentation * " " - docstring = indent(obj.docstring.value, indentation_prefix) - docstring = f'"""\n{docstring}\n{indentation_prefix}"""' - # print(f"{name} has {len(docstring)} long docstring.") - return docstring - except KeyError: - print(f"{name} has no docstring.") - return "" + def visit_ClassDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + def leave_ClassDef(self, original_node, updated_node): + return self._replace_ellipsis(original_node, updated_node) def visit_FunctionDef(self, node): self.stack.append(node.name.value) self.indentation += 1 - def leave_FunctionDef(self, original_node, updated_node): - node_name = ".".join(self.stack) - indentation = self.indentation - self.stack.pop() - self.indentation -= 1 - - if isinstance(updated_node.body.body[0].value, libcst.Ellipsis): - print(node_name) - docstring = self._get_docstring(node_name, indentation) - if docstring and len(docstring) > 0: - new_docstring = libcst.SimpleString(value=docstring) - new_body 
= updated_node.body.with_changes(body=[libcst.Expr(value=new_docstring)]) - return updated_node.with_changes(body=new_body) - return updated_node + return self._replace_ellipsis(original_node, updated_node) @click.command() @click.option('--pyarrow_folder', '-f', type=click.Path(resolve_path=True)) -def update_stub_files(pyarrow_folder): +def add_docs_to_stub_files(pyarrow_folder): print("Updating docstrings of stub files in:", pyarrow_folder) package = griffe.load("pyarrow", try_relative_path=True, force_inspection=True, resolve_aliases=True) + lib_modules = ["array", "builder", "compat", "config", "device", "error", "io", + "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor", "_types"] for stub_file in Path(pyarrow_folder).rglob('*.pyi'): if stub_file.name == "_stubs_typing.pyi": continue - print(f"[{stub_file}]") with open(stub_file, 'r') as f: tree = libcst.parse_module(f.read()) - if stub_file.name != "__init__.pyi": - modified_tree = tree.visit(DocUpdater(package, "lib")) - else: - modified_tree = tree.visit(DocUpdater(package, None)) + module = stub_file.with_suffix('').name + if module in lib_modules: + module = "lib" + elif stub_file.parent.name in ["parquet", "interchange"]: + module = f"{stub_file.parent.name}.{module}" + elif module == "__init__": + module = "" + + modified_tree = tree.visit(ReplaceEllipsis(package, module)) with open(stub_file, "w") as f: f.write(modified_tree.code) + print("\n") if __name__ == "__main__": docstrings_map = {} - update_stub_files(obj={}) + add_docs_to_stub_files(obj={}) From 133af4d0c5bd82ef821f15fbc9583c94a5a83934 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 19 Sep 2025 23:50:38 +0200 Subject: [PATCH 24/26] move ipc around --- dev/update_stub_docstrings.py | 4 ++-- python/pyarrow-stubs/_cuda.pyi | 2 +- python/pyarrow/{_ipc.py => ipc.py} | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) rename python/pyarrow/{_ipc.py => ipc.py} (99%) diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py index dceda807545..0a9ce41e1f6 100644 --- a/dev/update_stub_docstrings.py +++ b/dev/update_stub_docstrings.py @@ -122,12 +122,12 @@ def add_docs_to_stub_files(pyarrow_folder): for stub_file in Path(pyarrow_folder).rglob('*.pyi'): if stub_file.name == "_stubs_typing.pyi": continue - print(f"[{stub_file}]") + module = stub_file.with_suffix('').name + print(f"[{stub_file} {module}]") with open(stub_file, 'r') as f: tree = libcst.parse_module(f.read()) - module = stub_file.with_suffix('').name if module in lib_modules: module = "lib" elif stub_file.parent.name in ["parquet", "interchange"]: diff --git a/python/pyarrow-stubs/_cuda.pyi b/python/pyarrow-stubs/_cuda.pyi index 3ec866ad668..929f448f396 100644 --- a/python/pyarrow-stubs/_cuda.pyi +++ b/python/pyarrow-stubs/_cuda.pyi @@ -19,7 +19,7 @@ from typing import Any import cuda # type: ignore[import-not-found] -from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-untyped] +from numba.cuda.cudadrv import driver as _numba_driver # type: ignore[import-not-found] from . 
import lib from ._stubs_typing import ArrayLike diff --git a/python/pyarrow/_ipc.py b/python/pyarrow/ipc.py similarity index 99% rename from python/pyarrow/_ipc.py rename to python/pyarrow/ipc.py index 4e236678788..19d3d46f3ba 100644 --- a/python/pyarrow/_ipc.py +++ b/python/pyarrow/ipc.py @@ -278,3 +278,4 @@ def deserialize_pandas(buf, *, use_threads=True): with pa.RecordBatchStreamReader(buffer_reader) as reader: table = reader.read_all() return table.to_pandas(use_threads=use_threads) + From 5e488be497386db649880ff66684c7fe17fbb937 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 20 Sep 2025 16:36:15 +0200 Subject: [PATCH 25/26] improve script --- .github/workflows/python.yml | 2 +- dev/update_stub_docstrings.py | 108 +++++++++++++++++++++++++++------- python/pyarrow-stubs/_fs.pyi | 2 +- python/pyarrow/ipc.py | 1 - 4 files changed, 90 insertions(+), 23 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 700218024a5..59bcba0837c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -140,7 +140,7 @@ jobs: - name: Type check with mypy and pyright run: |- - python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests griffe libcst + python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests griffe libcst types-cffi pushd python; # pip install -e . mypy pyarrow-stubs pyarrow/tests/test_array.py pyarrow/tests/test_io.py diff --git a/dev/update_stub_docstrings.py b/dev/update_stub_docstrings.py index 0a9ce41e1f6..7eb1ee2925d 100644 --- a/dev/update_stub_docstrings.py +++ b/dev/update_stub_docstrings.py @@ -21,7 +21,7 @@ # Usage # ===== # -# python ../dev/update_stub_docstrings.py -f ./pyarrow/ +# python ./dev/update_stub_docstrings.py -f ./python/pyarrow-stubs from pathlib import Path @@ -32,6 +32,7 @@ import griffe from griffe import AliasResolutionError import libcst +from libcst import matchers as m def _get_docstring(name, package, indentation): @@ -52,23 +53,21 @@ def _get_docstring(name, package, indentation): if obj.has_docstring: docstring = obj.docstring.value - # remove signature if present in docstring + # Remove signature if present in docstring if docstring.startswith(obj.name) or ( (hasattr(obj.parent, "name") and docstring.startswith(f"{obj.parent.name}.{obj.name}"))): docstring = "\n".join(docstring.splitlines()[2:]) + # Skip empty docstrings + if docstring.strip() == "": + return None + # Indent docstring indentation_prefix = indentation * " " docstring = indent(docstring + '\n"""', indentation_prefix) docstring = '"""\n' + docstring - return docstring return None -def _has_ellipsis(node): - if hasattr(node.body.body[0], "value") and isinstance(node.body.body[0].value, libcst.Ellipsis): - return True - return False - class ReplaceEllipsis(libcst.CSTTransformer): def __init__(self, package, namespace): @@ -77,37 +76,105 @@ def __init__(self, package, namespace): self.stack = [] self.indentation = 0 - def _replace_ellipsis(self, original_node, updated_node): + # Insert module level docstring if _clone_signature is used + def leave_Module(self, original_node, updated_node): + new_body = [] + clone_matcher = m.SimpleStatementLine( + body=[m.Assign( + value=m.Call(func=m.Name(value="_clone_signature")) + ), m.ZeroOrMore()] + ) + for statement in updated_node.body: + new_body.append(statement) + if m.matches(statement, clone_matcher): + name = statement.body[0].targets[0].target.value + if self.base_namespace: + 
name = f"{self.base_namespace}.{name}" + docstring = _get_docstring(name, self.package, 0) + if docstring is not None: + new_expr = libcst.Expr(value=libcst.SimpleString(docstring)) + new_line = libcst.SimpleStatementLine(body=[new_expr]) + new_body.append(new_line) + + return updated_node.with_changes(body=new_body) + + def visit_ClassDef(self, node): + self.stack.append(node.name.value) + self.indentation += 1 + + def leave_ClassDef(self, original_node, updated_node): name = ".".join(self.stack) if self.base_namespace: name = self.base_namespace + "." + name - if _has_ellipsis(updated_node): + class_matcher_1 = m.ClassDef( + name=m.Name(), + body=m.IndentedBlock( + body=[m.SimpleStatementLine( + body=[m.Expr(m.Ellipsis()), m.ZeroOrMore()] + ), m.ZeroOrMore()] + ) + ) + class_matcher_2 = m.ClassDef( + name=m.Name(), + body=m.IndentedBlock( + body=[m.FunctionDef(), m.ZeroOrMore()] + ) + ) + + if m.matches(updated_node, class_matcher_1): docstring = _get_docstring(name, self.package, self.indentation) - if docstring is not None and len(docstring) > 0: + if docstring is not None: + new_node = libcst.SimpleString(value=docstring) + updated_node = updated_node.deep_replace( + updated_node.body.body[0].body[0].value, new_node) + + if m.matches(updated_node, class_matcher_2): + docstring = _get_docstring(name, self.package, self.indentation) + if docstring is not None: new_docstring = libcst.SimpleString(value=docstring) new_body = [ libcst.SimpleWhitespace(self.indentation * " "), libcst.Expr(value=new_docstring), libcst.Newline() - ] + ] + list(updated_node.body.body) new_body = libcst.IndentedBlock(body=new_body) updated_node = updated_node.with_changes(body=new_body) + self.stack.pop() self.indentation -= 1 return updated_node - def visit_ClassDef(self, node): - self.stack.append(node.name.value) - self.indentation += 1 - def leave_ClassDef(self, original_node, updated_node): - return self._replace_ellipsis(original_node, updated_node) - def visit_FunctionDef(self, node): self.stack.append(node.name.value) self.indentation += 1 + def leave_FunctionDef(self, original_node, updated_node): - return self._replace_ellipsis(original_node, updated_node) + name = ".".join(self.stack) + if self.base_namespace: + name = self.base_namespace + "." 
+ name + + function_matcher = m.FunctionDef( + name=m.Name(), + body=m.SimpleStatementSuite( + body=[m.Expr( + m.Ellipsis() + )])) + if m.matches(original_node, function_matcher): + docstring = _get_docstring(name, self.package, self.indentation) + if docstring is not None: + new_docstring = libcst.SimpleString(value=docstring) + new_body = [ + libcst.SimpleWhitespace(self.indentation * " "), + libcst.Expr(value=new_docstring), + libcst.Newline() + ] + new_body = libcst.IndentedBlock(body=new_body) + updated_node = updated_node.with_changes(body=new_body) + + self.stack.pop() + self.indentation -= 1 + return updated_node @click.command() @@ -117,7 +184,8 @@ def add_docs_to_stub_files(pyarrow_folder): package = griffe.load("pyarrow", try_relative_path=True, force_inspection=True, resolve_aliases=True) lib_modules = ["array", "builder", "compat", "config", "device", "error", "io", - "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor", "_types"] + "_ipc", "memory", "pandas_shim", "scalar", "table", "tensor", + "_types"] for stub_file in Path(pyarrow_folder).rglob('*.pyi'): if stub_file.name == "_stubs_typing.pyi": diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi index 42ea8543738..59f803b801e 100644 --- a/python/pyarrow-stubs/_fs.pyi +++ b/python/pyarrow-stubs/_fs.pyi @@ -32,7 +32,7 @@ else: from typing import Union, overload -from fsspec import AbstractFileSystem # type: ignore[import-untyped] +from fsspec import AbstractFileSystem # type: ignore[import-not-found] from .lib import NativeFile, _Weakrefable diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py index 19d3d46f3ba..4e236678788 100644 --- a/python/pyarrow/ipc.py +++ b/python/pyarrow/ipc.py @@ -278,4 +278,3 @@ def deserialize_pandas(buf, *, use_threads=True): with pa.RecordBatchStreamReader(buffer_reader) as reader: table = reader.read_all() return table.to_pandas(use_threads=use_threads) - From b4e326a1c94c47d36a9a07136140895f967c5d24 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 20 Sep 2025 17:38:26 +0200 Subject: [PATCH 26/26] change to CI --- .github/workflows/python.yml | 14 +++++++------- python/pyarrow-stubs/_fs.pyi | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 59bcba0837c..f28e1a65739 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -140,13 +140,13 @@ jobs: - name: Type check with mypy and pyright run: |- - python -m pip install mypy pyright scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests griffe libcst types-cffi - pushd python; - # pip install -e . - mypy pyarrow-stubs pyarrow/tests/test_array.py pyarrow/tests/test_io.py - pyright pyarrow-stubs - # python ../dev/update_stub_docstrings.py -f ./pyarrow - # git status --porcelain=1 + python -m pip install mypy pyright griffe libcst scipy-stubs pandas-stubs types-python-dateutil types-psutil types-requests griffe libcst types-cffi + pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pyarrow + cd python + mypy ./pyarrow-stubs ./pyarrow/tests/test_array.py ./pyarrow/tests/test_io.py + pyright ./pyarrow-stubs + cd .. 
+ python ./dev/update_stub_docstrings.py -f ./python/pyarrow-stubs macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} Python 3 diff --git a/python/pyarrow-stubs/_fs.pyi b/python/pyarrow-stubs/_fs.pyi index 59f803b801e..9ec5c543c58 100644 --- a/python/pyarrow-stubs/_fs.pyi +++ b/python/pyarrow-stubs/_fs.pyi @@ -32,7 +32,7 @@ else: from typing import Union, overload -from fsspec import AbstractFileSystem # type: ignore[import-not-found] +from fsspec import AbstractFileSystem # type: ignore from .lib import NativeFile, _Weakrefable