Skip to content

Bug: panic when using a DataFusion ListingTable with a provided schema #5522

@joseph-isaacs

Description

@joseph-isaacs

Discussed in #5516

Originally posted by haohuaijin November 25, 2025

Issue Description

use Datafusion 50.0.0
Vortex 0.56.0

When using a DataFusion ListingTable with Vortex and a provided schema, I get a panic (detailed reproduction below):

    let config = ListingTableConfig::new(table_url)
        .with_listing_options(
            ListingOptions::new(format).with_session_config_options(ctx.state().config()),
        )
        .with_schema(Arc::new(arrow_schema));
thread 'main' panicked at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/arrow-schema-56.2.0/src/schema.rs:375:10:
index out of bounds: the len is 1 but the index is 1
stack backtrace:
   0: __rustc::rust_begin_unwind
             at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/std/src/panicking.rs:697:5
   1: core::panicking::panic_fmt
             at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/panicking.rs:75:14
   2: core::panicking::panic_bounds_check
             at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/panicking.rs:280:5
   3: arrow_schema::schema::Schema::field
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/arrow-schema-56.2.0/src/schema.rs:375:10
   4: <vortex_datafusion::persistent::opener::VortexOpener as datafusion_datasource::file_stream::FileOpener>::open::{{closure}}::{{closure}}
             at ./vortex-datafusion/src/persistent/opener.rs:253:48
   5: core::iter::adapters::map::map_fold::{{closure}}
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:88:28
   6: <core::slice::iter::Iter<T> as core::iter::traits::iterator::Iterator>::fold
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/slice/iter/macros.rs:255:27
   7: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:128:19
   8: core::iter::traits::iterator::Iterator::for_each
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:827:14
   9: alloc::vec::Vec<T,A>::extend_trusted
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/mod.rs:3611:26
  10: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_extend.rs:29:14
  11: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter_nested.rs:62:16
  12: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter.rs:34:9
  13: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/mod.rs:3470:9
  14: core::iter::traits::iterator::Iterator::collect
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2027:9
  15: <vortex_datafusion::persistent::opener::VortexOpener as datafusion_datasource::file_stream::FileOpener>::open::{{closure}}
             at ./vortex-datafusion/src/persistent/opener.rs:256:18
  16: <tracing::instrument::Instrumented<T> as core::future::future::Future>::poll
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tracing-0.1.41/src/instrument.rs:321:15
  17: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/future/future.rs:124:9
  18: futures_util::future::future::FutureExt::poll_unpin
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/futures-util-0.3.31/src/future/future/mod.rs:558:24
  19: datafusion_datasource::file_stream::FileStream::poll_inner
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-datasource-50.3.0/src/file_stream.rs:159:42
  20: <datafusion_datasource::file_stream::FileStream as futures_core::stream::Stream>::poll_next
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-datasource-50.3.0/src/file_stream.rs:332:27
  21: futures_util::stream::stream::StreamExt::poll_next_unpin
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/futures-util-0.3.31/src/stream/stream/mod.rs:1638:24
  22: <datafusion_physical_plan::coop::CooperativeStream<T> as futures_core::stream::Stream>::poll_next
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-physical-plan-50.3.0/src/coop.rs:148:36
  23: datafusion_physical_plan::stream::BatchSplitStream::poll_upstream
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-physical-plan-50.3.0/src/stream.rs:653:42
  24: <datafusion_physical_plan::stream::BatchSplitStream as futures_core::stream::Stream>::poll_next
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-physical-plan-50.3.0/src/stream.rs:687:14
  25: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/futures-core-0.3.31/src/stream.rs:130:33
  26: <S as futures_core::stream::TryStream>::try_poll_next
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/futures-core-0.3.31/src/stream.rs:206:14
  27: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as core::future::future::Future>::poll
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/futures-util-0.3.31/src/stream/try_stream/try_collect.rs:46:47
  28: datafusion_physical_plan::common::collect::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-physical-plan-50.3.0/src/common.rs:43:36
  29: datafusion_physical_plan::execution_plan::collect::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-physical-plan-50.3.0/src/execution_plan.rs:1150:36
  30: datafusion::dataframe::DataFrame::collect::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-50.3.0/src/dataframe/mod.rs:1376:33
  31: datafusion::dataframe::DataFrame::to_string::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-50.3.0/src/dataframe/mod.rs:1430:38
  32: datafusion::dataframe::DataFrame::show::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/datafusion-50.3.0/src/dataframe/mod.rs:1394:41
  33: vortex_table::run_query::{{closure}}
             at ./vortex-datafusion/examples/vortex_table.rs:103:41
  34: vortex_table::main::{{closure}}
             at ./vortex-datafusion/examples/vortex_table.rs:90:67
  35: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/future/future.rs:124:9
  36: tokio::runtime::park::CachedParkThread::block_on::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/park.rs:285:71
  37: tokio::task::coop::with_budget
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/task/coop/mod.rs:167:5
  38: tokio::task::coop::budget
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/task/coop/mod.rs:133:5
  39: tokio::runtime::park::CachedParkThread::block_on
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/park.rs:285:31
  40: tokio::runtime::context::blocking::BlockingRegionGuard::block_on
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/context/blocking.rs:66:14
  41: tokio::runtime::scheduler::multi_thread::MultiThread::block_on::{{closure}}
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/scheduler/multi_thread/mod.rs:87:22
  42: tokio::runtime::context::runtime::enter_runtime
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/context/runtime.rs:65:16
  43: tokio::runtime::scheduler::multi_thread::MultiThread::block_on
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/scheduler/multi_thread/mod.rs:86:9
  44: tokio::runtime::runtime::Runtime::block_on_inner
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/runtime.rs:358:50
  45: tokio::runtime::runtime::Runtime::block_on
             at /Users/huaijinhao/.cargo/registry/src/rsproxy.cn-e3de039b2554c837/tokio-1.47.1/src/runtime/runtime.rs:328:18
  46: vortex_table::main
             at ./vortex-datafusion/examples/vortex_table.rs:92:7
  47: core::ops::function::FnOnce::call_once
             at /Users/huaijinhao/.rustup/toolchains/1.89-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

Expected Behavior

If I use infer_schema instead of with_schema, I get the result and no panic.
Using with_schema produces the error:

    let config = ListingTableConfig::new(table_url)
        .with_listing_options(
            ListingOptions::new(format).with_session_config_options(ctx.state().config()),
        )
        .with_schema(Arc::new(arrow_schema));

Using infer_schema runs successfully:

    let config = ListingTableConfig::new(table_url)
        .with_listing_options(
            ListingOptions::new(format).with_session_config_options(ctx.state().config()),
        )
        .infer_schema(&ctx.state())
        .await?;

Actual Behavior

panic

Reproduction Steps

use std::sync::Arc;

use arrow_schema::Schema;
use datafusion::datasource::listing::{
    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::prelude::SessionContext;
use tempfile::tempdir;
use tokio::fs::OpenOptions;
use vortex::arrays::{ChunkedArray, StructArray, VarBinArray};
use vortex::buffer::buffer;
use vortex::error::vortex_err;
use vortex::file::WriteOptionsSessionExt;
use vortex::io::session::RuntimeSessionExt;
use vortex::session::VortexSession;
use vortex::validity::Validity;
use vortex::{IntoArray, VortexSessionDefault};
use vortex_datafusion::VortexFormat;

/// Reproduction for the reported panic: writes a two-column Vortex file, registers it as a
/// DataFusion `ListingTable` whose explicitly supplied schema contains only one of those
/// columns, and then runs a filtered `SELECT *` against it.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let session = VortexSession::default().with_tokio();

    // Build the test data: two chunked columns of 8 rows each (two chunks of 4).
    let temp_dir = tempdir()?;
    let strings = ChunkedArray::from_iter([
        VarBinArray::from(vec!["ab", "foo", "bar", "baz"]).into_array(),
        VarBinArray::from(vec!["ab", "foo", "bar", "baz"]).into_array(),
    ])
    .into_array();

    let numbers = ChunkedArray::from_iter([
        buffer![1u32, 2, 3, 4].into_array(),
        buffer![5u32, 6, 7, 8].into_array(),
    ])
    .into_array();

    // The struct (and therefore the file written below) has the fields "strings" and
    // "numbers", in that order.
    let st = StructArray::try_new(
        ["strings", "numbers"].into(),
        vec![strings, numbers],
        8,
        Validity::NonNullable,
    )?;

    let filepath = temp_dir.path().join("a.vortex");

    let mut f = OpenOptions::new()
        .write(true)
        .truncate(true)
        .create(true)
        .open(&filepath)
        .await?;

    // Write the struct array to `a.vortex` inside the temporary directory.
    session
        .write_options()
        .write(&mut f, st.to_array_stream())
        .await?;

    let ctx = SessionContext::new();
    let format = Arc::new(VortexFormat::new(session));
    let table_url = ListingTableUrl::parse(
        filepath
            .to_str()
            .ok_or_else(|| vortex_err!("Path is not valid UTF-8"))?,
    )?;

    // Keep only one field from the schema: the table schema handed to DataFusion contains
    // just "numbers", while the file on disk also contains "strings".
    let arrow_schema = st.dtype().to_arrow_schema()?;
    let arrow_schema = Schema::new(
        arrow_schema
            .fields()
            .iter()
            .filter(|f| f.name() == "numbers")
            .cloned()
            .collect::<Vec<_>>(),
    );

    // Supplying the schema via `with_schema` is the configuration reported to panic;
    // the issue states that using `infer_schema` here instead runs successfully.
    let config = ListingTableConfig::new(table_url)
        .with_listing_options(
            ListingOptions::new(format).with_session_config_options(ctx.state().config()),
        )
        .with_schema(Arc::new(arrow_schema));

    let listing_table = Arc::new(ListingTable::try_new(config)?);

    ctx.register_table("vortex_tbl", listing_table as _)?;

    // Reported failure point: "index out of bounds: the len is 1 but the index is 1".
    // NOTE(review): presumably a file-schema index (1 for "numbers") is being applied to the
    // one-field table schema - confirm against vortex-datafusion's opener.rs.
    run_query(&ctx, "SELECT * FROM vortex_tbl where numbers > 2").await?;

    Ok(())
}

/// Runs `query_string` against `ctx` twice: first wrapped in `EXPLAIN`, then as-is,
/// calling `show()` on each resulting `DataFrame`.
///
/// # Errors
///
/// Propagates any error returned while planning (`sql`) or displaying (`show`) either query.
async fn run_query(ctx: &SessionContext, query_string: impl AsRef<str>) -> anyhow::Result<()> {
    let sql = query_string.as_ref();

    // Display the plan before executing the query itself.
    let explain_sql = format!("EXPLAIN {sql}");
    let plan_frame = ctx.sql(&explain_sql).await?;
    plan_frame.show().await?;

    let result_frame = ctx.sql(sql).await?;
    result_frame.show().await?;

    Ok(())
}

OS Version Information

macos 15.6.1

I acknowledge that:

  • I have searched the Vortex repository (both open and closed Discussions and Issues) and confirm this is not a duplicate of an existing issue or discussion.
  • I have checked the "Preview" tab on all text fields to ensure that everything looks right, and have wrapped all configuration and code in code blocks with a group of three backticks (```) on separate lines.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions