diff --git a/vortex-array/src/arrays/dict/array.rs b/vortex-array/src/arrays/dict/array.rs index 4e203001114..d55d5e09f62 100644 --- a/vortex-array/src/arrays/dict/array.rs +++ b/vortex-array/src/arrays/dict/array.rs @@ -45,6 +45,12 @@ pub struct DictArray { pub(super) all_values_referenced: bool, } +pub struct DictArrayParts { + pub codes: ArrayRef, + pub values: ArrayRef, + pub dtype: DType, +} + impl DictArray { /// Build a new `DictArray` without validating the codes or values. /// @@ -114,8 +120,12 @@ impl DictArray { Ok(unsafe { Self::new_unchecked(codes, values) }) } - pub fn into_parts(self) -> (ArrayRef, ArrayRef) { - (self.codes, self.values) + pub fn into_parts(self) -> DictArrayParts { + DictArrayParts { + codes: self.codes, + values: self.values, + dtype: self.dtype, + } } #[inline] diff --git a/vortex-array/src/arrow/executor/dictionary.rs b/vortex-array/src/arrow/executor/dictionary.rs index 8f25ab072dc..9d6b1c33710 100644 --- a/vortex-array/src/arrow/executor/dictionary.rs +++ b/vortex-array/src/arrow/executor/dictionary.rs @@ -15,6 +15,7 @@ use vortex_error::vortex_bail; use crate::ArrayRef; use crate::ExecutionCtx; use crate::arrays::DictArray; +use crate::arrays::DictArrayParts; use crate::arrays::DictVTable; use crate::arrow::ArrowArrayExecutor; @@ -47,7 +48,7 @@ fn dict_to_dict( values_type: &DataType, ctx: &mut ExecutionCtx, ) -> VortexResult { - let (codes, values) = array.into_parts(); + let DictArrayParts { codes, values, .. } = array.into_parts(); let codes = codes.execute_arrow(Some(codes_type), ctx)?; let values = values.execute_arrow(Some(values_type), ctx)?; diff --git a/vortex-cuda/benches/for_cuda.rs b/vortex-cuda/benches/for_cuda.rs index 284291a8ead..1cfa2ff69b3 100644 --- a/vortex-cuda/benches/for_cuda.rs +++ b/vortex-cuda/benches/for_cuda.rs @@ -199,7 +199,7 @@ fn benchmark_for_u8(c: &mut Criterion) { &for_array, |b, for_array| { b.iter_custom(|iters| { - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); let encoded = for_array.encoded(); @@ -248,7 +248,7 @@ fn benchmark_for_u16(c: &mut Criterion) { &for_array, |b, for_array| { b.iter_custom(|iters| { - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); let encoded = for_array.encoded(); @@ -297,7 +297,7 @@ fn benchmark_for_u32(c: &mut Criterion) { &for_array, |b, for_array| { b.iter_custom(|iters| { - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); let encoded = for_array.encoded(); @@ -346,7 +346,7 @@ fn benchmark_for_u64(c: &mut Criterion) { &for_array, |b, for_array| { b.iter_custom(|iters| { - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); let encoded = for_array.encoded(); diff --git a/vortex-cuda/src/executor.rs b/vortex-cuda/src/executor.rs index a2bab00c5dc..557774d2a10 100644 --- a/vortex-cuda/src/executor.rs +++ b/vortex-cuda/src/executor.rs @@ -17,20 +17,18 @@ use cudarc::driver::LaunchArgs; use cudarc::driver::result; use cudarc::driver::result::memcpy_htod_async; use cudarc::driver::sys; -use 
cudarc::driver::sys::CUevent_flags; use futures::future::BoxFuture; use kanal::Sender; use result::stream; use vortex_array::Array; use vortex_array::ArrayRef; use vortex_array::Canonical; -use vortex_array::VortexSessionExecute; +use vortex_array::ExecutionCtx; use vortex_array::buffer::BufferHandle; use vortex_buffer::Buffer; use vortex_dtype::PType; use vortex_error::VortexResult; use vortex_error::vortex_err; -use vortex_session::VortexSession; use crate::CudaDeviceBuffer; use crate::CudaSession; @@ -115,109 +113,23 @@ pub struct CudaKernelEvents { pub after_launch: CudaEvent, } -/// Convenience macro to launch a CUDA kernel. -/// -/// The kernel gets launched on the stream of the execution context. -/// -/// The kernel launch config: -/// LaunchConfig { -/// grid_dim: (array.len() / 2048, 1, 1), -/// block_dim: (64, 1, 1), -/// shared_mem_bytes: 0, -/// }; -/// 64 threads are used per block which corresponds to 2 warps. -/// Each block handles 2048 elements. Each thread handles 32 elements. -/// The last block and thread are allowed to have less elements. -/// -/// Note: A macro is necessary to unroll the launch builder arguments. -/// -/// # Returns -/// -/// A pair of CUDA events submitted before and after the kernel. -/// Depending on `CUevent_flags` these events can contain timestamps. Use -/// `CU_EVENT_DISABLE_TIMING` for minimal overhead and `CU_EVENT_DEFAULT` to -/// enable timestamps. -#[macro_export] -macro_rules! launch_cuda_kernel { - ( - execution_ctx: $ctx:expr, - module: $module:expr, - ptypes: $ptypes:expr, - launch_args: [$($arg:expr),* $(,)?], - event_recording: $event_recording:expr, - array_len: $len:expr - ) => {{ - let cuda_function = $ctx.load_function($module, $ptypes)?; - let mut launch_builder = $ctx.launch_builder(&cuda_function); - - $( - launch_builder.arg(&$arg); - )* - - $crate::executor::launch_cuda_kernel_impl(&mut launch_builder, $event_recording, $len)? - }}; -} - -/// Launches a CUDA kernel with the passed launch builder. -/// -/// # Arguments -/// -/// * `launch_builder` - Configured launch builder -/// * `array_len` - Length of the array to process -/// -/// # Returns -/// -/// A pair of CUDA events submitted before and after the kernel. -/// Depending on `CUevent_flags` these events can contain timestamps. Use -/// `CU_EVENT_DISABLE_TIMING` for minimal overhead and `CU_EVENT_DEFAULT` to -/// enable timestamps. -pub fn launch_cuda_kernel_impl( - launch_builder: &mut LaunchArgs, - event_flags: CUevent_flags, - array_len: usize, -) -> VortexResult { - let num_chunks = u32::try_from(array_len.div_ceil(2048))?; - - let config = cudarc::driver::LaunchConfig { - grid_dim: (num_chunks, 1, 1), - block_dim: (64, 1, 1), - shared_mem_bytes: 0, - }; - - launch_builder.record_kernel_launch(event_flags); - - unsafe { - launch_builder - .launch(config) - .map_err(|e| vortex_err!("Failed to launch kernel: {}", e)) - .and_then(|events| { - events - .ok_or_else(|| vortex_err!("CUDA events not recorded")) - .map(|(before_launch, after_launch)| CudaKernelEvents { - before_launch, - after_launch, - }) - }) - } -} - /// CUDA execution context. /// /// Provides access to the CUDA context and stream for kernel execution. /// Handles memory allocation and data transfers between host and device. pub struct CudaExecutionCtx { stream: Arc, - vortex_session: VortexSession, + ctx: ExecutionCtx, cuda_session: CudaSession, } impl CudaExecutionCtx { /// Creates a new CUDA execution context. 
- pub(crate) fn new(stream: Arc, vortex_session: VortexSession) -> Self { - let cuda_session = vortex_session.cuda_session().clone(); + pub(crate) fn new(stream: Arc, ctx: ExecutionCtx) -> Self { + let cuda_session = ctx.session().cuda_session().clone(); Self { stream, - vortex_session, + ctx, cuda_session, } } @@ -351,8 +263,8 @@ pub trait CudaArrayExt: Array { #[async_trait] impl CudaArrayExt for ArrayRef { async fn execute_cuda(self, ctx: &mut CudaExecutionCtx) -> VortexResult { - if self.is_canonical() { - return self.to_canonical(); + if self.is_canonical() || self.is_empty() { + return self.execute(&mut ctx.ctx); } let Some(support) = ctx.cuda_session.kernel(&self.encoding_id()) else { @@ -360,8 +272,7 @@ impl CudaArrayExt for ArrayRef { encoding = %self.encoding_id(), "No CUDA support registered for encoding, falling back to CPU execution" ); - let mut array_ctx = ctx.vortex_session.create_execution_ctx(); - return self.execute(&mut array_ctx); + return self.execute(&mut ctx.ctx); }; tracing::debug!( diff --git a/vortex-cuda/src/kernel/arrays/dict.rs b/vortex-cuda/src/kernel/arrays/dict.rs new file mode 100644 index 00000000000..0f217001019 --- /dev/null +++ b/vortex-cuda/src/kernel/arrays/dict.rs @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use async_trait::async_trait; +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::arrays::DictVTable; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::executor::CudaExecute; +use crate::executor::CudaExecutionCtx; + +/// CUDA executor for dictionary-encoded arrays. +#[derive(Debug)] +pub struct DictExecutor; + +#[async_trait] +impl CudaExecute for DictExecutor { + async fn execute( + &self, + array: ArrayRef, + _ctx: &mut CudaExecutionCtx, + ) -> VortexResult { + let _dict_array = array + .try_into::() + .ok() + .vortex_expect("Array is not a Dict array"); + + todo!() + } +} diff --git a/vortex-cuda/src/kernel/arrays/mod.rs b/vortex-cuda/src/kernel/arrays/mod.rs new file mode 100644 index 00000000000..6c9821b2308 --- /dev/null +++ b/vortex-cuda/src/kernel/arrays/mod.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod dict; +pub use dict::DictExecutor; diff --git a/vortex-cuda/src/for_.rs b/vortex-cuda/src/kernel/encodings/for_.rs similarity index 85% rename from vortex-cuda/src/for_.rs rename to vortex-cuda/src/kernel/encodings/for_.rs index 05bd9a61a0b..33d04be614f 100644 --- a/vortex-cuda/src/for_.rs +++ b/vortex-cuda/src/kernel/encodings/for_.rs @@ -5,7 +5,6 @@ use async_trait::async_trait; use cudarc::driver::DeviceRepr; use cudarc::driver::PushKernelArg; use cudarc::driver::sys::CUevent_flags::CU_EVENT_DISABLE_TIMING; -use vortex_array::Array; use vortex_array::ArrayRef; use vortex_array::Canonical; use vortex_array::arrays::PrimitiveArray; @@ -13,8 +12,8 @@ use vortex_dtype::NativePType; use vortex_dtype::match_each_native_simd_ptype; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_err; use vortex_fastlanes::FoRArray; +use vortex_fastlanes::FoRVTable; use crate::CudaBufferExt; use crate::executor::CudaArrayExt; @@ -24,39 +23,32 @@ use crate::launch_cuda_kernel; /// CUDA executor for frame-of-reference. 
#[derive(Debug)] -pub struct ForExecutor; +pub struct FoRExecutor; #[async_trait] -impl CudaExecute for ForExecutor { +impl CudaExecute for FoRExecutor { async fn execute( &self, array: ArrayRef, ctx: &mut CudaExecutionCtx, ) -> VortexResult { let for_array = array - .as_any() - .downcast_ref::() - .ok_or_else(|| vortex_err!("Array is not a FOR array"))?; - - execute_for(for_array, ctx).await + .try_into::() + .ok() + .vortex_expect("Array is not a FOR array"); + + // Excludes f16 support. + match_each_native_simd_ptype!(for_array.ptype(), |T| { + execute_for_typed::(for_array, ctx).await + }) } } -async fn execute_for(array: &FoRArray, ctx: &mut CudaExecutionCtx) -> VortexResult { - if array.is_empty() { - return array.to_array().to_canonical(); - } - - // Excludes f16 support. - match_each_native_simd_ptype!(array.ptype(), |T| { - execute_for_typed::(array, ctx).await - }) -} - async fn execute_for_typed( - array: &FoRArray, + array: FoRArray, ctx: &mut CudaExecutionCtx, ) -> VortexResult { + assert!(!array.is_empty()); let reference = array .reference_scalar() .as_primitive() @@ -114,7 +106,7 @@ mod tests { return; } - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); // Create u8 offset values that cycle through 0-255, creating 5000 elements @@ -128,7 +120,8 @@ mod tests { .vortex_expect("failed to create FoR array"); // Decompress on the GPU. - let result = execute_for(&for_array, &mut cuda_ctx) + let result = FoRExecutor + .execute(for_array.to_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed"); @@ -150,7 +143,7 @@ mod tests { return; } - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); // Create u16 offset values that cycle through 0-5000, creating 5000 elements @@ -163,7 +156,8 @@ mod tests { .vortex_expect("failed to create FoR array"); // Decompress on the GPU. - let result = execute_for(&for_array, &mut cuda_ctx) + let result = FoRExecutor + .execute(for_array.to_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed"); @@ -185,7 +179,7 @@ mod tests { return; } - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); // Create u32 offset values that cycle through 0-5000, creating 5000 elements @@ -198,7 +192,8 @@ mod tests { .vortex_expect("failed to create FoR array"); // Decompress on the GPU. - let result = execute_for(&for_array, &mut cuda_ctx) + let result = FoRExecutor + .execute(for_array.to_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed"); @@ -220,7 +215,7 @@ mod tests { return; } - let mut cuda_ctx = CudaSession::new_ctx(VortexSession::empty()) + let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty()) .vortex_expect("failed to create execution context"); // Create u64 offset values that cycle through 0-5000, creating 5000 elements @@ -233,7 +228,8 @@ mod tests { .vortex_expect("failed to create FoR array"); // Decompress on the GPU. 
- let result = execute_for(&for_array, &mut cuda_ctx) + let result = FoRExecutor + .execute(for_array.to_array(), &mut cuda_ctx) .await .vortex_expect("GPU decompression failed"); diff --git a/vortex-cuda/src/kernel/encodings/mod.rs b/vortex-cuda/src/kernel/encodings/mod.rs new file mode 100644 index 00000000000..ae57c8649fd --- /dev/null +++ b/vortex-cuda/src/kernel/encodings/mod.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod for_; +pub use for_::FoRExecutor; diff --git a/vortex-cuda/src/kernel.rs b/vortex-cuda/src/kernel/mod.rs similarity index 53% rename from vortex-cuda/src/kernel.rs rename to vortex-cuda/src/kernel/mod.rs index 01a896fc59b..a4efa4727ed 100644 --- a/vortex-cuda/src/kernel.rs +++ b/vortex-cuda/src/kernel/mod.rs @@ -12,12 +12,108 @@ use std::sync::Arc; use cudarc::driver::CudaContext; use cudarc::driver::CudaFunction; use cudarc::driver::CudaModule; +use cudarc::driver::LaunchArgs; +use cudarc::driver::sys::CUevent_flags; use cudarc::nvrtc::Ptx; use vortex_dtype::PType; use vortex_error::VortexResult; use vortex_error::vortex_err; use vortex_utils::aliases::dash_map::DashMap; +mod arrays; +mod encodings; + +pub use arrays::DictExecutor; +pub use encodings::FoRExecutor; + +use crate::CudaKernelEvents; + +/// Convenience macro to launch a CUDA kernel. +/// +/// The kernel gets launched on the stream of the execution context. +/// +/// The kernel launch config: +/// LaunchConfig { +/// grid_dim: (array.len() / 2048, 1, 1), +/// block_dim: (64, 1, 1), +/// shared_mem_bytes: 0, +/// }; +/// 64 threads are used per block which corresponds to 2 warps. +/// Each block handles 2048 elements. Each thread handles 32 elements. +/// The last block and thread are allowed to have less elements. +/// +/// Note: A macro is necessary to unroll the launch builder arguments. +/// +/// # Returns +/// +/// A pair of CUDA events submitted before and after the kernel. +/// Depending on `CUevent_flags` these events can contain timestamps. Use +/// `CU_EVENT_DISABLE_TIMING` for minimal overhead and `CU_EVENT_DEFAULT` to +/// enable timestamps. +#[macro_export] +macro_rules! launch_cuda_kernel { + ( + execution_ctx: $ctx:expr, + module: $module:expr, + ptypes: $ptypes:expr, + launch_args: [$($arg:expr),* $(,)?], + event_recording: $event_recording:expr, + array_len: $len:expr + ) => {{ + let cuda_function = $ctx.load_function($module, $ptypes)?; + let mut launch_builder = $ctx.launch_builder(&cuda_function); + + $( + launch_builder.arg(&$arg); + )* + + $crate::launch_cuda_kernel_impl(&mut launch_builder, $event_recording, $len)? + }}; +} + +/// Launches a CUDA kernel with the passed launch builder. +/// +/// # Arguments +/// +/// * `launch_builder` - Configured launch builder +/// * `array_len` - Length of the array to process +/// +/// # Returns +/// +/// A pair of CUDA events submitted before and after the kernel. +/// Depending on `CUevent_flags` these events can contain timestamps. Use +/// `CU_EVENT_DISABLE_TIMING` for minimal overhead and `CU_EVENT_DEFAULT` to +/// enable timestamps. 
+pub fn launch_cuda_kernel_impl( + launch_builder: &mut LaunchArgs, + event_flags: CUevent_flags, + array_len: usize, +) -> VortexResult { + let num_chunks = u32::try_from(array_len.div_ceil(2048))?; + + let config = cudarc::driver::LaunchConfig { + grid_dim: (num_chunks, 1, 1), + block_dim: (64, 1, 1), + shared_mem_bytes: 0, + }; + + launch_builder.record_kernel_launch(event_flags); + + unsafe { + launch_builder + .launch(config) + .map_err(|e| vortex_err!("Failed to launch kernel: {}", e)) + .and_then(|events| { + events + .ok_or_else(|| vortex_err!("CUDA events not recorded")) + .map(|(before_launch, after_launch)| CudaKernelEvents { + before_launch, + after_launch, + }) + }) + } +} + /// Loader for CUDA kernels with PTX caching. /// /// Handles loading PTX files, compiling modules, and loading functions. diff --git a/vortex-cuda/src/lib.rs b/vortex-cuda/src/lib.rs index be6399a8c32..8075a70283a 100644 --- a/vortex-cuda/src/lib.rs +++ b/vortex-cuda/src/lib.rs @@ -5,7 +5,6 @@ mod device_buffer; pub mod executor; -mod for_; mod kernel; mod session; @@ -15,8 +14,12 @@ pub use device_buffer::CudaBufferExt; pub use device_buffer::CudaDeviceBuffer; pub use executor::CudaExecutionCtx; pub use executor::CudaKernelEvents; -use for_::ForExecutor; +use kernel::DictExecutor; +use kernel::FoRExecutor; +pub use kernel::launch_cuda_kernel_impl; pub use session::CudaSession; +use vortex_array::arrays::DictVTable; +use vortex_fastlanes::FoRVTable; /// Check if the NVIDIA CUDA Compiler is available. pub fn has_nvcc() -> bool { @@ -29,6 +32,6 @@ pub fn has_nvcc() -> bool { /// Registers CUDA kernels. pub fn initialize_cuda(session: &CudaSession) { tracing::info!("Registering CUDA kernels"); - session.register_kernel("fastlanes.for".into(), &ForExecutor); - // TODO(0ax1): Register additional executors + session.register_kernel(FoRVTable::ID, &FoRExecutor); + session.register_kernel(DictVTable::ID, &DictExecutor); } diff --git a/vortex-cuda/src/session.rs b/vortex-cuda/src/session.rs index 6b33d10e753..1e33b5c3d5e 100644 --- a/vortex-cuda/src/session.rs +++ b/vortex-cuda/src/session.rs @@ -5,6 +5,7 @@ use std::fmt::Debug; use std::sync::Arc; use cudarc::driver::CudaContext; +use vortex_array::VortexSessionExecute; use vortex_array::vtable::ArrayId; use vortex_dtype::PType; use vortex_error::VortexResult; @@ -39,7 +40,7 @@ impl CudaSession { } /// Creates a new CUDA execution context. - pub fn new_ctx( + pub fn create_execution_ctx( vortex_session: vortex_session::VortexSession, ) -> VortexResult { let stream = vortex_session @@ -47,7 +48,10 @@ impl CudaSession { .context .new_stream() .map_err(|e| vortex_err!("Failed to create CUDA stream: {}", e))?; - Ok(CudaExecutionCtx::new(stream, vortex_session)) + Ok(CudaExecutionCtx::new( + stream, + vortex_session.create_execution_ctx(), + )) } /// Registers CUDA support for an array encoding.
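
Note: a minimal usage sketch of the new DictArray::into_parts API, mirroring the call site in vortex-array/src/arrow/executor/dictionary.rs above. split_dict is a hypothetical helper, not part of this change.

use vortex_array::ArrayRef;
use vortex_array::arrays::{DictArray, DictArrayParts};

// Hypothetical helper: destructure a DictArray into its codes and values.
// The `..` pattern ignores the dtype now carried in DictArrayParts, exactly
// as the Arrow dictionary executor does in this diff.
fn split_dict(array: DictArray) -> (ArrayRef, ArrayRef) {
    let DictArrayParts { codes, values, .. } = array.into_parts();
    (codes, values)
}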
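
Note: a sketch of GPU decompression through the renamed CudaSession::create_execution_ctx entry point. decode_on_gpu is a hypothetical wrapper over the APIs touched in this diff, assuming CUDA kernels have already been registered (e.g. via initialize_cuda).

use vortex_array::{ArrayRef, Canonical};
use vortex_cuda::CudaSession;
use vortex_cuda::executor::CudaArrayExt;
use vortex_error::VortexResult;
use vortex_session::VortexSession;

// Hypothetical wrapper: build a CUDA execution context from an empty session
// and decompress an array on the GPU. Encodings without a registered CUDA
// kernel fall back to CPU execution inside execute_cuda.
async fn decode_on_gpu(array: ArrayRef) -> VortexResult<Canonical> {
    let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())?;
    array.execute_cuda(&mut cuda_ctx).await
}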