From 54605526ea3bb1d2afa7f5dface9438ad552f927 Mon Sep 17 00:00:00 2001
From: Vignesh S
Date: Wed, 21 Jan 2026 14:41:57 +0000
Subject: [PATCH 1/3] perf: extend field-major processing to nested struct fields

---
 native/core/src/execution/shuffle/row.rs | 50 ++++++++++++------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs
index 821607ddb9..63d36e1a0c 100644
--- a/native/core/src/execution/shuffle/row.rs
+++ b/native/core/src/execution/shuffle/row.rs
@@ -631,33 +631,31 @@ pub(crate) fn append_columns(
             }
         }
         DataType::Struct(fields) => {
-            let struct_builder = builder
-                .as_any_mut()
-                .downcast_mut::<StructBuilder>()
-                .expect("StructBuilder");
+            // 1. Separate validity handling: build the null mask for the struct column.
+            //    The mask is not passed to append_columns yet, but computing it here keeps
+            //    the single pass over the rows required by Issue #3225.
             let mut row = SparkUnsafeRow::new(schema);
-
-            for i in row_start..row_end {
-                let row_addr = unsafe { *row_addresses_ptr.add(i) };
-                let row_size = unsafe { *row_sizes_ptr.add(i) };
-                row.point_to(row_addr, row_size);
-
-                let is_null = row.is_null_at(column_idx);
-
-                let nested_row = if is_null {
-                    // The struct is null.
-                    // Append a null value to the struct builder and field builders.
-                    struct_builder.append_null();
-                    SparkUnsafeRow::default()
-                } else {
-                    struct_builder.append(true);
-                    row.get_struct(column_idx, fields.len())
-                };
-
-                for (idx, field) in fields.into_iter().enumerate() {
-                    append_field(field.data_type(), struct_builder, &nested_row, idx)?;
-                }
-            }
+            let _nested_is_null: Vec<bool> = (row_start..row_end)
+                .map(|i| {
+                    let row_addr = unsafe { *row_addresses_ptr.add(i) };
+                    let row_size = unsafe { *row_sizes_ptr.add(i) };
+                    row.point_to(row_addr, row_size);
+                    row.is_null_at(column_idx)
+                })
+                .collect();
+
+            // 2. RECURSE: Call append_columns with the correct 8 arguments.
+            //    We use the original 'builder' (the Box) instead of the downcasted one.
+            append_columns(
+                row_addresses_ptr,       // 1. *const i64
+                row_sizes_ptr,           // 2. *const i32
+                fields.len(),            // 3. usize (count)
+                row_start,               // 4. usize
+                schema,                  // 5. &Schema
+                row_end,                 // 6. usize
+                builder,                 // 7. &mut Box<dyn ArrayBuilder>
+                prefer_dictionary_ratio, // 8. f64
+            )?;
         }
         _ => {
             unreachable!("Unsupported data type of column: {:?}", dt)

From bfa841516349e1f3625fe834c8f19ed2dce8df6f Mon Sep 17 00:00:00 2001
From: Vignesh S
Date: Wed, 21 Jan 2026 16:41:04 +0000
Subject: [PATCH 2/3] fix: handle parent struct validity and build nested null mask

---
 native/core/src/execution/shuffle/row.rs | 40 +++++++++++++++---------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs
index 63d36e1a0c..702e7d41e6 100644
--- a/native/core/src/execution/shuffle/row.rs
+++ b/native/core/src/execution/shuffle/row.rs
@@ -631,30 +631,40 @@ pub(crate) fn append_columns(
             }
         }
         DataType::Struct(fields) => {
-            // 1. Separate validity handling: build the null mask for the struct column.
-            //    The mask is not passed to append_columns yet, but computing it here keeps
-            //    the single pass over the rows required by Issue #3225.
+            let struct_builder = builder
+                .as_any_mut()
+                .downcast_mut::<StructBuilder>()
+                .expect("Should be a StructBuilder");
+
             let mut row = SparkUnsafeRow::new(schema);
-            let _nested_is_null: Vec<bool> = (row_start..row_end)
+            let nested_is_null: Vec<bool> = (row_start..row_end)
                 .map(|i| {
                     let row_addr = unsafe { *row_addresses_ptr.add(i) };
                     let row_size = unsafe { *row_sizes_ptr.add(i) };
                     row.point_to(row_addr, row_size);
-                    row.is_null_at(column_idx)
+
+                    let is_null = row.is_null_at(column_idx);
+
+                    // FIX: Track the validity of the struct itself
+                    if is_null {
+                        struct_builder.append_null();
+                    } else {
+                        struct_builder.append(true);
+                    }
+                    is_null
                 })
                 .collect();
 
-            // 2. RECURSE: Call append_columns with the correct 8 arguments.
-            //    We use the original 'builder' (the Box) instead of the downcasted one.
+            // RECURSE: process the struct's child fields
             append_columns(
-                row_addresses_ptr,       // 1. *const i64
-                row_sizes_ptr,           // 2. *const i32
-                fields.len(),            // 3. usize (count)
-                row_start,               // 4. usize
-                schema,                  // 5. &Schema
-                row_end,                 // 6. usize
-                builder,                 // 7. &mut Box<dyn ArrayBuilder>
-                prefer_dictionary_ratio, // 8. f64
+                row_addresses_ptr,
+                row_sizes_ptr,
+                fields.len(),
+                row_start,
+                schema,
+                row_end,
+                builder,
+                prefer_dictionary_ratio,
             )?;
         }
         _ => {
             unreachable!("Unsupported data type of column: {:?}", dt)

From acfc24f8e6ee8efb95773d4bba65fea196b9f514 Mon Sep 17 00:00:00 2001
From: Vignesh S
Date: Thu, 22 Jan 2026 12:02:17 +0000
Subject: [PATCH 3/3] fix: track parent struct validity and recurse into each struct field

---
 native/core/src/execution/shuffle/row.rs | 31 ++++++++++++++----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs
index 702e7d41e6..93eadd8d93 100644
--- a/native/core/src/execution/shuffle/row.rs
+++ b/native/core/src/execution/shuffle/row.rs
@@ -637,7 +637,10 @@ pub(crate) fn append_columns(
                 .expect("Should be a StructBuilder");
 
             let mut row = SparkUnsafeRow::new(schema);
-            let nested_is_null: Vec<bool> = (row_start..row_end)
+
+            // 1. Calculate validity and record it in the parent struct builder.
+            //    The mask itself is unused afterwards, hence the leading underscore.
+            let _nested_is_null: Vec<bool> = (row_start..row_end)
                 .map(|i| {
                     let row_addr = unsafe { *row_addresses_ptr.add(i) };
                     let row_size = unsafe { *row_sizes_ptr.add(i) };
@@ -645,7 +648,7 @@ pub(crate) fn append_columns(
 
                     let is_null = row.is_null_at(column_idx);
 
-                    // FIX: Track the validity of the struct itself
+                    // Record the parent's null status
                     if is_null {
                         struct_builder.append_null();
                     } else {
@@ -655,17 +658,19 @@ pub(crate) fn append_columns(
                 })
                 .collect();
 
-            // RECURSE: process the struct's child fields
-            append_columns(
-                row_addresses_ptr,
-                row_sizes_ptr,
-                fields.len(),
-                row_start,
-                schema,
-                row_end,
-                builder,
-                prefer_dictionary_ratio,
-            )?;
+            // 2. RECURSE: Iterate through fields to process them in field-major order
+            for (idx, _field) in fields.into_iter().enumerate() {
+                append_columns(
+                    row_addresses_ptr,
+                    row_sizes_ptr,
+                    1,
+                    row_start,
+                    schema,
+                    row_end,
+                    struct_builder.field_builder(idx).unwrap(),
+                    prefer_dictionary_ratio,
+                )?;
+            }
         }
         _ => {
             unreachable!("Unsupported data type of column: {:?}", dt)
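
Note on the approach (not part of the patches above): the series ends with the
parent struct's validity recorded once per row and each child field appended
through its own field builder. Below is a minimal, self-contained sketch of that
field-major pattern written directly against the arrow crate's StructBuilder
rather than Comet's SparkUnsafeRow path. The id/name fields, the
Option<(i32, &str)> input rows, and the arrow dependency version are
illustrative assumptions, not part of the actual shuffle code.

// Assumed dependency (Cargo.toml): arrow = "54"
use arrow::array::{Array, Int32Builder, StringBuilder, StructBuilder};
use arrow::datatypes::{DataType, Field};

fn main() {
    // Hypothetical input: each row is an optional (id, name) struct.
    let rows: Vec<Option<(i32, &str)>> = vec![Some((1, "a")), None, Some((3, "c"))];

    let fields = vec![
        Field::new("id", DataType::Int32, true),
        Field::new("name", DataType::Utf8, true),
    ];
    let mut struct_builder = StructBuilder::from_fields(fields, rows.len());

    // One pass over the rows to capture the parent struct's validity.
    let is_null: Vec<bool> = rows.iter().map(|r| r.is_none()).collect();

    // Field-major fill: each child builder is completed in its own tight loop,
    // appending a null placeholder wherever the parent struct is null.
    {
        let id_builder = struct_builder.field_builder::<Int32Builder>(0).unwrap();
        for row in &rows {
            id_builder.append_option(row.as_ref().map(|(id, _)| *id));
        }
    }
    {
        let name_builder = struct_builder.field_builder::<StringBuilder>(1).unwrap();
        for row in &rows {
            name_builder.append_option(row.as_ref().map(|(_, name)| *name));
        }
    }

    // Apply the recorded validity to the parent builder, one entry per row.
    for null in &is_null {
        if *null {
            struct_builder.append_null();
        } else {
            struct_builder.append(true);
        }
    }

    let array = struct_builder.finish();
    assert_eq!(array.len(), 3);
    assert!(array.is_null(1));
}

Filling one child column at a time keeps each inner loop working on a single
concrete builder type, which appears to be the locality benefit the field-major
layout in the patches is aiming for.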