Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/actions/java-test/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,16 @@ inputs:
description: 'Whether to upload test results including coverage to GitHub'
required: false
default: 'false'
skip-native-build:
description: 'Skip native build (when using pre-built artifact)'
required: false
default: 'false'

runs:
using: "composite"
steps:
- name: Run Cargo release build
if: ${{ inputs.skip-native-build != 'true' }}
shell: bash
# it is important that we run the Scala tests against a release build rather than a debug build
# to make sure that no tests are relying on overflow checks that are present only in debug builds
Expand Down
14 changes: 13 additions & 1 deletion .github/actions/setup-spark-builder/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ inputs:
spark-version:
description: 'The Apache Spark version (e.g., 3.5.7) to build'
required: true
skip-native-build:
description: 'Skip native build (when using pre-built artifact)'
required: false
default: 'false'
runs:
using: "composite"
steps:
Expand Down Expand Up @@ -51,7 +55,15 @@ runs:
restore-keys: |
${{ runner.os }}-spark-sql-

- name: Build Comet
- name: Build Comet (with native)
if: ${{ inputs.skip-native-build != 'true' }}
shell: bash
run: |
PROFILES="-Pspark-${{inputs.spark-short-version}}" make release

- name: Build Comet (Maven only, skip native)
if: ${{ inputs.skip-native-build == 'true' }}
shell: bash
run: |
# Native library should already be in native/target/release/
./mvnw install -Prelease -DskipTests -Pspark-${{inputs.spark-short-version}}
85 changes: 80 additions & 5 deletions .github/workflows/pr_build_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,48 @@ env:
RUST_VERSION: stable

jobs:

# Run Rust tests once per JDK version

# Build native library once and share with all test jobs
build-native:
name: Build Native Library
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v6

- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ env.RUST_VERSION }}
jdk-version: 17 # JDK only needed for common module proto generation

- name: Cache Cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
native/target
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-ci-

- name: Build native library (CI profile)
run: |
cd native
# CI profile: same overflow behavior as release, but faster compilation
# (no LTO, parallel codegen)
cargo build --profile ci

- name: Upload native library
uses: actions/upload-artifact@v4
with:
name: native-lib-linux
path: native/target/ci/libcomet.so
retention-days: 1

# Run Rust tests (runs in parallel with build-native, uses debug builds)
linux-test-rust:
strategy:
matrix:
Expand All @@ -60,15 +100,29 @@ jobs:
image: amd64/rust
steps:
- uses: actions/checkout@v6

- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{env.RUST_VERSION}}
rust-version: ${{ env.RUST_VERSION }}
jdk-version: ${{ matrix.java_version }}

- name: Cache Cargo
uses: actions/cache@v4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use ${{ runner.os }}-cargo-ci- generated by build-native here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The linux-test-rust job uses debug builds (cargo test) while build-native uses the CI profile, so their native/target artifacts are incompatible. Sharing the same cache key would cause cache thrashing, where each job overwrites the other's compiled artifacts, if I'm understanding this correctly.

with:
path: |
~/.cargo/registry
~/.cargo/git
native/target
key: ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-

- name: Rust test steps
uses: ./.github/actions/rust-test

linux-test:
needs: build-native
strategy:
matrix:
os: [ubuntu-latest]
Expand Down Expand Up @@ -186,11 +240,31 @@ jobs:

steps:
- uses: actions/checkout@v6

- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{env.RUST_VERSION}}
rust-version: ${{ env.RUST_VERSION }}
jdk-version: ${{ matrix.profile.java_version }}

- name: Download native library
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

uses: actions/download-artifact@v4
with:
name: native-lib-linux
# Download to release/ since Maven's -Prelease expects libcomet.so there
path: native/target/release/

# Restore cargo registry cache (for any cargo commands that might run)
- name: Cache Cargo registry
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-registry-

- name: Java test steps
uses: ./.github/actions/java-test
with:
Expand All @@ -199,3 +273,4 @@ jobs:
maven_opts: ${{ matrix.profile.maven_opts }}
scan_impl: ${{ matrix.profile.scan_impl }}
upload-test-reports: true
skip-native-build: true
65 changes: 64 additions & 1 deletion .github/workflows/pr_build_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,48 @@ env:

jobs:

# Build native library once and share with all test jobs
build-native:
name: Build Native Library (macOS)
runs-on: macos-14
steps:
- uses: actions/checkout@v6

- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-macos-builder
with:
rust-version: ${{ env.RUST_VERSION }}
jdk-version: 17
jdk-architecture: aarch64
protoc-architecture: aarch_64

- name: Cache Cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
native/target
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-ci-

- name: Build native library (CI profile)
run: |
cd native
# CI profile: same overflow behavior as release, but faster compilation
# (no LTO, parallel codegen)
cargo build --profile ci

- name: Upload native library
uses: actions/upload-artifact@v4
with:
name: native-lib-macos
path: native/target/ci/libcomet.dylib
retention-days: 1

macos-aarch64-test:
needs: build-native
strategy:
matrix:
os: [macos-14]
Expand Down Expand Up @@ -145,13 +186,33 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v6

- name: Setup Rust & Java toolchain
uses: ./.github/actions/setup-macos-builder
with:
rust-version: ${{env.RUST_VERSION}}
rust-version: ${{ env.RUST_VERSION }}
jdk-version: ${{ matrix.profile.java_version }}
jdk-architecture: aarch64
protoc-architecture: aarch_64

- name: Download native library
uses: actions/download-artifact@v4
with:
name: native-lib-macos
# Download to release/ since Maven's -Prelease expects libcomet.dylib there
path: native/target/release/

# Restore cargo registry cache (for any cargo commands that might run)
- name: Cache Cargo registry
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-registry-

- name: Set thread thresholds envs for spark test on macOS
# see: https://github.com/apache/datafusion-comet/issues/2965
shell: bash
Expand All @@ -160,9 +221,11 @@ jobs:
echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV

- name: Java test steps
uses: ./.github/actions/java-test
with:
artifact_name: ${{ matrix.os }}-${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
maven_opts: ${{ matrix.profile.maven_opts }}
skip-native-build: true
60 changes: 60 additions & 0 deletions .github/workflows/spark_sql_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,47 @@ env:
RUST_VERSION: stable

jobs:

# Build native library once and share with all test jobs
build-native:
name: Build Native Library
runs-on: ubuntu-24.04
container:
image: amd64/rust
steps:
- uses: actions/checkout@v6

- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: ${{ env.RUST_VERSION }}
jdk-version: 17

- name: Cache Cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
native/target
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-ci-

- name: Build native library (CI profile)
run: |
cd native
cargo build --profile ci

- name: Upload native library
uses: actions/upload-artifact@v4
with:
name: native-lib-linux
path: native/target/ci/libcomet.so
retention-days: 1

spark-sql-auto-scan:
needs: build-native
strategy:
matrix:
os: [ubuntu-24.04]
Expand Down Expand Up @@ -81,11 +121,17 @@ jobs:
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.spark-version.java }}
- name: Download native library
uses: actions/download-artifact@v4
with:
name: native-lib-linux
path: native/target/release/
- name: Setup Spark
uses: ./.github/actions/setup-spark-builder
with:
spark-version: ${{ matrix.spark-version.full }}
spark-short-version: ${{ matrix.spark-version.short }}
skip-native-build: true
- name: Run Spark tests
run: |
cd apache-spark
Expand All @@ -105,6 +151,7 @@ jobs:
path: "**/fallback.log"

spark-sql-native-native-comet:
needs: build-native
strategy:
matrix:
os: [ ubuntu-24.04 ]
Expand All @@ -130,11 +177,17 @@ jobs:
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java-version }}
- name: Download native library
uses: actions/download-artifact@v4
with:
name: native-lib-linux
path: native/target/release/
- name: Setup Spark
uses: ./.github/actions/setup-spark-builder
with:
spark-version: ${{ matrix.spark-version.full }}
spark-short-version: ${{ matrix.spark-version.short }}
skip-native-build: true
- name: Run Spark tests
run: |
cd apache-spark
Expand All @@ -154,6 +207,7 @@ jobs:
path: "**/fallback.log"

spark-sql-native-iceberg-compat:
needs: build-native
strategy:
matrix:
os: [ubuntu-24.04]
Expand All @@ -179,11 +233,17 @@ jobs:
with:
rust-version: ${{env.RUST_VERSION}}
jdk-version: ${{ matrix.java-version }}
- name: Download native library
uses: actions/download-artifact@v4
with:
name: native-lib-linux
path: native/target/release/
- name: Setup Spark
uses: ./.github/actions/setup-spark-builder
with:
spark-version: ${{ matrix.spark-version.full }}
spark-short-version: ${{ matrix.spark-version.short }}
skip-native-build: true
- name: Run Spark tests
run: |
cd apache-spark
Expand Down
8 changes: 8 additions & 0 deletions native/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,11 @@ overflow-checks = false
lto = "thin"
codegen-units = 1
strip = "debuginfo"

# CI profile: faster compilation, same overflow behavior as release
# Use with: cargo build --profile ci
[profile.ci]
inherits = "release"
lto = false # Skip LTO for faster linking
codegen-units = 16 # Parallel codegen (faster compile, slightly larger binary)
# overflow-checks inherited as false from release
Loading