diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml index 64d4862276..6105962161 100644 --- a/.github/actions/java-test/action.yaml +++ b/.github/actions/java-test/action.yaml @@ -37,11 +37,16 @@ inputs: description: 'Whether to upload test results including coverage to GitHub' required: false default: 'false' + skip-native-build: + description: 'Skip native build (when using pre-built artifact)' + required: false + default: 'false' runs: using: "composite" steps: - name: Run Cargo release build + if: ${{ inputs.skip-native-build != 'true' }} shell: bash # it is important that we run the Scala tests against a release build rather than a debug build # to make sure that no tests are relying on overflow checks that are present only in debug builds diff --git a/.github/actions/setup-spark-builder/action.yaml b/.github/actions/setup-spark-builder/action.yaml index 68f5170c4b..99bdc9bb88 100644 --- a/.github/actions/setup-spark-builder/action.yaml +++ b/.github/actions/setup-spark-builder/action.yaml @@ -24,6 +24,10 @@ inputs: spark-version: description: 'The Apache Spark version (e.g., 3.5.7) to build' required: true + skip-native-build: + description: 'Skip native build (when using pre-built artifact)' + required: false + default: 'false' runs: using: "composite" steps: @@ -51,7 +55,15 @@ runs: restore-keys: | ${{ runner.os }}-spark-sql- - - name: Build Comet + - name: Build Comet (with native) + if: ${{ inputs.skip-native-build != 'true' }} shell: bash run: | PROFILES="-Pspark-${{inputs.spark-short-version}}" make release + + - name: Build Comet (Maven only, skip native) + if: ${{ inputs.skip-native-build == 'true' }} + shell: bash + run: | + # Native library should already be in native/target/release/ + ./mvnw install -Prelease -DskipTests -Pspark-${{inputs.spark-short-version}} diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 8e4dc5124b..53ba7abc9c 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -46,8 +46,48 @@ env: RUST_VERSION: stable jobs: - - # Run Rust tests once per JDK version + + # Build native library once and share with all test jobs + build-native: + name: Build Native Library + runs-on: ubuntu-latest + container: + image: amd64/rust + steps: + - uses: actions/checkout@v6 + + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ env.RUST_VERSION }} + jdk-version: 17 # JDK only needed for common module proto generation + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + native/target + key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-ci- + + - name: Build native library (CI profile) + run: | + cd native + # CI profile: same overflow behavior as release, but faster compilation + # (no LTO, parallel codegen) + cargo build --profile ci + + - name: Upload native library + uses: actions/upload-artifact@v4 + with: + name: native-lib-linux + path: native/target/ci/libcomet.so + retention-days: 1 + + # Run Rust tests (runs in parallel with build-native, uses debug builds) linux-test-rust: strategy: matrix: @@ -60,15 +100,29 @@ jobs: image: amd64/rust steps: - uses: actions/checkout@v6 + - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-builder with: - rust-version: ${{env.RUST_VERSION}} + rust-version: ${{ env.RUST_VERSION }} jdk-version: ${{ matrix.java_version }} + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + native/target + key: ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}- + - name: Rust test steps uses: ./.github/actions/rust-test - + linux-test: + needs: build-native strategy: matrix: os: [ubuntu-latest] @@ -186,11 +240,31 @@ jobs: steps: - uses: actions/checkout@v6 + - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-builder with: - rust-version: ${{env.RUST_VERSION}} + rust-version: ${{ env.RUST_VERSION }} jdk-version: ${{ matrix.profile.java_version }} + + - name: Download native library + uses: actions/download-artifact@v4 + with: + name: native-lib-linux + # Download to release/ since Maven's -Prelease expects libcomet.so there + path: native/target/release/ + + # Restore cargo registry cache (for any cargo commands that might run) + - name: Cache Cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-registry- + - name: Java test steps uses: ./.github/actions/java-test with: @@ -199,3 +273,4 @@ jobs: maven_opts: ${{ matrix.profile.maven_opts }} scan_impl: ${{ matrix.profile.scan_impl }} upload-test-reports: true + skip-native-build: true diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml index f94071dbc7..88dc74cdb5 100644 --- a/.github/workflows/pr_build_macos.yml +++ b/.github/workflows/pr_build_macos.yml @@ -47,7 +47,48 @@ env: jobs: + # Build native library once and share with all test jobs + build-native: + name: Build Native Library (macOS) + runs-on: macos-14 + steps: + - uses: actions/checkout@v6 + + - name: Setup Rust & Java toolchain + uses: ./.github/actions/setup-macos-builder + with: + rust-version: ${{ env.RUST_VERSION }} + jdk-version: 17 + jdk-architecture: aarch64 + protoc-architecture: aarch_64 + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + native/target + key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-ci- + + - name: Build native library (CI profile) + run: | + cd native + # CI profile: same overflow behavior as release, but faster compilation + # (no LTO, parallel codegen) + cargo build --profile ci + + - name: Upload native library + uses: actions/upload-artifact@v4 + with: + name: native-lib-macos + path: native/target/ci/libcomet.dylib + retention-days: 1 + macos-aarch64-test: + needs: build-native strategy: matrix: os: [macos-14] @@ -145,13 +186,33 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 + - name: Setup Rust & Java toolchain uses: ./.github/actions/setup-macos-builder with: - rust-version: ${{env.RUST_VERSION}} + rust-version: ${{ env.RUST_VERSION }} jdk-version: ${{ matrix.profile.java_version }} jdk-architecture: aarch64 protoc-architecture: aarch_64 + + - name: Download native library + uses: actions/download-artifact@v4 + with: + name: native-lib-macos + # Download to release/ since Maven's -Prelease expects libcomet.dylib there + path: native/target/release/ + + # Restore cargo registry cache (for any cargo commands that might run) + - name: Cache Cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-registry- + - name: Set thread thresholds envs for spark test on macOS # see: https://github.com/apache/datafusion-comet/issues/2965 shell: bash @@ -160,9 +221,11 @@ jobs: echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV + - name: Java test steps uses: ./.github/actions/java-test with: artifact_name: ${{ matrix.os }}-${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }} maven_opts: ${{ matrix.profile.maven_opts }} + skip-native-build: true diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 2fe5fefe1a..955fc69279 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -52,7 +52,47 @@ env: RUST_VERSION: stable jobs: + + # Build native library once and share with all test jobs + build-native: + name: Build Native Library + runs-on: ubuntu-24.04 + container: + image: amd64/rust + steps: + - uses: actions/checkout@v6 + + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ env.RUST_VERSION }} + jdk-version: 17 + + - name: Cache Cargo + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + native/target + key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-ci- + + - name: Build native library (CI profile) + run: | + cd native + cargo build --profile ci + + - name: Upload native library + uses: actions/upload-artifact@v4 + with: + name: native-lib-linux + path: native/target/ci/libcomet.so + retention-days: 1 + spark-sql-auto-scan: + needs: build-native strategy: matrix: os: [ubuntu-24.04] @@ -81,11 +121,17 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.spark-version.java }} + - name: Download native library + uses: actions/download-artifact@v4 + with: + name: native-lib-linux + path: native/target/release/ - name: Setup Spark uses: ./.github/actions/setup-spark-builder with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} + skip-native-build: true - name: Run Spark tests run: | cd apache-spark @@ -105,6 +151,7 @@ jobs: path: "**/fallback.log" spark-sql-native-native-comet: + needs: build-native strategy: matrix: os: [ ubuntu-24.04 ] @@ -130,11 +177,17 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java-version }} + - name: Download native library + uses: actions/download-artifact@v4 + with: + name: native-lib-linux + path: native/target/release/ - name: Setup Spark uses: ./.github/actions/setup-spark-builder with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} + skip-native-build: true - name: Run Spark tests run: | cd apache-spark @@ -154,6 +207,7 @@ jobs: path: "**/fallback.log" spark-sql-native-iceberg-compat: + needs: build-native strategy: matrix: os: [ubuntu-24.04] @@ -179,11 +233,17 @@ jobs: with: rust-version: ${{env.RUST_VERSION}} jdk-version: ${{ matrix.java-version }} + - name: Download native library + uses: actions/download-artifact@v4 + with: + name: native-lib-linux + path: native/target/release/ - name: Setup Spark uses: ./.github/actions/setup-spark-builder with: spark-version: ${{ matrix.spark-version.full }} spark-short-version: ${{ matrix.spark-version.short }} + skip-native-build: true - name: Run Spark tests run: | cd apache-spark diff --git a/native/Cargo.toml b/native/Cargo.toml index 9d1632e781..bb6cc9875d 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -62,3 +62,11 @@ overflow-checks = false lto = "thin" codegen-units = 1 strip = "debuginfo" + +# CI profile: faster compilation, same overflow behavior as release +# Use with: cargo build --profile ci +[profile.ci] +inherits = "release" +lto = false # Skip LTO for faster linking +codegen-units = 16 # Parallel codegen (faster compile, slightly larger binary) +# overflow-checks inherited as false from release