Closed
19 commits
5598fc3
[INFRA] Share SBT compile artifact across pyspark and sparkr CI jobs
zhengruifeng May 7, 2026
f449cb7
[INFRA] Rename precompile-pyspark job to precompile
zhengruifeng May 7, 2026
00c3fc9
[INFRA] Switch precompile artifact compression from zstd to xz
zhengruifeng May 7, 2026
8c039ce
[INFRA] Switch precompile artifact compression from xz to bzip2
zhengruifeng May 7, 2026
bf0d8fc
[INFRA][TEMP] Restrict CI matrix to pyspark only for iteration
zhengruifeng May 7, 2026
ec44909
[INFRA] Drop sparkr from compile-reuse scope
zhengruifeng May 7, 2026
5901d81
[INFRA] Remove temporary precondition override
zhengruifeng May 7, 2026
ee5e084
[INFRA] Include pyspark-install in precompile job's gate
zhengruifeng May 7, 2026
9323883
[INFRA] Add precompile-cleanup job to delete artifact after pyspark f…
zhengruifeng May 7, 2026
9545921
Revert "[INFRA] Add precompile-cleanup job to delete artifact after p…
zhengruifeng May 7, 2026
d425f4e
[INFRA] Drop Free up disk space step from precompile job
zhengruifeng May 7, 2026
6781995
[INFRA] Make precompile optional with graceful fallback in pyspark ma…
zhengruifeng May 8, 2026
d842b93
[INFRA] Switch precompile artifact compression from bzip2 to gzip
zhengruifeng May 8, 2026
8eab5ca
[INFRA] Move SKIP_SCALA_BUILD export inside the Run tests step
zhengruifeng May 8, 2026
83d7ede
[INFRA] Add echo when SKIP_SCALA_BUILD is enabled
zhengruifeng May 8, 2026
17d1a25
[INFRA] Use explicit default for SKIP_SCALA_BUILD check in run-tests.py
zhengruifeng May 8, 2026
f9a79ac
[INFRA][TEMP] Restrict CI matrix to pyspark only for iteration
zhengruifeng May 8, 2026
a658f40
[INFRA][TEMP] Restrict CI matrix to pyspark only for iteration
zhengruifeng May 8, 2026
d602337
[INFRA] Remove TEMP precondition override and include branch in artif…
zhengruifeng May 8, 2026
92 changes: 91 additions & 1 deletion .github/workflows/build_and_test.yml
@@ -537,8 +537,80 @@ jobs:
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-${{ env.PYSPARK_IMAGE_TO_TEST }}-cache:${{ inputs.branch }}


precompile:
needs: precondition
if: >-
(!cancelled()) && (
fromJson(needs.precondition.outputs.required).pyspark == 'true' ||
fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true' ||
fromJson(needs.precondition.outputs.required).pyspark-install == 'true')
name: "Precompile Spark"
runs-on: ubuntu-latest
timeout-minutes: 60
# Optional optimization: if this job fails or is cancelled, the pyspark
# matrix entries fall back to running the SBT build locally as before.
continue-on-error: true
env:
HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v6
with:
fetch-depth: 0
repository: apache/spark
ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache SBT and Maven
uses: actions/cache@v5
with:
path: |
build/apache-maven-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v5
with:
path: ~/.cache/coursier
key: precompile-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
precompile-coursier-
- name: Install Java ${{ inputs.java }}
uses: actions/setup-java@v5
with:
distribution: zulu
java-version: ${{ inputs.java }}
- name: Build Spark
run: |
./build/sbt -Phadoop-3 -Pyarn -Pspark-ganglia-lgpl -Phadoop-cloud -Phive \
-Pkubernetes -Pjvm-profiler -Pkinesis-asl -Phive-thriftserver \
-Pdocker-integration-tests -Pvolcano \
Test/package streaming-kinesis-asl-assembly/assembly connect/assembly assembly/package
- name: Package compile output
run: |
find . -type d -name target -not -path './build/*' -not -path './.git/*' -print0 \
| tar --null -czf compile-artifact.tar.gz -T -
ls -lh compile-artifact.tar.gz
- name: Upload compile artifact
uses: actions/upload-artifact@v6
with:
name: spark-compile-${{ inputs.branch }}-${{ github.run_id }}
path: compile-artifact.tar.gz
retention-days: 1
if-no-files-found: error

pyspark:
needs: [precondition, infra-image]
needs: [precondition, infra-image, precompile]
# always run if pyspark == 'true', even if infra-image is skipped (such as in a non-master job)
if: (!cancelled()) && (fromJson(needs.precondition.outputs.required).pyspark == 'true' || fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true')
name: "Build modules: ${{ matrix.modules }}"
@@ -659,11 +731,29 @@ jobs:
$py -m pip list
echo ""
done
- name: Download precompiled artifact
id: download-precompiled
if: needs.precompile.result == 'success'
continue-on-error: true
uses: actions/download-artifact@v6
with:
name: spark-compile-${{ inputs.branch }}-${{ github.run_id }}
- name: Extract precompiled artifact
id: extract-precompiled
if: steps.download-precompiled.outcome == 'success'
continue-on-error: true
run: |
tar -xzf compile-artifact.tar.gz
rm compile-artifact.tar.gz
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
shell: 'script -q -e -c "bash {0}"'
run: |
if [ "${{ steps.extract-precompiled.outcome }}" = "success" ]; then
export SKIP_SCALA_BUILD=true
echo "Reusing precompiled artifact, skipping local SBT build."
fi
if [[ "$MODULES_TO_TEST" == *"pyspark-pipelines"* ]]; then
export SKIP_PACKAGING=false
echo "Python Packaging Tests Enabled!"
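As a side note, the packaging and extraction steps in the workflow above can be reproduced outside CI. The following is only an illustrative sketch (not part of the change); it assumes a local Spark checkout that has already been built with the same SBT command used by the precompile job.

    # Package every module's target/ directory, mirroring the "Package compile output" step.
    find . -type d -name target -not -path './build/*' -not -path './.git/*' -print0 \
      | tar --null -czf compile-artifact.tar.gz -T -
    ls -lh compile-artifact.tar.gz

    # In a fresh checkout of the same commit, unpack it the way the pyspark job does.
    tar -xzf compile-artifact.tar.gz
    rm compile-artifact.tar.gz
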
6 changes: 4 additions & 2 deletions dev/run-tests.py
@@ -644,7 +644,8 @@ def main():
run_build_tests()

# spark build
build_apache_spark(build_tool, extra_profiles)
if os.environ.get("SKIP_SCALA_BUILD", "false") != "true":
build_apache_spark(build_tool, extra_profiles)

# backwards compatibility checks
if build_tool == "sbt":
@@ -653,7 +654,8 @@
detect_binary_inop_with_mima(extra_profiles)
# Since we did not build assembly/package before running dev/mima, we need to
# do it here because the tests still rely on it; see SPARK-13294 for details.
build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
if os.environ.get("SKIP_SCALA_BUILD", "false") != "true":
build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

# run the test suites
run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
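
For local verification, the new guard in dev/run-tests.py can be exercised by exporting the same variable the workflow sets. A minimal sketch, assuming the compile artifact has already been extracted into the checkout (module selection is omitted here; in CI it comes from the job environment):

    # Skip the Scala/SBT build steps and go straight to the later phases,
    # relying on the pre-extracted target/ directories.
    export SKIP_SCALA_BUILD=true
    python dev/run-tests.py

Any value other than the string "true" (including the explicit default "false") keeps the original build behaviour.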