diff --git a/.github/workflows/ci-gpu.yml b/.github/workflows/ci-gpu.yml index f09edd85c6..69c9a58aab 100644 --- a/.github/workflows/ci-gpu.yml +++ b/.github/workflows/ci-gpu.yml @@ -11,6 +11,14 @@ on: permissions: contents: read +concurrency: + # Cancel superseded runs in the same group (workflow + PR number, or workflow + ref when there is no PR). On non-PR events + # (workflow_dispatch, repository_dispatch) `pull_request.labels` is null; + # `contains(null, _)` returns false -> `!contains(...)` evaluates true, + # so cancellation defaults on and non-PR runs sharing a ref cancel one another. The `skip-cancel` PR label opts out on PRs. + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ !contains(github.event.pull_request.labels.*.name, 'skip-cancel') }} + env: UV_EXCLUDE_NEWER: "6 days" @@ -48,22 +56,6 @@ jobs: - name: Do nothing run: echo "Do nothing" - cancel_outstanding: - name: Detect and cancel outstanding runs of this workflow - # Temporarily disabled while gpu_public is unavailable and PR-D hardening is pending. - # Re-enable only after issue #1130 criteria are met. - if: ${{ vars.GRAPHISTRY_ENABLE_GPU_PUBLIC == 'true' }} # see #1130; disabled by default - runs-on: ubuntu-latest - permissions: - actions: write - timeout-minutes: 10 - steps: - - name: Cancel Previous Runs - if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip-cancel') }} - uses: styfle/cancel-workflow-action@0.11.0 - with: - access_token: ${{ github.token }} - test-full-ai: # Temporarily disabled while gpu_public is unavailable and PR-D hardening is pending. # Re-enable only after issue #1130 criteria are met. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c4bd2b737a..b6e43a45f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,60 +45,145 @@ jobs: - uses: actions/checkout@v4 with: persist-credentials: false - - uses: dorny/paths-filter@v3 + # full history needed so the `git merge-base` / `git diff` calls in the filter step can resolve both commits on PRs / non-trivial pushes + fetch-depth: 0 + - name: Compute path filters id: filter - with: - filters: | - # Infrastructure changes that affect all tests - infra: - - '.github/workflows/ci.yml' - - 'docker/**' - - 'bin/**' - - 'setup.py' - - 'setup.cfg' - - 'MANIFEST.in' - - # Python code changes - python: - - '**.py' - - 'graphistry/**' - - 'setup.py' - - 'setup.cfg' - - 'pytest.ini' - - 'mypy.ini' - - 'bin/lint.sh' - - 'bin/typecheck.sh' - - # GFQL-specific changes - gfql: - - 'graphistry/gfql/**' - - 'graphistry/compute/gfql/**' - - 'graphistry/compute/gfql_unified.py' - - 'graphistry/models/gfql/**' - - 'graphistry/Plottable.py' - - 'tests/gfql/**' - - # Cypher frontend gate relevant changes - cypher_frontend_ci: - - '.github/workflows/ci.yml' - - '.github/workflows/ci-gpu.yml' - - 'graphistry/compute/gfql/ir/**' - - 'graphistry/compute/gfql/cypher/**' - - 'graphistry/compute/gfql/frontends/cypher/**' - - 'graphistry/tests/compute/gfql/cypher/**' - - 'tests/gfql/ref/**' - - # Benchmark-specific changes - benchmarks: - - 'benchmarks/**' - - # Documentation changes - docs: - - 'docs/**' - - '**.md' - - '**.rst' - - 'demos/**' - - 'notebooks/**' + env: + EVENT_NAME: ${{ github.event_name }} + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + PUSH_BEFORE: ${{ github.event.before }} + HEAD_SHA: ${{ github.sha }} + run: | + set -euo pipefail + keys=(infra python gfql cypher_frontend_ci benchmarks docs) + + emit_all() { + # $1 = "true" or "false" — write `<key>=$1` to $GITHUB_OUTPUT for every key in `keys` + for k in "${keys[@]}"; do + echo "${k}=$1" >> "$GITHUB_OUTPUT" + done + } + + # Non 
diff-bearing events: rely on downstream `event_name == 'workflow_dispatch'` + # / `'schedule'` OR-conditions in job `if:` expressions. Emit false safely. + if [[ "$EVENT_NAME" != "pull_request" && "$EVENT_NAME" != "push" ]]; then + emit_all false + exit 0 + fi + + if [[ "$EVENT_NAME" == "pull_request" ]]; then + base="$PR_BASE_SHA" + head="$PR_HEAD_SHA" + else + base="$PUSH_BEFORE" + head="$HEAD_SHA" + fi + + # New branch / first push: zero-SHA base. Conservative: run everything. + if [[ -z "$base" || "$base" == "0000000000000000000000000000000000000000" ]]; then + emit_all true + exit 0 + fi + + # Diff vs the merge-base so PR runs see only commits that diverge from base + # (avoids picking up unrelated commits the base ref has gained meanwhile). + # If either resolution fails (e.g., force-pushed branch with orphaned + # event.before, or rebased PR base ref), fall back to the conservative + # "couldn't compute, run everything" stance and surface a GHA warning + # so operators can see why every job ran. Note: stderr is left + # un-redirected so the underlying git error (e.g., "Not a valid object + # name") shows up in the runner log alongside the warning. `--no-renames` makes a moved file appear under both its old and new path (delete + add), so a rename out of a filtered tree still trips that filter. + if ! merge_base=$(git merge-base "$base" "$head"); then + echo "::warning::path-filter: git merge-base failed for $base..$head; running all gated jobs conservatively" + emit_all true + exit 0 + fi + if ! changed=$(git diff --name-only --no-renames "$merge_base" "$head"); then + echo "::warning::path-filter: git diff failed for $merge_base..$head; running all gated jobs conservatively" + emit_all true + exit 0 + fi + + # emit <key> <regex>... — joins the regex args with `|` and emits <key>=true/false. + # Each pattern goes on its own line at the call site for readability; + # this is the reason emit takes variadic args instead of a pre-joined string. + emit() { + local key="$1" + shift + local IFS='|' + local pat="$*" + # Use a here-string instead of `printf "%s\n" "$changed" | grep -qE`. 
+ # Why: with `set -o pipefail`, if `grep -q` matches early on a $changed + # larger than the pipe buffer (~64 KB; thousands of files), `grep` exits + # before `printf` finishes flushing. `printf` then receives SIGPIPE and + # exits 141. `pipefail` propagates that 141 to the pipeline, the `if` + # sees non-zero, and we'd silently emit `false` for a key that should + # be `true`. A here-string is a pure redirection (not a pipeline), so + # the data is fully buffered before grep reads, eliminating the race. + if [[ -n "$changed" ]] && grep -qE "$pat" <<< "$changed"; then + echo "${key}=true" >> "$GITHUB_OUTPUT" + else + echo "${key}=false" >> "$GITHUB_OUTPUT" + fi + } + + # Filter dimensions — patterns mirror the prior dorny/paths-filter YAML + # globs converted to anchored regex. Keep one pattern per line so adds + # / removes diff cleanly and the conversion stays auditable. + + # Infrastructure: this workflow file (`ci.yml` only), docker, bin scripts, root build config. + emit infra \ + '^\.github/workflows/ci\.yml$' \ + '^docker/' \ + '^bin/' \ + '^setup\.py$' \ + '^setup\.cfg$' \ + '^MANIFEST\.in$' + + # Python code + lint/type config. + emit python \ + '\.py$' \ + '^graphistry/' \ + '^setup\.py$' \ + '^setup\.cfg$' \ + '^pytest\.ini$' \ + '^mypy\.ini$' \ + '^bin/lint\.sh$' \ + '^bin/typecheck\.sh$' + + # GFQL core + tests. + emit gfql \ + '^graphistry/gfql/' \ + '^graphistry/compute/gfql/' \ + '^graphistry/compute/gfql_unified\.py$' \ + '^graphistry/models/gfql/' \ + '^graphistry/Plottable\.py$' \ + '^tests/gfql/' + + # Cypher frontend gate-relevant paths (workflow defs included so a CI + # change re-triggers the cypher gate; `ci.yml` also trips `infra`, `ci-gpu.yml` trips only this key). + emit cypher_frontend_ci \ + '^\.github/workflows/ci\.yml$' \ + '^\.github/workflows/ci-gpu\.yml$' \ + '^graphistry/compute/gfql/ir/' \ + '^graphistry/compute/gfql/cypher/' \ + '^graphistry/compute/gfql/frontends/cypher/' \ + '^graphistry/tests/compute/gfql/cypher/' \ + '^tests/gfql/ref/' + + # Benchmarks suite. 
+ emit benchmarks \ + '^benchmarks/' + + # Documentation: docs tree + any md/rst at any depth (the suffix patterns are deliberately not `^`-anchored) + demos + notebooks. + emit docs \ + '^docs/' \ + '\.md$' \ + '\.rst$' \ + '^demos/' \ + '^notebooks/' - name: Detect docs-only change on tip id: docs_only_latest @@ -391,6 +476,7 @@ jobs: uses: actions/checkout@v4 with: lfs: true + persist-credentials: false - name: Download lockfiles uses: actions/download-artifact@v4 @@ -441,6 +527,7 @@ jobs: uses: actions/checkout@v4 with: lfs: true + persist-credentials: false - name: Download lockfiles uses: actions/download-artifact@v4 @@ -1237,6 +1324,7 @@ jobs: - uses: actions/checkout@v4 with: lfs: true + persist-credentials: false - name: Set up Python 3.14 uses: actions/setup-python@v5 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index e1581e0158..a7b2ea7ee6 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -41,6 +41,7 @@ jobs: uses: actions/checkout@v4 with: lfs: true + persist-credentials: false - name: Checkout LFS objects run: git lfs pull diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index d273ea7aa6..42d825e0c4 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -37,6 +37,7 @@ jobs: - uses: actions/checkout@v4 with: # fetch tag for versioneer fetch-depth: 0 + persist-credentials: false - name: Verify trusted release trigger run: | diff --git a/.github/workflows/workflow-security.yml b/.github/workflows/workflow-security.yml index ef21b6b616..4d92ecf0ff 100644 --- a/.github/workflows/workflow-security.yml +++ b/.github/workflows/workflow-security.yml @@ -68,6 +68,9 @@ jobs: python3 -m venv .venv-zizmor source .venv-zizmor/bin/activate uv pip install --upgrade zizmor - - name: Run zizmor (high severity gate) + - name: Run zizmor (medium severity gate) + # Lowered from `high` to `medium` to enable enforcement of the + # `unpinned-uses` provider safelist 
(see #1130 / #1215). zizmor's + # `unpinned-uses` rule emits at medium severity for policy violations. run: | - .venv-zizmor/bin/zizmor --format=github --no-progress --config .github/zizmor.yml --min-severity high .github/workflows/*.yml + .venv-zizmor/bin/zizmor --format=github --no-progress --config .github/zizmor.yml --min-severity medium .github/workflows/*.yml diff --git a/.github/zizmor.yml b/.github/zizmor.yml index 2fe494c600..ae00adcbd2 100644 --- a/.github/zizmor.yml +++ b/.github/zizmor.yml @@ -1,3 +1,20 @@ rules: unpinned-uses: - disable: true + config: + # Provider safelist for `uses:` references. See umbrella #1130 + #1215. + # + # Trusted orgs (allowed on floating refs): + # - `actions/*` — GitHub-owned core + # - `github/*` — GitHub-owned extended (codeql-action etc.) + # - `pypa/*` — PyPA-maintained, including `gh-action-pypi-publish` + # + # Anything outside the safelist must pin to a full commit SHA. + # + # Decision context: SHA-pinning trusted-org actions trades fast CVE patching + # for marginal mutable-tag protection that the orgs' account security already + # provides. See #1215 body for the full rationale. + policies: + "actions/*": ref-pin + "github/*": ref-pin + "pypa/*": ref-pin + "*": hash-pin diff --git a/CHANGELOG.md b/CHANGELOG.md index aeafad4eba..d6e9760c50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Infrastructure +- **CI / supply-chain hardening (PR-G 7a)**: Replaced the third-party GitHub Actions outside the trusted-provider safelist with native primitives — `styfle/cancel-workflow-action` replaced by a workflow-level `concurrency:` block in `ci-gpu.yml`; `dorny/paths-filter` replaced by an inline `git diff --name-only` shell filter in `ci.yml` (preserves all 6 outputs: `infra`, `python`, `gfql`, `cypher_frontend_ci`, `benchmarks`, `docs`). 
Configured zizmor `unpinned-uses` rule with a provider safelist (`actions/*`, `github/*`, `pypa/*` permitted on floating refs; everything else must hash-pin) and lowered the workflow-security gate from `--min-severity high` to `medium` so policy violations actually fail CI. Closed 5 PR-B follow-up gaps by adding `persist-credentials: false` to LFS / fetch-depth checkouts in `ci.yml` (×3), `codeql-analysis.yml`, and `publish-pypi.yml`. Also fixed a silent test-skipping (fail-open) bug surfaced during multi-wave review: when `git merge-base` cannot resolve (force-pushed branch with orphaned `event.before`, or PR base rebased mid-flight), the `changes` job now conservatively emits `true` for every output and downstream jobs run, instead of silently emitting `false` and skipping all gated tests (#1221, #1215, #1130). - **CI / docs**: `test-readme` no longer runs `actions/setup-python` with an EOL Python 3.8 pin. The job now runs markdown lint directly via its Docker image, removing an unnecessary setup step and avoiding intermittent Python toolcache fetch timeouts. - **CI / build lane**: `test-build` now runs on Python 3.14 with `build-py3.14.lock` instead of a fixed Python 3.8 runner, reducing reliance on EOL interpreter setup while preserving explicit 3.8 compatibility test lanes elsewhere in CI. - **CI / token hardening**: CI workflows now declare explicit least-privilege default token scope (`permissions: contents: read`) and set `persist-credentials: false` on all checkout steps in `ci.yml` and `ci-gpu.yml`; GPU cancel job keeps a scoped `actions: write` override for run cancellation (#1130).