From 5b5781df3620cc82a9ebff7dbb8275eacbff948d Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Tue, 26 May 2026 18:15:02 +0100
Subject: [PATCH 1/4] Update separate eval-predictions repo with results

---
 .github/workflows/benchmark.yml | 34 +++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index c90f9bec..b5edba34 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -130,6 +130,40 @@ jobs:
             --parser-url http://localhost:8080 \
             --parser-image "${{ steps.image_tag.outputs.value }}"
 
+      - name: Checkout predictions repo
+        uses: actions/checkout@v5
+        with:
+          repository: elifepathways/sciencebeam-eval-predictions
+          path: sciencebeam-eval-predictions
+          token: ${{ secrets.PREDICTIONS_REPO_PAT }}
+          fetch-depth: 1  # shallow clone: only the latest commit is fetched
+          filter: blob:none  # partial clone: file contents are fetched on demand
+          sparse-checkout: .gitignore  # only .gitignore is placed in the working tree
+          sparse-checkout-cone-mode: false
+
+      - name: Push ScienceBeam Parser predictions
+        # TODO: restore `if: github.ref == 'refs/heads/main'` after testing
+        env:
+          IMAGE_TAG: ${{ steps.image_tag.outputs.value }}
+        run: |
+          VERSION="${IMAGE_TAG#sciencebeam-parser:}"
+          DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${{ env.BENCHMARK_SPLIT }}"
+          mkdir -p "${DEST}"
+          cp -r "${{ env.BENCHMARK_RUN }}/predictions/." "${DEST}/"
+          DATE=$(date --utc +%Y-%m-%dT%H:%M:%SZ)
+          echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${{ env.BENCHMARK_SPLIT }}\",\"mode\":\"${{ env.BENCHMARK_MODE }}\",\"generated_at\":\"${DATE}\"}" \
+            | python3 -m json.tool > "${DEST}/metadata.json"
+          cd sciencebeam-eval-predictions
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add .
+          if git diff --cached --quiet; then
+            echo "No new predictions to push"
+          else
+            git commit -m "Add sciencebeam-parser/${VERSION} ${BENCHMARK_SPLIT} predictions (${BENCHMARK_MODE})"  # values read from the step environment
+            git push
+          fi
+
       - name: Run score
         run: |
           uv run python -m benchmarks.score \

From 9f40021c6870dae170a07a6be0beb0bdb37a9670 Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Tue, 26 May 2026 19:22:15 +0100
Subject: [PATCH 2/4] Fix git add skipping new files by adding --sparse option

---
 .github/workflows/benchmark.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index b5edba34..8c330f35 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -156,7 +156,7 @@ jobs:
           cd sciencebeam-eval-predictions
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
-          git add .
+          git add --sparse .  # allow staging paths outside the sparse-checkout patterns (only .gitignore is checked out)
           if git diff --cached --quiet; then
             echo "No new predictions to push"
           else

From c272c05092f19c20cf8ab283fa690785e76b04a2 Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Tue, 26 May 2026 19:32:15 +0100
Subject: [PATCH 3/4] Limit pushing predictions to main again

---
 .github/workflows/benchmark.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 8c330f35..8f2d7c23 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -131,6 +131,7 @@ jobs:
             --parser-image "${{ steps.image_tag.outputs.value }}"
 
       - name: Checkout predictions repo
+        if: github.ref == 'refs/heads/main'  # same guard as the push step below
         uses: actions/checkout@v5
         with:
           repository: elifepathways/sciencebeam-eval-predictions
@@ -142,7 +143,7 @@ jobs:
           sparse-checkout-cone-mode: false
 
       - name: Push ScienceBeam Parser predictions
-        # TODO: restore `if: github.ref == 'refs/heads/main'` after testing
+        if: github.ref == 'refs/heads/main'
         env:
           IMAGE_TAG: ${{ steps.image_tag.outputs.value }}
         run: |

From 9e19cdf77eae3f947a45dab1290fa9c58cce40c7 Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Tue, 26 May 2026 19:41:02 +0100
Subject: [PATCH 4/4] Add variant folder to predictions repo

---
 .github/workflows/benchmark.yml | 17 +++++++++++++----
 benchmarks/eval.yml             |  2 ++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 8f2d7c23..ae21b32d 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -146,13 +146,22 @@ jobs:
         if: github.ref == 'refs/heads/main'
         env:
           IMAGE_TAG: ${{ steps.image_tag.outputs.value }}
+          SPLIT: ${{ env.BENCHMARK_SPLIT }}
+          MODE: ${{ env.BENCHMARK_MODE }}
         run: |
           VERSION="${IMAGE_TAG#sciencebeam-parser:}"
-          DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${{ env.BENCHMARK_SPLIT }}"
-          mkdir -p "${DEST}"
-          cp -r "${{ env.BENCHMARK_RUN }}/predictions/." "${DEST}/"
+          DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${SPLIT}"
+          PREDICTIONS_SRC="${BENCHMARK_RUN}/predictions"
+          for CORPUS_DIR in "${PREDICTIONS_SRC}"/*/; do  # one sub-directory per corpus
+            [ -d "${CORPUS_DIR}" ] || continue
+            CORPUS=$(basename "${CORPUS_DIR}")
+            VARIANT=$(python3 -c "import sys, yaml; c=yaml.safe_load(open('benchmarks/eval.yml')); print(c['dataset']['splits'].get(sys.argv[1],{}).get(sys.argv[2],{}).get('variant','v1'))" "${SPLIT}" "${CORPUS}")
+            mkdir -p "${DEST}/${CORPUS}/${VARIANT}"
+            cp -r "${CORPUS_DIR}/." "${DEST}/${CORPUS}/${VARIANT}/"
+          done
+          [ -f "${PREDICTIONS_SRC}/manifest.jsonl" ] && cp "${PREDICTIONS_SRC}/manifest.jsonl" "${DEST}/"
           DATE=$(date --utc +%Y-%m-%dT%H:%M:%SZ)
-          echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${{ env.BENCHMARK_SPLIT }}\",\"mode\":\"${{ env.BENCHMARK_MODE }}\",\"generated_at\":\"${DATE}\"}" \
+          echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${SPLIT}\",\"mode\":\"${MODE}\",\"generated_at\":\"${DATE}\"}" \
             | python3 -m json.tool > "${DEST}/metadata.json"
           cd sciencebeam-eval-predictions
           git config user.name "github-actions[bot]"
diff --git a/benchmarks/eval.yml b/benchmarks/eval.yml
index c986bdd5..9ba16e61 100644
--- a/benchmarks/eval.yml
+++ b/benchmarks/eval.yml
@@ -6,10 +6,12 @@ dataset:
       biorxiv:
         file: biorxiv-jats/train-00000-of-00001.parquet
         id_column: ppr_id
+        variant: v1  # sub-folder name under <corpus>/ when predictions are published
     validation:
       biorxiv:
         file: biorxiv-jats/validation-00000-of-00001.parquet
         id_column: ppr_id
+        variant: v1  # sub-folder name under <corpus>/ when predictions are published
 
 sampling:
   smoke: