From 5b5781df3620cc82a9ebff7dbb8275eacbff948d Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Tue, 26 May 2026 18:15:02 +0100 Subject: [PATCH 1/4] Update separate eval-predictions repo with results --- .github/workflows/benchmark.yml | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c90f9bec..b5edba34 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -130,6 +130,40 @@ jobs: --parser-url http://localhost:8080 \ --parser-image "${{ steps.image_tag.outputs.value }}" + - name: Checkout predictions repo + uses: actions/checkout@v5 + with: + repository: elifepathways/sciencebeam-eval-predictions + path: sciencebeam-eval-predictions + token: ${{ secrets.PREDICTIONS_REPO_PAT }} + fetch-depth: 1 + filter: blob:none + sparse-checkout: .gitignore + sparse-checkout-cone-mode: false + + - name: Push ScienceBeam Parser predictions + # TODO: restore `if: github.ref == 'refs/heads/main'` after testing + env: + IMAGE_TAG: ${{ steps.image_tag.outputs.value }} + run: | + VERSION="${IMAGE_TAG#sciencebeam-parser:}" + DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${{ env.BENCHMARK_SPLIT }}" + mkdir -p "${DEST}" + cp -r "${{ env.BENCHMARK_RUN }}/predictions/." "${DEST}/" + DATE=$(date --utc +%Y-%m-%dT%H:%M:%SZ) + echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${{ env.BENCHMARK_SPLIT }}\",\"mode\":\"${{ env.BENCHMARK_MODE }}\",\"generated_at\":\"${DATE}\"}" \ + | python3 -m json.tool > "${DEST}/metadata.json" + cd sciencebeam-eval-predictions + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add . + if git diff --cached --quiet; then + echo "No new predictions to push" + else + git commit -m "Add sciencebeam-parser/${VERSION} ${{ env.BENCHMARK_SPLIT }} predictions (${{ env.BENCHMARK_MODE }})" + git push + fi + - name: Run score run: | uv run python -m benchmarks.score \ From 9f40021c6870dae170a07a6be0beb0bdb37a9670 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Tue, 26 May 2026 19:22:15 +0100 Subject: [PATCH 2/4] Fixed update files missing sparse option --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b5edba34..8c330f35 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -156,7 +156,7 @@ jobs: cd sciencebeam-eval-predictions git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add . + git add --sparse . if git diff --cached --quiet; then echo "No new predictions to push" else From c272c05092f19c20cf8ab283fa690785e76b04a2 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Tue, 26 May 2026 19:32:15 +0100 Subject: [PATCH 3/4] Limit pushing predictions to main again --- .github/workflows/benchmark.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 8c330f35..8f2d7c23 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -131,6 +131,7 @@ jobs: --parser-image "${{ steps.image_tag.outputs.value }}" - name: Checkout predictions repo + if: github.ref == 'refs/heads/main' uses: actions/checkout@v5 with: repository: elifepathways/sciencebeam-eval-predictions @@ -142,7 +143,7 @@ jobs: sparse-checkout-cone-mode: false - name: Push ScienceBeam Parser predictions - # TODO: restore `if: github.ref == 'refs/heads/main'` after testing + if: github.ref == 'refs/heads/main' env: IMAGE_TAG: ${{ steps.image_tag.outputs.value }} run: | From 9e19cdf77eae3f947a45dab1290fa9c58cce40c7 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Tue, 26 May 2026 19:41:02 +0100 Subject: [PATCH 4/4] Add variant folder to predictions repo --- .github/workflows/benchmark.yml | 17 +++++++++++++---- benchmarks/eval.yml | 2 ++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 8f2d7c23..ae21b32d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -146,13 +146,22 @@ jobs: if: github.ref == 'refs/heads/main' env: IMAGE_TAG: ${{ steps.image_tag.outputs.value }} + SPLIT: ${{ env.BENCHMARK_SPLIT }} + MODE: ${{ env.BENCHMARK_MODE }} run: | VERSION="${IMAGE_TAG#sciencebeam-parser:}" - DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${{ env.BENCHMARK_SPLIT }}" - mkdir -p "${DEST}" - cp -r "${{ env.BENCHMARK_RUN }}/predictions/." "${DEST}/" + DEST="sciencebeam-eval-predictions/sciencebeam-parser/${VERSION}/${SPLIT}" + PREDICTIONS_SRC="${{ env.BENCHMARK_RUN }}/predictions" + for CORPUS_DIR in "${PREDICTIONS_SRC}"/*/; do + [ -d "${CORPUS_DIR}" ] || continue + CORPUS=$(basename "${CORPUS_DIR}") + VARIANT=$(python3 -c "import yaml; c=yaml.safe_load(open('benchmarks/eval.yml')); print(c['dataset']['splits'].get('${SPLIT}',{}).get('${CORPUS}',{}).get('variant','v1'))") + mkdir -p "${DEST}/${CORPUS}/${VARIANT}" + cp -r "${CORPUS_DIR}/." "${DEST}/${CORPUS}/${VARIANT}/" + done + [ -f "${PREDICTIONS_SRC}/manifest.jsonl" ] && cp "${PREDICTIONS_SRC}/manifest.jsonl" "${DEST}/" DATE=$(date --utc +%Y-%m-%dT%H:%M:%SZ) - echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${{ env.BENCHMARK_SPLIT }}\",\"mode\":\"${{ env.BENCHMARK_MODE }}\",\"generated_at\":\"${DATE}\"}" \ + echo "{\"tool\":\"sciencebeam-parser\",\"version\":\"${VERSION}\",\"split\":\"${SPLIT}\",\"mode\":\"${MODE}\",\"generated_at\":\"${DATE}\"}" \ | python3 -m json.tool > "${DEST}/metadata.json" cd sciencebeam-eval-predictions git config user.name "github-actions[bot]" diff --git a/benchmarks/eval.yml b/benchmarks/eval.yml index c986bdd5..9ba16e61 100644 --- a/benchmarks/eval.yml +++ b/benchmarks/eval.yml @@ -6,10 +6,12 @@ dataset: biorxiv: file: biorxiv-jats/train-00000-of-00001.parquet id_column: ppr_id + variant: v1 validation: biorxiv: file: biorxiv-jats/validation-00000-of-00001.parquet id_column: ppr_id + variant: v1 sampling: smoke: