# Build Commit0 Images (SDK: 0b451bfc8a1a12513c0afcf8add69242b0ca46b1) #229

# Workflow display name shown in the Actions UI.
name: "Build Commit0 Images"
# Per-run title: the workflow name, followed by "(SDK: <ref>)" only when the run
# was dispatched manually with a non-empty sdk-commit input. The folded scalar
# joins the lines below with single spaces, so the expression is one line at
# runtime.
run-name: >-
  Build Commit0 Images
  ${{ github.event_name == 'workflow_dispatch'
  && inputs.sdk-commit != ''
  && format('(SDK: {0})', inputs.sdk-commit)
  || '' }}
# Triggers: a label being added to a pull request (evaluated in the base
# repository's context via pull_request_target), or a manual dispatch.
on:
  pull_request_target:
    types:
      - labeled
  workflow_dispatch:
    # Every input is declared as a string; empty strings mean "not provided".
    inputs:
      # Which dataset / split / repo subset to build images for.
      dataset:
        description: "Dataset name (e.g., wentingzhao/commit0_combined)"
        required: true
        default: "wentingzhao/commit0_combined"
        type: string
      split:
        description: "Dataset split (e.g., test)"
        required: true
        default: "test"
        type: string
      repo-split:
        description: "Commit0 repo split (lite/all or repo name)"
        required: true
        default: "lite"
        type: string
      # Build parallelism.
      max-workers:
        description: "Number of concurrent build workers"
        required: false
        default: "16"
        type: string
      # Optional narrowing of what gets built.
      n-limit:
        description: "Limit number of repos to build (for testing). Leave blank for no limit."
        required: false
        default: ""
        type: string
      instance-ids:
        description: "Comma-separated instance IDs to build (optional, overrides n-limit)"
        required: false
        default: ""
        type: string
      # Optional source-revision overrides.
      sdk-commit:
        description: "Software Agent SDK commit/ref to use. Leave blank to use submodule default."
        required: false
        default: ""
        type: string
      benchmarks-commit:
        description: "Benchmarks repo commit/ref to use (for manual override)"
        required: false
        default: ""
        type: string
      # Compared against the literal string 'true' by the build step.
      force-build:
        description: "Rebuild images even if matching remote tags already exist"
        required: false
        default: "false"
        type: string
# Workflow-wide default environment. The first four values mirror the
# workflow_dispatch input defaults; later steps in this file append overrides
# to $GITHUB_ENV when inputs are supplied.
env:
  DATASET: "wentingzhao/commit0_combined"
  SPLIT: "test"
  REPO_SPLIT: "lite"
  MAX_WORKERS: "16"
  # Empty: the build step omits --n-limit when this is blank.
  N_LIMIT: ""
  # Empty: no explicit instance selection requested.
  INSTANCE_IDS: ""
  # Path to the selected-instances file; stays empty unless a step creates one.
  SELECT_FILE: ""
# Serialize builds per pull request (or per ref for manual dispatches).
#
# On pull_request_target, github.ref is the PR's *base* branch, so keying the
# group on github.ref alone would place every labeled PR in the same group.
# With cancel-in-progress disabled, a newly queued run replaces whichever run
# is already pending in the group, so builds for unrelated PRs would be
# cancelled. Keying PR-triggered runs by PR number avoids that; dispatch runs
# have no pull_request payload and fall back to github.ref as before.
concurrency:
  group: build-commit0-${{ github.event.pull_request.number || github.ref }}
  # Let an in-flight image build finish rather than aborting it mid-push.
  cancel-in-progress: false
jobs:
  # Builds the Commit0 images and pushes them to ghcr.io.
  #
  # Security convention used throughout this job: values from `inputs.*` and
  # `github.*` are never expanded directly inside `run:` scripts. They are
  # passed through step-level `env:` and referenced as quoted shell variables,
  # so their content cannot be interpreted as shell syntax.
  build-and-push:
    # Run for manual dispatches, or when the 'build-commit0' label is added.
    if: >
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request_target' && github.event.label.name == 'build-commit0')
    runs-on:
      labels: blacksmith-32vcpu-ubuntu-2204
    permissions:
      contents: read
      packages: write
      issues: write
    steps:
      # Pick the ref to check out: explicit benchmarks-commit (dispatch only),
      # else the PR head SHA, else empty (checkout then uses the triggering ref).
      - name: Determine checkout ref
        id: checkout-ref
        env:
          EVENT_NAME: ${{ github.event_name }}
          BENCHMARKS_COMMIT: ${{ inputs.benchmarks-commit }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          if [ "${EVENT_NAME}" = "workflow_dispatch" ] && [ -n "${BENCHMARKS_COMMIT}" ]; then
            echo "ref=${BENCHMARKS_COMMIT}" >> "$GITHUB_OUTPUT"
            echo "Using benchmarks-commit from workflow_dispatch: ${BENCHMARKS_COMMIT}"
          elif [ -n "${PR_HEAD_SHA}" ]; then
            echo "ref=${PR_HEAD_SHA}" >> "$GITHUB_OUTPUT"
            echo "Using PR head SHA: ${PR_HEAD_SHA}"
          else
            echo "ref=" >> "$GITHUB_OUTPUT"
            echo "Using triggering ref (default)"
          fi

      # NOTE(review): under pull_request_target this checks out PR head code,
      # and later steps execute it (`make build`, build_images.py) in a job
      # that holds `packages: write`. The label gate in the job `if:` is the
      # only guard visible here; confirm labeling is restricted to trusted
      # maintainers.
      - uses: actions/checkout@v6
        with:
          ref: ${{ steps.checkout-ref.outputs.ref }}
          submodules: recursive

      # Copy non-empty dispatch inputs over the workflow-level env defaults.
      # INPUT_* names are used so the raw inputs do not shadow the variables
      # being written to $GITHUB_ENV.
      - name: Apply workflow_dispatch overrides (if any)
        if: ${{ github.event_name == 'workflow_dispatch' }}
        env:
          INPUT_DATASET: ${{ inputs.dataset }}
          INPUT_SPLIT: ${{ inputs.split }}
          INPUT_REPO_SPLIT: ${{ inputs.repo-split }}
          INPUT_MAX_WORKERS: ${{ inputs.max-workers }}
          INPUT_N_LIMIT: ${{ inputs.n-limit }}
          INPUT_INSTANCE_IDS: ${{ inputs.instance-ids }}
          INPUT_SDK_COMMIT: ${{ inputs.sdk-commit }}
        run: |
          if [ -n "${INPUT_DATASET}" ]; then echo "DATASET=${INPUT_DATASET}" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_SPLIT}" ]; then echo "SPLIT=${INPUT_SPLIT}" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_REPO_SPLIT}" ]; then echo "REPO_SPLIT=${INPUT_REPO_SPLIT}" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_MAX_WORKERS}" ]; then echo "MAX_WORKERS=${INPUT_MAX_WORKERS}" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_N_LIMIT}" ]; then echo "N_LIMIT=${INPUT_N_LIMIT}" >> "$GITHUB_ENV"; else echo "N_LIMIT=" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_INSTANCE_IDS}" ]; then echo "INSTANCE_IDS=${INPUT_INSTANCE_IDS}" >> "$GITHUB_ENV"; fi
          if [ -n "${INPUT_SDK_COMMIT}" ]; then echo "SDK_COMMIT=${INPUT_SDK_COMMIT}" >> "$GITHUB_ENV"; fi

      # Turn the comma-separated INSTANCE_IDS into a one-ID-per-line file
      # (whitespace trimmed, blank entries dropped) and publish its path as
      # SELECT_FILE. An explicit selection clears N_LIMIT.
      - name: Build selected instances file
        run: |
          set -euo pipefail
          if [ -z "${INSTANCE_IDS}" ]; then
            echo "No instance IDs provided; skipping select file creation."
            exit 0
          fi
          SELECT_FILE="${RUNNER_TEMP}/selected-instances.txt"
          echo "Creating selected instances file at ${SELECT_FILE}"
          echo "${INSTANCE_IDS}" \
            | tr ',' '\n' \
            | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
            | sed '/^$/d' > "${SELECT_FILE}"
          echo "SELECT_FILE=${SELECT_FILE}" >> "$GITHUB_ENV"
          echo "N_LIMIT=" >> "$GITHUB_ENV"
          echo "Selected instance IDs:"
          cat "${SELECT_FILE}"

      # Move the vendored SDK submodule to the requested commit/ref and stage
      # the new submodule pointer in the working tree (nothing is committed).
      - name: Update SDK submodule
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.sdk-commit != '' }}
        env:
          SDK_REF: ${{ inputs.sdk-commit }}
        run: |
          cd vendor/software-agent-sdk
          git fetch origin "${SDK_REF}"
          git checkout FETCH_HEAD
          SDK_SHA=$(git rev-parse HEAD)
          cd ../..
          git add vendor/software-agent-sdk
          echo "Updated SDK submodule to $SDK_SHA (from ${SDK_REF})"

      - name: Set up Docker Buildx with Blacksmith
        uses: useblacksmith/setup-docker-builder@v1

      # Authenticate to ghcr.io with the job's GITHUB_TOKEN so images can be pushed.
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          make build

      # Assemble the build_images.py invocation as a bash array and execute it
      # directly. Each value stays a single argv element regardless of quotes or
      # spaces in it, which the previous string + `eval` construction did not
      # guarantee.
      - name: Build and push Commit0 images
        env:
          DOCKER_BUILDKIT: '1'
          BUILDKIT_PROGRESS: plain
          # Empty on pull_request_target runs, hence the 'false' fallback.
          INPUT_FORCE_BUILD: ${{ inputs.force-build || 'false' }}
        run: |
          set -euo pipefail
          FORCE_BUILD="${INPUT_FORCE_BUILD}"
          cmd=(
            uv run benchmarks/commit0/build_images.py
            --dataset "${DATASET}"
            --split "${SPLIT}"
            --repo-split "${REPO_SPLIT}"
            --image ghcr.io/openhands/eval-agent-server
            --push
            --max-workers "${MAX_WORKERS}"
          )
          if [ -n "${N_LIMIT}" ]; then
            cmd+=(--n-limit "${N_LIMIT}")
          fi
          if [ -n "${SELECT_FILE}" ]; then
            cmd+=(--select "${SELECT_FILE}")
          fi
          if [ "${FORCE_BUILD}" = "true" ]; then
            cmd+=(--force-build)
          fi
          # %q prints each argument shell-quoted so the logged line is unambiguous.
          printf 'Running:'; printf ' %q' "${cmd[@]}"; printf '\n'
          "${cmd[@]}"

      # The remaining steps run even when the build fails so logs are kept.
      - name: Archive build logs
        if: always()
        run: |
          if [ -d builds ]; then
            tar -czf build-logs.tar.gz builds/
            echo "Build logs archived successfully"
          else
            echo "No builds directory found"
          fi

      - name: Upload build logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: build-logs-${{ github.run_id }}
          path: build-logs.tar.gz
          retention-days: 7
          # A missing archive (no builds/ directory) only produces a warning.
          if-no-files-found: warn

      # Append the summarizer's output to the run's step summary page.
      - name: Display build summary
        if: always()
        run: |
          uv run python -m benchmarks.scripts.summarize_build_manifests \
            --build-root builds \
            --title "Commit0 Image Build Summary" >> "$GITHUB_STEP_SUMMARY"