#!/usr/bin/env bash
#
# pre_submit_validate.sh
#
# Extended pre-submission checks for OpenEnv hackathon submissions.
# This script complements scripts/validate-submission.sh by also checking
# inference contract requirements and baseline reproducibility.
# Strict mode: stop on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Limits handed to run_with_timeout for the docker build and inference steps.
DOCKER_BUILD_TIMEOUT=600
INFERENCE_TIMEOUT=1200

# Option defaults; the command-line parser below overrides them.
PING_URL=''
REPO_DIR='.'
SKIP_DOCKER=false
SKIP_INFERENCE=false

# Tool locations, filled in by resolve_python_bin / resolve_openenv_cmd.
PYTHON_BIN=''
OPENENV_BIN=''
OPENENV_USE_MODULE=false

# State that cleanup() removes when the script exits.
DOCKER_CONTAINER_ID=''
INFERENCE_OUT_FILE='.pre-submit-inference.out'
# Print the command-line help text to stdout.
usage() {
  printf '%s\n' \
    'Usage: scripts/pre_submit_validate.sh [options]' \
    'Options:' \
    '--ping-url <url> HF Space URL (e.g., https://team-space.hf.space)' \
    '--repo-dir <path> Repo root directory (default: current directory)' \
    '--skip-docker Skip docker build check' \
    '--skip-inference Skip inference baseline check' \
    '-h, --help Show this help message' \
    'Required environment variables for inference checks:' \
    'API_BASE_URL' \
    'MODEL_NAME' \
    'HF_TOKEN'
}
#######################################
# Run a command, terminating it if it exceeds a time limit.
# Uses `timeout` or `gtimeout` when available; otherwise falls back to a
# background watcher that sends SIGTERM once the limit elapses.
# Arguments:
#   $1   - time limit, passed verbatim to timeout/gtimeout/sleep
#   $2.. - command and its arguments
# Returns:
#   The command's exit status. In the fallback path a command killed by the
#   watcher reports 124, the status `timeout` uses for an expired limit.
#######################################
run_with_timeout() {
  local secs="$1"
  shift
  if command -v timeout >/dev/null 2>&1; then
    timeout "$secs" "$@"
  elif command -v gtimeout >/dev/null 2>&1; then
    gtimeout "$secs" "$@"
  else
    local pid watcher rc=0
    "$@" &
    pid=$!
    (
      # Run sleep as a child we can kill, so stopping the watcher early does
      # not leave an orphaned sleep holding the caller's output descriptors.
      sleeper=""
      trap 'kill "$sleeper" 2>/dev/null || true; exit 1' TERM
      sleep "$secs" &
      sleeper=$!
      # Exit status is 0 only when the limit elapsed AND the kill was delivered.
      wait "$sleeper" && kill "$pid" 2>/dev/null
    ) &
    watcher=$!
    # `|| rc=$?` keeps a failing command from tripping errexit before its
    # status is captured.
    wait "$pid" 2>/dev/null || rc=$?
    kill "$watcher" 2>/dev/null || true
    if wait "$watcher" 2>/dev/null; then
      rc=124
    fi
    return "$rc"
  fi
}
# Print all arguments as one message prefixed with the current UTC time
# in [HH:MM:SS] form.
log() {
  local message="$*"
  printf '[%s] %s\n' "$(date -u '+%H:%M:%S')" "${message}"
}
# Log a "FAILED -- <reason>" line via log, then terminate the script with
# exit status 1.
die() {
  local reason="$*"
  log "FAILED -- ${reason}"
  exit 1
}
# Log a "PASSED -- <detail>" line via log for a check that succeeded.
pass() {
  local detail="$*"
  log "PASSED -- ${detail}"
}
# Exit handler: force-remove the container recorded in DOCKER_CONTAINER_ID
# (if any) and delete the inference output file. Both removals are
# best-effort; their failures are ignored.
cleanup() {
  local container="$DOCKER_CONTAINER_ID"
  if [ "$container" != "" ]; then
    docker rm -f "$container" >/dev/null 2>&1 || true
  fi
  rm -f "$INFERENCE_OUT_FILE" >/dev/null 2>&1 || true
}
# Run cleanup on every exit path of the script.
trap cleanup EXIT
#######################################
# Locate a Python interpreter and store its path in PYTHON_BIN.
# Search order: virtualenv interpreters under REPO_DIR and its parent
# (POSIX and Windows layouts), then `python`, then `python3` on PATH.
# Globals:   REPO_DIR (read), PYTHON_BIN (written)
# Returns:   0 when an interpreter was found, 1 otherwise
#######################################
resolve_python_bin() {
  # Declared local so the loop variables do not leak into the caller's scope.
  local candidate name
  local candidates=(
    "$REPO_DIR/.venv/bin/python"
    "$REPO_DIR/.venv/Scripts/python.exe"
    "$REPO_DIR/../.venv/bin/python"
    "$REPO_DIR/../.venv/Scripts/python.exe"
  )
  for candidate in "${candidates[@]}"; do
    # -f is required as well: -x alone is also true for a searchable directory.
    if [ -f "$candidate" ] && [ -x "$candidate" ]; then
      PYTHON_BIN="$candidate"
      return 0
    fi
  done
  for name in python python3; do
    if command -v "$name" >/dev/null 2>&1; then
      PYTHON_BIN="$(command -v "$name")"
      return 0
    fi
  done
  return 1
}
#######################################
# Locate the `openenv` CLI and store its path in OPENENV_BIN.
# Search order: virtualenv executables under REPO_DIR and its parent
# (POSIX and Windows layouts), then `openenv` on PATH.
# Globals:   REPO_DIR (read), OPENENV_BIN (written)
# Returns:   0 when the CLI was found, 1 otherwise
#######################################
resolve_openenv_cmd() {
  # Declared local so the loop variable does not leak into the caller's scope.
  local candidate
  local candidates=(
    "$REPO_DIR/.venv/bin/openenv"
    "$REPO_DIR/.venv/Scripts/openenv.exe"
    "$REPO_DIR/../.venv/bin/openenv"
    "$REPO_DIR/../.venv/Scripts/openenv.exe"
  )
  for candidate in "${candidates[@]}"; do
    # -f is required as well: -x alone is also true for a searchable directory.
    if [ -f "$candidate" ] && [ -x "$candidate" ]; then
      OPENENV_BIN="$candidate"
      return 0
    fi
  done
  if command -v openenv >/dev/null 2>&1; then
    OPENENV_BIN="$(command -v openenv)"
    return 0
  fi
  return 1
}
# Command-line parsing. Options that take a value consume two words; flags
# consume one. Anything unrecognised aborts through die.
until [ "$#" -eq 0 ]; do
  case "$1" in
    --ping-url)
      [ "$#" -ge 2 ] || die "--ping-url requires a value"
      PING_URL="$2"
      shift 2
      ;;
    --repo-dir)
      [ "$#" -ge 2 ] || die "--repo-dir requires a value"
      REPO_DIR="$2"
      shift 2
      ;;
    --skip-docker)
      SKIP_DOCKER=true
      shift
      ;;
    --skip-inference)
      SKIP_INFERENCE=true
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      die "Unknown option: $1"
      ;;
  esac
done
# Turn REPO_DIR into an absolute path and make it the working directory, so
# the relative paths used by the checks below resolve against the repo root.
REPO_DIR="$(cd "$REPO_DIR" && pwd)"
cd "$REPO_DIR"
log "Repo: $REPO_DIR"

# A Python interpreter is mandatory.
if ! resolve_python_bin; then
  die "No usable Python interpreter found"
fi
log "Python: $PYTHON_BIN"

# Without a standalone openenv executable, fall back to `python -m openenv`.
if ! resolve_openenv_cmd; then
  OPENENV_USE_MODULE=true
  log "OpenEnv CLI via module: $PYTHON_BIN -m openenv"
else
  log "OpenEnv CLI: $OPENENV_BIN"
fi
# Step 1: every file in the list must exist as a regular file, relative to
# the current directory.
log "Step 1/8: Checking OpenEnv standard file layout"
required_files=(
  "openenv.yaml" "models.py" "env.py" "inference.py"
  "server/app.py" "server/cloud_devops_env_environment.py"
)
for f in "${required_files[@]}"; do
  if [ ! -f "$f" ]; then
    die "Missing required file: $f"
  fi
done
pass "Core OpenEnv file layout looks valid"
# Step 2: static text checks on inference.py. Each marker is searched for as
# a literal string (-F), so the brackets need no escaping.
log "Step 2/8: Checking inference contract requirements"
if [ ! -f "inference.py" ]; then
  die "inference.py must exist in repo root"
fi
grep -qF -- "from openai import OpenAI" inference.py || die "inference.py must import OpenAI client"
grep -qF -- "OpenAI(" inference.py || die "inference.py must instantiate OpenAI client"
grep -qF -- "[START]" inference.py || die "inference.py must emit [START] logs"
grep -qF -- "[STEP]" inference.py || die "inference.py must emit [STEP] logs"
grep -qF -- "[END]" inference.py || die "inference.py must emit [END] logs"
pass "Inference script contract checks passed"
# Step 3: run `openenv validate`, through the standalone CLI when one was
# found and through `python -m openenv` otherwise. The output is captured in
# a variable rather than a fixed /tmp path, so concurrent runs cannot clobber
# each other and no predictable temp file is left behind. It is shown only
# when validation fails.
log "Step 3/8: Validating OpenEnv manifest and typed models"
if [ "$OPENENV_USE_MODULE" = true ]; then
  validate_cmd=("$PYTHON_BIN" -m openenv validate)
else
  validate_cmd=("$OPENENV_BIN" validate)
fi
if ! validate_output="$("${validate_cmd[@]}" 2>&1)"; then
  printf '%s\n' "$validate_output"
  die "openenv validate failed"
fi
pass "openenv validate passed"
# Step 4: when --ping-url was given, POST an empty JSON object to
# <url>/reset and require HTTP 200.
log "Step 4/8: Optional HF Space ping check"
if [ -n "$PING_URL" ]; then
  # Drop one trailing slash so the request path is not "//reset".
  PING_URL="${PING_URL%/}"
  # curl still prints "000" for %{http_code} when the transfer fails, so the
  # fallback must replace the captured value rather than append to it
  # (appending produced "000000"). The response body is not needed.
  code="$(curl -s -o /dev/null -w '%{http_code}' -X POST \
    -H 'Content-Type: application/json' -d '{}' \
    --max-time 30 "$PING_URL/reset")" || code="000"
  [ "$code" = "200" ] || die "HF Space /reset returned HTTP $code"
  pass "HF Space responds to /reset (HTTP 200)"
else
  log "SKIPPED -- no --ping-url provided"
fi
# Step 5: build the image, start a container, and probe /health and /reset
# on the host port that docker mapped to container port 8000.
log "Step 5/8: Docker build + run check"
if [ "$SKIP_DOCKER" = true ]; then
  log "SKIPPED -- --skip-docker enabled"
else
  command -v docker >/dev/null 2>&1 || die "docker not found"

  if [ -f "Dockerfile" ]; then
    context="."
  elif [ -f "server/Dockerfile" ]; then
    context="server"
  else
    die "No Dockerfile found at root or server/"
  fi

  # Unpredictable scratch file for docker output instead of fixed /tmp names.
  docker_log="$(mktemp "${TMPDIR:-/tmp}/pre-submit-docker.XXXXXX")" \
    || die "could not create a temporary file for docker output"

  # Build once, already tagged; a separate untagged build only repeated the
  # work and left a dangling image behind.
  IMAGE_TAG="openenv-pre-submit-local"
  run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build -t "$IMAGE_TAG" "$context" >"$docker_log" 2>&1 || {
    tail -n 40 "$docker_log" || true
    rm -f -- "$docker_log"
    die "docker build failed"
  }
  pass "Docker build succeeded"

  # An empty host port in -p lets docker choose a free one on 127.0.0.1.
  DOCKER_CONTAINER_ID="$(docker run -d -p 127.0.0.1::8000 "$IMAGE_TAG" 2>"$docker_log" || true)"
  [ -n "$DOCKER_CONTAINER_ID" ] || {
    cat "$docker_log" || true
    rm -f -- "$docker_log"
    die "docker run failed"
  }
  rm -f -- "$docker_log"

  # `|| HOST_PORT=""` routes a failing pipeline (pipefail) to the die below
  # instead of a silent errexit.
  HOST_PORT="$(docker port "$DOCKER_CONTAINER_ID" 8000/tcp | tail -n 1 | awk -F: '{print $NF}')" || HOST_PORT=""
  [ -n "$HOST_PORT" ] || die "could not resolve mapped host port for container"

  # Poll /health up to 30 times, one second apart.
  HEALTH_OK=false
  for ((attempt = 1; attempt <= 30; attempt++)); do
    # curl prints "000" itself on failure; replace the value, do not append.
    health_code="$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 \
      "http://127.0.0.1:${HOST_PORT}/health")" || health_code="000"
    if [ "$health_code" = "200" ]; then
      HEALTH_OK=true
      break
    fi
    sleep 1
  done
  [ "$HEALTH_OK" = true ] || {
    # Include the container's stderr stream; never let the log dump itself
    # abort the script before the failure is reported.
    docker logs "$DOCKER_CONTAINER_ID" 2>&1 | tail -n 50 || true
    die "container did not become healthy on /health"
  }

  reset_code="$(curl -s -o /dev/null -w '%{http_code}' -X POST \
    -H 'Content-Type: application/json' -d '{}' \
    --max-time 10 "http://127.0.0.1:${HOST_PORT}/reset")" || reset_code="000"
  [ "$reset_code" = "200" ] || {
    docker logs "$DOCKER_CONTAINER_ID" 2>&1 | tail -n 50 || true
    die "container /reset returned HTTP $reset_code"
  }
  pass "Containerized execution check passed (/health and /reset)"

  # Remove the container now and clear the ID so cleanup does not retry.
  docker rm -f "$DOCKER_CONTAINER_ID" >/dev/null 2>&1 || true
  DOCKER_CONTAINER_ID=""
fi
# Step 6: the inference run needs these three variables to be set and
# non-empty; the check is skipped together with the inference steps.
log "Step 6/8: Environment variable checks"
if [ "$SKIP_INFERENCE" != true ]; then
  if [ -z "${API_BASE_URL:-}" ]; then
    die "API_BASE_URL is not set"
  fi
  if [ -z "${MODEL_NAME:-}" ]; then
    die "MODEL_NAME is not set"
  fi
  if [ -z "${HF_TOKEN:-}" ]; then
    die "HF_TOKEN is not set"
  fi
  pass "Required API_BASE_URL / MODEL_NAME / HF_TOKEN are set"
else
  log "SKIPPED -- --skip-inference enabled"
fi
# Step 7: run inference.py under the inference time limit, capturing stdout
# and stderr in INFERENCE_OUT_FILE. On failure the last 80 lines are shown.
log "Step 7/8: Baseline reproducibility (inference.py)"
if [ "$SKIP_INFERENCE" != true ]; then
  if ! run_with_timeout "$INFERENCE_TIMEOUT" "$PYTHON_BIN" inference.py >"$INFERENCE_OUT_FILE" 2>&1; then
    tail -n 80 "$INFERENCE_OUT_FILE"
    die "inference.py failed or timed out"
  fi
  pass "inference.py completed within timeout"
else
  log "SKIPPED -- --skip-inference enabled"
fi
# Step 8: parse the captured inference output and validate the structured
# [START]/[STEP]/[END] lines. The embedded Python reads the file named by its
# first argument and exits non-zero with a one-line reason on any violation;
# that failure is reported through die like every other step.
log "Step 8/8: Structured logs + task/grader checks"
if [ "$SKIP_INFERENCE" = true ]; then
  log "SKIPPED -- --skip-inference enabled"
else
  "$PYTHON_BIN" - "$INFERENCE_OUT_FILE" <<'PY' || die "structured log validation failed"
import json
import sys
from pathlib import Path


def fail(message):
    """Abort with a one-line reason (printed to stderr, exit status 1)."""
    raise SystemExit(message)


def parse_payload(tag, line, lineno):
    """Decode the JSON that follows '<tag> ' on a log line."""
    try:
        return json.loads(line[len(tag) + 1:])
    except json.JSONDecodeError as exc:
        fail(f'Line {lineno}: {tag} payload is not valid JSON: {exc}')


def parse_object(tag, line, lineno):
    """Decode a payload that must be a JSON object (fields are read by key)."""
    payload = parse_payload(tag, line, lineno)
    if not isinstance(payload, dict):
        fail(f'Line {lineno}: {tag} payload must be a JSON object')
    return payload


def to_float(value, label):
    """Convert a score/reward to float, failing with a readable message."""
    try:
        return float(value)
    except (TypeError, ValueError):
        fail(f'{label} is not numeric: {value!r}')


path = Path(sys.argv[1])
text = path.read_text(encoding='utf-8', errors='replace').splitlines()

starts = []
ends = []
step_count = 0

for lineno, line in enumerate(text, start=1):
    line = line.strip()
    if line.startswith('[START] '):
        starts.append(parse_object('[START]', line, lineno))
    elif line.startswith('[STEP] '):
        # STEP payloads only need to be valid JSON; their content is not read.
        parse_payload('[STEP]', line, lineno)
        step_count += 1
    elif line.startswith('[END] '):
        ends.append(parse_object('[END]', line, lineno))

if len(starts) < 3:
    fail('Expected at least 3 [START] task logs')

unique_tasks = {str(s.get('task', '')) for s in starts if s.get('task')}
if len(unique_tasks) < 3:
    fail('Expected at least 3 unique tasks in [START] logs')

if len(ends) != len(starts):
    fail('Mismatch between [START] and [END] log counts')

if step_count == 0:
    fail('No [STEP] logs found')

for i, end in enumerate(ends, start=1):
    # A missing score defaults to -1.0, which fails the range check below.
    score = to_float(end.get('score', -1.0), f'END #{i} score')
    rewards = end.get('rewards', [])
    if not (0.0 <= score <= 1.0):
        fail(f'END #{i} score out of range [0,1]: {score}')
    if not isinstance(rewards, list):
        fail(f'END #{i} rewards must be a list')
    for r in rewards:
        rv = to_float(r, f'END #{i} step reward')
        if not (-1.0 <= rv <= 1.0):
            fail(f'END #{i} step reward out of sanity range [-1,1]: {rv}')

print('Structured logs and task/grader checks passed')
PY
  pass "Structured [START]/[STEP]/[END] logs and score-range checks passed"
fi
log "All checks passed. Submission is ready."