"""Report ProBas index build progress.

Run this in a second terminal while `app.py` is building:

    python check_progress.py

It reads the status file the app writes after every checkpoint wave under
indexes/probas_rag/ and prints how many records are embedded, the throughput,
and the ETA. The numbers update each time a checkpoint wave completes (every
PROBAS_CHECKPOINT_EVERY waves), which is also the point a restart resumes from.
"""
| from __future__ import annotations |
|
|
| import json |
| import time |
| from pathlib import Path |
|
|
# Directory (relative to the current working directory) that is searched for
# the build's status_v*_*.json files and finished bundle_*.json files.
CACHE_DIR = Path("indexes", "probas_rag")
|
|
|
|
def format_duration(seconds: float | None) -> str:
    """Render a duration in seconds as a compact ``1h02m03s``-style string.

    Args:
        seconds: Duration in seconds, or ``None`` when no estimate exists.
            Negative values are clamped to zero; fractions are truncated.

    Returns:
        ``"unknown"`` when *seconds* is ``None``, NaN, or positive infinity
        (``json.loads`` accepts the ``NaN`` / ``Infinity`` literals, so such
        values can arrive from a JSON file). Otherwise the shortest of
        ``"{h}h{mm}m{ss}s"``, ``"{m}m{ss}s"`` or ``"{s}s"`` that fits.
    """
    # NaN is the only value that compares unequal to itself. It must be caught
    # before max(), which would otherwise quietly turn it into 0.
    if seconds is None or seconds != seconds:
        return "unknown"
    try:
        whole = int(max(0, seconds))
    except OverflowError:
        # int() cannot represent infinity; there is no meaningful ETA to show.
        return "unknown"

    hours, remainder = divmod(whole, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{hours}h{minutes:02d}m{secs:02d}s"
    if minutes:
        return f"{minutes}m{secs:02d}s"
    return f"{secs}s"
|
|
|
|
def main() -> None:
    """Print a one-shot progress report for the index build.

    Prints a completion notice when any ``bundle_*.json`` file exists under
    ``CACHE_DIR``. Otherwise it reports the fields of the most recently
    modified ``status_v*_*.json`` file there, or a short hint when no status
    file exists yet or the newest one cannot be read.
    """
    if any(CACHE_DIR.glob("bundle_*.json")):
        print("Build COMPLETE — finished index bundle is on disk.")
        return

    # Stat each status file exactly once so the file chosen as newest and the
    # age reported below come from the same measurement. This script is meant
    # to run while the build is writing, so a file that disappears between the
    # glob and the stat is skipped instead of crashing the report.
    candidates = []
    for path in CACHE_DIR.glob("status_v*_*.json"):
        try:
            candidates.append((path.stat().st_mtime, path))
        except OSError:
            continue
    if not candidates:
        print("No progress yet. The status file appears after the first wave completes.")
        return

    modified, latest = max(candidates, key=lambda item: item[0])
    try:
        status = json.loads(latest.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, which
        # is what a half-written file produces.
        status = None
    if not isinstance(status, dict):
        print(f"Could not read {latest.name} (it may be mid-write); try again shortly.")
        return

    # Clamp so clock adjustments never show a negative age.
    age = max(0.0, time.time() - modified)

    print(f"State: {status.get('state', '?')}")
    print(f"Progress: {status.get('completed', '?')}/{status.get('total', '?')} "
          f"({status.get('percent', '?')}%)")
    print(f"Rate: {status.get('rate_per_sec', '?')} rec/s")
    print(f"ETA: {format_duration(status.get('eta_seconds'))}")
    print(f"Model: {status.get('embedding_model', '?')}")
    print(f"Updated: {age:.0f}s ago")
|
|
|
|
# Run the report only when this file is executed as a script, so importing
# the module does not print anything.
if __name__ == "__main__":
    main()
|
|