ProBas_RAG_Assistant / check_progress.py
Mohamed284's picture
Deploy ProBas RAG Assistant with enriched prebuilt index
0ca97fd
"""Report ProBas index build progress.
Run this in a second terminal while `app.py` is building:
python check_progress.py
It reads the status file the app writes after every checkpoint wave under
indexes/probas_rag/ and prints how many records are embedded, the throughput,
and the ETA. The numbers update at each checkpoint (once every
PROBAS_CHECKPOINT_EVERY completed waves), which is also the point a restart
resumes from.
"""
from __future__ import annotations
import json
import time
from pathlib import Path
# Directory this script scans for the build's bundle_*.json and
# status_v*_*.json files (relative to the current working directory).
CACHE_DIR = Path("indexes", "probas_rag")
def format_duration(seconds: float | None) -> str:
    """Render a duration in seconds as a compact ``1h02m03s`` style string.

    Args:
        seconds: Duration in seconds. ``None`` means the duration is not
            known. Negative values are clamped to zero and fractional
            seconds are truncated.

    Returns:
        ``"unknown"`` when ``seconds`` is ``None`` or positive infinity;
        otherwise ``"<h>h<mm>m<ss>s"``, ``"<m>m<ss>s"`` or ``"<s>s"``,
        using the largest non-zero unit as the leading field.
    """
    if seconds is None:
        return "unknown"
    try:
        total = int(max(0, seconds))
    except OverflowError:
        # int() cannot represent float("inf"), which json.loads yields for a
        # bare ``Infinity`` token; an infinite ETA is as good as unknown.
        return "unknown"
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours:
        return f"{hours}h{minutes:02d}m{secs:02d}s"
    if minutes:
        return f"{minutes}m{secs:02d}s"
    return f"{secs}s"
def main() -> None:
    """Print a one-shot progress report for the index build.

    Looks in ``CACHE_DIR`` for a finished ``bundle_*.json`` first. If none
    exists, loads the most recently modified ``status_v*_*.json`` file and
    prints its state, progress, rate, ETA, embedding model and age. Keys
    missing from the status payload are shown as ``?``.

    The status file is written by another process while this script runs,
    so a file that disappears or is only partially written at read time is
    reported as a transient condition instead of raising.
    """
    # A finished bundle means there is nothing left to track.
    if any(CACHE_DIR.glob("bundle_*.json")):
        print("Build COMPLETE — finished index bundle is on disk.")
        return

    status_files = list(CACHE_DIR.glob("status_v*_*.json"))
    if not status_files:
        print("No progress yet. The status file appears after the first wave completes.")
        return

    try:
        # Several status files may exist; the freshest one is the live build.
        latest = max(status_files, key=lambda p: p.stat().st_mtime)
        status = json.loads(latest.read_text(encoding="utf-8"))
        modified = latest.stat().st_mtime
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # The writer may have replaced or be rewriting the file right now.
        print("Status file could not be read (it may be mid-write). Try again in a moment.")
        return
    if not isinstance(status, dict):
        # Valid JSON but not an object: nothing below could be looked up.
        print("Status file could not be read (it may be mid-write). Try again in a moment.")
        return

    age = time.time() - modified

    print(f"State: {status.get('state', '?')}")
    print(f"Progress: {status.get('completed', '?')}/{status.get('total', '?')} "
          f"({status.get('percent', '?')}%)")
    print(f"Rate: {status.get('rate_per_sec', '?')} rec/s")
    print(f"ETA: {format_duration(status.get('eta_seconds'))}")
    print(f"Model: {status.get('embedding_model', '?')}")
    print(f"Updated: {age:.0f}s ago")
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()