import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
from huggingface_hub import login
import os

# Authenticate with the Hugging Face Hub. The token is read from the
# HUGGINGFACEHUB_TOKEN environment variable; if that variable is unset this
# line raises KeyError, before any model download is attempted.
login(token=os.environ["HUGGINGFACEHUB_TOKEN"])

# Hub repository used below for both the tokenizer and the PEFT adapter.
MODEL_DIR = "malomalom/mistral-lora-assignments"

# Quantization settings for the base model: 4-bit loading with the "nf4"
# quantization type, double quantization enabled, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# The tokenizer is loaded from the adapter repository, while the base weights
# come from the upstream Mistral-7B-Instruct checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",  # device placement is delegated to the library
    quantization_config=bnb_config
)
# Load the adapter stored in MODEL_DIR on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, MODEL_DIR)
# Switch to evaluation mode for inference.
model.eval()

def generate_explanation(user_input):
    """Generate a natural-language explanation for the user's question.

    The question is wrapped in a "User: ... / Assistant:" prompt, passed to
    the module-level ``model``, and only the newly generated continuation is
    returned.

    Args:
        user_input: The question text entered by the user.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when ``user_input`` is empty, whitespace-only, or ``None``.
    """
    # Nothing to explain: skip the expensive generation call entirely.
    if not user_input or not user_input.strip():
        return ""

    prompt = f"User: {user_input}\nAssistant:"
    # Move the inputs to the device the model actually lives on. The model is
    # loaded with device_map="auto", so a hard-coded "cuda" breaks on hosts
    # where no GPU is available.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )
    # The generated sequence starts with the prompt tokens. Drop them by
    # position instead of splitting the decoded text on "Assistant:", which
    # would return only a fragment if the model emits that marker again.
    prompt_length = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# === Gradio interface ===
# Build a one-textbox-in, one-textbox-out web UI around generate_explanation
# and start serving it.
gr.Interface(
    fn=generate_explanation,
    inputs=gr.Textbox(label="Ask your question"),
    outputs=gr.Textbox(label="Generated explanation"),
    title="ExplainMyAssignment",
    # Adjacent string literals are concatenated with nothing in between, so
    # each paragraph carries an explicit blank-line separator; without it the
    # sentences run together ("...explanations.Fine-tuned...").
    description=(
        "A local LLM-based assistant that transforms complex variable assignments (from optimization, logic, or symbolic reasoning) into clear human-readable explanations.\n\n"
        "Fine-tuned from Mistral-7B, this tool is built to translate abstract symbolic mappings into natural language feedback, understanding structural constraints, and improving interpretability.\n\n"
        "Designed to support AI engineers, teachers, and advanced students working with mathematical or logical models.\n\n"
        "Works locally and can be deployed in constrained environments (LoRA + 4-bit quantization)."
    ),
).launch()