"""Gradio front-end for a LoRA-adapted, 4-bit-quantized Mistral-7B model.

On import/run this script logs in to the Hugging Face Hub, loads the base
model with a bitsandbytes NF4 quantization config, attaches the LoRA adapter
stored in ``MODEL_DIR``, and launches a single-textbox Gradio interface.
"""

import os

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Authenticate against the Hub. A missing HUGGINGFACEHUB_TOKEN raises KeyError
# here, before any model download is attempted.
login(token=os.environ["HUGGINGFACEHUB_TOKEN"])

# Hub repository holding the LoRA adapter weights and the tokenizer.
MODEL_DIR = "malomalom/mistral-lora-assignments"

# 4-bit NF4 quantization with double quantization; matmuls run in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base_model, MODEL_DIR)
model.eval()


def generate_explanation(user_input: str) -> str:
    """Generate the assistant's reply to *user_input*.

    The input is wrapped in a ``User: ...\\nAssistant:`` prompt, up to 200 new
    tokens are generated, and only the newly generated text is returned with
    surrounding whitespace stripped.

    Args:
        user_input: The question typed into the Gradio textbox.

    Returns:
        The decoded continuation produced by the model.
    """
    prompt = f"User: {user_input}\nAssistant:"

    # The model is placed with device_map="auto"; send the inputs wherever its
    # first parameters live instead of assuming a fixed "cuda" device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() returns prompt + continuation for causal LMs. Decode only the
    # continuation: splitting the full text on "Assistant:" would discard part
    # of the answer whenever the model itself emits that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


# === Gradio interface ===
demo = gr.Interface(
    fn=generate_explanation,
    inputs=gr.Textbox(label="Ask your question"),
    outputs=gr.Textbox(label="Generated explanation"),
    title="ExplainMyAssignment",
    # Adjacent literals are concatenated verbatim, so each sentence carries
    # its own trailing space to keep the rendered text readable.
    description=(
        "A local LLM-based assistant that transforms complex variable assignments (from optimization, logic, or symbolic reasoning) into clear human-readable explanations. "
        "Fine-tuned from Mistral-7B, this tool is built to translate abstract symbolic mappings into natural language feedback, understanding structural constraints, and improving interpretability. "
        "Designed to support AI engineers, teachers, and advanced students working with mathematical or logical models. "
        "Works locally and can be deployed in constrained environments (LoRA + 4-bit quantization)."
    ),
)
demo.launch()