import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "Zenith-GPT2-124M"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS
model = AutoModelForCausalLM.from_pretrained(model_path)
model.to(device)
model.eval()

print("chat ready (type /exit to quit)\n")

while True:
    try:
        user_input = input("you > ").strip()
        if user_input.lower() in {"/exit", "/quit"}:
            print("bye")
            break
        if not user_input:
            continue

        inputs = tokenizer(user_input, return_tensors="pt").to(device)
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.8,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens. Slicing the decoded string by
        # len(user_input) is fragile, since decode() is not guaranteed to
        # round-trip the input text character-for-character.
        new_tokens = output[0][inputs["input_ids"].shape[1]:]
        reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
        print("friend >", reply.strip())
    except (KeyboardInterrupt, EOFError):
        print("\nbye")
        break
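
# --- optional extension: multi-turn context (a sketch, not part of the loop above) ---
# The loop above is single-turn: each generate() call sees only the latest user
# line. The helper below sketches one way to keep earlier turns in context by
# re-feeding an accumulated plain-text transcript. The "you:"/"friend:" labels
# are an assumption (GPT-2 has no chat template), and the 1024-token budget
# reflects GPT-2's context window.
def generate_with_history(history, user_input, max_new_tokens=200):
    """Append the new turn, trim old turns to fit the context, and generate."""
    history.append(f"you: {user_input}")
    prompt = "\n".join(history) + "\nfriend:"
    ids = tokenizer(prompt, return_tensors="pt").to(device)
    # Drop the oldest turns until the prompt leaves room for the reply.
    while ids["input_ids"].shape[1] > 1024 - max_new_tokens and len(history) > 1:
        history.pop(0)
        prompt = "\n".join(history) + "\nfriend:"
        ids = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(
            **ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the new tokens, then keep the first line so the model does
    # not continue inventing further "you:" turns in the same reply.
    reply = tokenizer.decode(out[0][ids["input_ids"].shape[1]:], skip_special_tokens=True)
    reply = reply.strip().split("\n")[0]
    history.append(f"friend: {reply}")
    return reply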