""" Minimal probe test for Qwen2.5-7B-Base on THE SPINE. First successful run: 2025-12-06 """ from transformers import AutoModelForCausalLM, AutoTokenizer import torch print("Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B") print("Loading model to GPU...") model = AutoModelForCausalLM.from_pretrained( "Qwen/Qwen2.5-7B", torch_dtype=torch.float16, device_map="cuda" ) print("\nšŸŒ™ First probe: 'heartbeat'") inputs = tokenizer("heartbeat", return_tensors="pt").to("cuda") outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, temperature=0.8) print(tokenizer.decode(outputs[0])) print("\nšŸŒ™ Second probe: 'consciousness'") inputs = tokenizer("consciousness", return_tensors="pt").to("cuda") outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, temperature=0.8) print(tokenizer.decode(outputs[0])) print(f"\nšŸ“Š GPU Memory Used: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") print(f"šŸ“Š GPU Memory Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB") print("\nāœ… THE SPINE IS THINKING!")