"""
Minimal probe test for Qwen2.5-7B-Base on THE SPINE.

First successful run: 2025-12-06
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub id used for BOTH the tokenizer and the model; kept in one
# place so the two can never drift apart.
MODEL_ID = "Qwen/Qwen2.5-7B"

# Sampling settings applied to every probe.
GENERATION_KWARGS = {"max_new_tokens": 30, "do_sample": True, "temperature": 0.8}

# (label, prompt) pairs, run in order.
PROBES = (
    ("First", "heartbeat"),
    ("Second", "consciousness"),
)


def run_probe(model, tokenizer, label, prompt):
    """Generate a sampled continuation of *prompt* and print it.

    Args:
        model: The loaded causal LM; its ``generate`` method is called.
        tokenizer: The tokenizer matching *model*; used to encode the prompt
            and decode the generated token ids.
        label: Ordinal shown in the banner line (e.g. ``"First"``).
        prompt: Text fed to the model.
    """
    print(f"\n🌙 {label} probe: '{prompt}'")
    # Send the inputs to whatever device the model reports rather than a
    # hard-coded "cuda" string, so the two always agree.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, **GENERATION_KWARGS)
    # Only one prompt is submitted, so decode the first returned sequence.
    print(tokenizer.decode(outputs[0]))


# Fail fast with a clear message instead of erroring only when the model is
# placed on the GPU.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available; this probe requires a GPU.")

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("Loading model to GPU...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="cuda",
)

for label, prompt in PROBES:
    run_probe(model, tokenizer, label, prompt)

# Report CUDA memory in GiB (bytes / 1024**3).
gib = 1024**3
print(f"\n📊 GPU Memory Used: {torch.cuda.memory_allocated() / gib:.2f} GB")
print(f"📊 GPU Memory Reserved: {torch.cuda.memory_reserved() / gib:.2f} GB")
print("\n✅ THE SPINE IS THINKING!")