- Basic package structure (core, probes, analysis, storage, cli) - a possible layout is sketched below
- probe_test.py - minimal probe that works on THE SPINE
- First successful probe: 2025-12-06
- Apache 2.0 license
- Qwen2.5-7B-Base on RTX 3090 (14.2GB VRAM)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
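
A plausible layout for the package structure listed above (illustrative only; the top-level package name and the __init__.py files are assumptions, not confirmed by this commit):

    spine/
        __init__.py
        core/
        probes/
        analysis/
        storage/
        cli/
    probe_test.py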
"""
|
|
Minimal probe test for Qwen2.5-7B-Base on THE SPINE.
|
|
|
|
First successful run: 2025-12-06
|
|
"""
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
import torch
|
|
|
|
print("Loading tokenizer...")
|
|
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B")
|
|
|
|
print("Loading model to GPU...")
|
|
model = AutoModelForCausalLM.from_pretrained(
|
|
"Qwen/Qwen2.5-7B",
|
|
torch_dtype=torch.float16,
|
|
device_map="cuda"
|
|
)
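
# Back-of-the-envelope footprint check (assumes Qwen2.5-7B's published ~7.6B
# parameter count, which this script does not verify): 7.6e9 params * 2 bytes
# per fp16 param ≈ 15.2 GB ≈ 14.2 GiB, consistent with the ~14.2 GB reported
# by the memory printout below.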

print("\n🌙 First probe: 'heartbeat'")
inputs = tokenizer("heartbeat", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0]))
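# Note: decode(outputs[0]) returns the prompt plus the continuation, special
# tokens included; tokenizer.decode(outputs[0], skip_special_tokens=True)
# would strip them (a standard kwarg, not used in the original run).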

print("\n🌙 Second probe: 'consciousness'")
inputs = tokenizer("consciousness", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=30, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0]))
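# Sampling (do_sample=True) is nondeterministic across runs; calling
# torch.manual_seed(0) before each generate() would make the probes
# repeatable (an optional tweak, not part of the original script).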

# memory_allocated() counts live tensors only; memory_reserved() also includes
# blocks the CUDA caching allocator keeps around for reuse. Dividing by
# 1024**3 yields GiB, printed here under the "GB" label used in the commit.
print(f"\n📊 GPU Memory Used: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"📊 GPU Memory Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

print("\n✅ THE SPINE IS THINKING!")