feat: implement Phase 1B - variance collection automation

Extend nyx-probing with automated variance collection using
nyx-substrate for database persistence. Adds 4 new CLI commands
for collecting and analyzing variance in Echo Probe measurements.

New Features:
- VarianceRunner: Automated 1000x probe collection with progress bars
- 4 new CLI commands:
  - variance collect: Single term variance collection
  - variance batch: Batch collection from glossary files
  - variance stats: View session statistics
  - variance analyze: Cross-session variance analysis
- Full integration with nyx-substrate database layer

Files Added:
- nyx_probing/runners/__init__.py: Runner module initialization
- nyx_probing/runners/variance_runner.py: VarianceRunner class (~200 LOC)
- nyx_probing/cli/variance.py: CLI commands (~250 LOC)

Files Modified:
- pyproject.toml: Added nyx-substrate>=0.1.0 dependency
- nyx_probing/cli/probe.py: Registered variance command group
- data/glossary/master.json: Regenerated from the latest glossary scan

Integration:
- nyx-probing → nyx-substrate → phoebe (PostgreSQL)
- Stores each probe run in variance_probe_runs table
- Rich progress bars and statistics display
- Session-based tracking with UUIDs

Usage Examples:
  nyx-probe variance collect "Geworfenheit" --runs 1000
  nyx-probe variance batch depth_3_champions.json
  nyx-probe variance stats <SESSION_ID>
  nyx-probe variance analyze "Geworfenheit"

Status: Phase 1B complete, ready for baseline collection

🌙💜 Generated with Claude Code
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-07 17:05:07 +01:00
parent f640dbdd65
commit 892d06c535
6 changed files with 494 additions and 2 deletions

View File

@@ -439,4 +439,4 @@
"grounding": 0.517723474475107
}
}
}
}

View File

@@ -31,6 +31,7 @@ from nyx_probing.core.model import NyxModel
from nyx_probing.probes.surface_probe import SurfaceProbe
from nyx_probing.probes.echo_probe import EchoProbe
from nyx_probing.analysis.readiness_scorer import ReadinessScorer
from nyx_probing.cli.variance import variance
console = Console()
@@ -69,12 +70,16 @@ def detect_category(completions: list) -> str:
def cli():
"""
🌙 nyx-probe: Probe the Young Mind's conceptual topology.
Explore how Qwen2.5-7B-Base understands and associates concepts.
"""
pass
# Register variance command group
cli.add_command(variance)
@cli.command()
@click.argument("term")
@click.option("-n", "--runs", default=3, help="Number of completion runs")

247
nyx_probing/cli/variance.py Normal file
View File

@@ -0,0 +1,247 @@
"""
CLI commands for variance collection.
Commands:
- nyx-probe variance collect <TERM> --runs 1000
- nyx-probe variance batch <FILE> --runs 1000
- nyx-probe variance stats <SESSION_ID>
- nyx-probe variance analyze <TERM>
"""
import click
import uuid
import json
from pathlib import Path
from rich.console import Console
from rich.table import Table
from ..core.model import NyxModel
from ..runners.variance_runner import VarianceRunner
from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO
console = Console()
# Module-level cache so the expensive model load happens at most once per process.
_model = None


def get_model() -> NyxModel:
    """Return the shared NyxModel instance, loading it on first use (lazy init)."""
    global _model
    if _model is not None:
        return _model
    console.print("[yellow]Loading model...[/yellow]")
    _model = NyxModel()
    _model.load()
    console.print("[green]✓ Model loaded[/green]")
    return _model
# Click sub-command group for all variance tooling; the probe CLI attaches it
# via `cli.add_command(variance)`. The docstring below is the `--help` text.
@click.group()
def variance():
    """Variance collection commands."""
    pass
@variance.command()
@click.argument("term")
@click.option("--runs", "-n", default=1000, help="Number of runs (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def collect(term: str, runs: int, max_rounds: int, temperature: float, max_tokens: int):
    """
    Collect variance data for a single term.

    Runs Echo Probe N times and stores results in phoebe.

    Example:
        nyx-probe variance collect "Geworfenheit" --runs 1000
    """
    # Load the model first: if that fails there is no connection to clean up yet.
    model = get_model()
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)
        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            max_new_tokens=max_tokens,
            temperature=temperature,
        )
        # Run the full session, then show its aggregate statistics.
        session_id = runner.run_session(term, runs=runs)
        runner.display_session_stats(session_id)
    finally:
        # Release the database connection even if a probe run raises mid-session.
        conn.close()
@variance.command()
@click.argument("glossary_file", type=click.Path(exists=True))
@click.option("--runs", "-n", default=1000, help="Number of runs per term (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def batch(glossary_file: str, runs: int, max_rounds: int, temperature: float, max_tokens: int = 50):
    """
    Collect variance data from a glossary file.

    Glossary file should be JSON with a "terms" key containing list of strings.

    Example:
        nyx-probe variance batch data/glossary/philosophical.json --runs 1000
    """
    # Load and validate the glossary before touching the model or database.
    with open(glossary_file, 'r') as f:
        data = json.load(f)
    terms = data.get("terms", [])
    if not terms:
        console.print("[red]Error: No terms found in glossary file[/red]")
        return
    console.print(f"[cyan]Loaded {len(terms)} terms from {glossary_file}[/cyan]\n")

    model = get_model()
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)
        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            # Exposed as --max-tokens for parity with `variance collect`.
            max_new_tokens=max_tokens,
            temperature=temperature,
        )
        # One session per term; returns {term: session_id}.
        sessions = runner.run_batch(terms, runs_per_term=runs)

        # Summary table mapping each term to its session UUID.
        table = Table(title="Batch Summary")
        table.add_column("Term", style="cyan")
        table.add_column("Session ID", style="yellow")
        for term, session_id in sessions.items():
            table.add_row(term, str(session_id))
        console.print(table)
    finally:
        # Release the database connection even if a mid-batch probe raises.
        conn.close()
@variance.command()
@click.argument("session_id")
def stats(session_id: str):
    """
    Display statistics for a variance collection session.

    Example:
        nyx-probe variance stats 550e8400-e29b-41d4-a716-446655440000
    """
    # Validate the UUID before opening any database connection.
    try:
        session_uuid = uuid.UUID(session_id)
    except ValueError:
        console.print(f"[red]Error: Invalid UUID: {session_id}[/red]")
        return

    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)
        # The model is not needed for stats; query the DAO directly instead of
        # constructing a VarianceRunner (or a throwaway stand-in for one).
        stats = dao.get_session_stats(session_uuid)
        if stats['total_runs'] == 0:
            console.print(f"[yellow]No runs found for session {session_uuid}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Session Statistics[/bold cyan]")
        console.print(f"Session ID: {session_uuid}")
        console.print(f"Term: [bold]{stats['term']}[/bold]")
        console.print(f"Total runs: {stats['total_runs']}")
        console.print(f"Average depth: {stats['avg_depth']:.2f}")
        console.print(f"Average rounds: {stats['avg_rounds']:.2f}")

        console.print("\n[bold]Depth Distribution:[/bold]")
        dist = stats['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = dist.get(depth_val, 0)
            pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")

        console.print("\n[bold]Most Common Echo Types:[/bold]")
        for echo_info in stats['most_common_echo_types'][:5]:
            console.print(f"  {echo_info['type']:12s}: {echo_info['count']:4d}")
        console.print()
    finally:
        # Close the connection on every path, including the early return.
        conn.close()
@variance.command()
@click.argument("term")
@click.option("--limit", "-l", default=1000, help="Max runs to analyze (default: 1000)")
def analyze(term: str, limit: int):
    """
    Analyze variance across ALL sessions for a term.

    Shows aggregate statistics across all variance collections.

    Example:
        nyx-probe variance analyze "Geworfenheit" --limit 1000
    """
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)
        # Cross-session aggregate for the term, capped at `limit` runs.
        dist = dao.get_term_distribution(term, limit=limit)
        if dist['total_runs'] == 0:
            console.print(f"[yellow]No variance data found for term: {term}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Variance Analysis: {term}[/bold cyan]")
        console.print(f"Total runs: {dist['total_runs']}")
        console.print(f"Total sessions: {dist['total_sessions']}")
        console.print(f"Average depth: {dist['avg_depth']:.2f}")
        console.print(f"Depth std dev: {dist['depth_std_dev']:.2f}")
        console.print(f"Average rounds: {dist['avg_rounds']:.2f}")
        console.print(f"Rounds std dev: {dist['rounds_std_dev']:.2f}")

        console.print("\n[bold]Depth Distribution:[/bold]")
        depth_dist = dist['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = depth_dist.get(depth_val, 0)
            pct = (count / dist['total_runs'] * 100) if dist['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")
        console.print()
    finally:
        # try/finally replaces the duplicated close() on the early-return path.
        conn.close()
# Export for registration
__all__ = ["variance"]

View File

@@ -0,0 +1,10 @@
"""
Automated runners for probe collection.
This module provides automation for running probes multiple times
and storing results in phoebe for variance analysis.
"""
from .variance_runner import VarianceRunner
__all__ = ["VarianceRunner"]

View File

@@ -0,0 +1,229 @@
"""
Variance collection runner for Echo Probe.
Automates running Echo Probe 1000x to measure variance in depth,
echo types, and chain patterns for baseline characterization.
"""
import uuid
from typing import List, Dict, Any
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
from rich.console import Console
from ..core.model import NyxModel
from ..probes.echo_probe import EchoProbe
from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO
from nyx_substrate.schemas import VarianceProbeRun
console = Console()
class VarianceRunner:
    """
    Automated variance collection for Echo Probe.

    Runs Echo Probe N times on a term, storing each result in phoebe
    for variance analysis.
    """

    def __init__(
        self,
        model: NyxModel,
        dao: VarianceProbeDAO,
        max_rounds: int = 3,
        max_new_tokens: int = 50,
        temperature: float = 0.8,
    ):
        """
        Initialize VarianceRunner.

        Args:
            model: Loaded NyxModel
            dao: VarianceProbeDAO for database storage
            max_rounds: Max echo rounds per probe
            max_new_tokens: Max tokens per generation
            temperature: Sampling temperature
        """
        self.model = model
        self.dao = dao
        self.probe = EchoProbe(
            model=model,
            max_rounds=max_rounds,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        # Kept separately so every stored run records the exact probe config.
        self.max_rounds = max_rounds
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def _run_and_store(self, session_id: uuid.UUID, term: str, run_number: int) -> None:
        """Run a single Echo Probe on `term` and persist the result in phoebe."""
        result = self.probe.probe(term)
        # Echo-type enum members are stored by name for DB friendliness.
        echo_types_str = [et.name for et in result.echo_types]
        self.dao.insert_run(
            session_id=session_id,
            term=term,
            run_number=run_number,
            depth=result.depth,
            rounds=result.rounds,
            echo_types=echo_types_str,
            chain=result.chain,
            model_name=self.model.model_name,
            temperature=self.temperature,
            max_rounds=self.max_rounds,
            max_new_tokens=self.max_new_tokens,
        )

    def run_session(
        self,
        term: str,
        runs: int = 1000,
        show_progress: bool = True,
    ) -> uuid.UUID:
        """
        Run variance collection session on a single term.

        Args:
            term: Term to probe
            runs: Number of runs (default: 1000)
            show_progress: Show progress bar

        Returns:
            session_id UUID

        Example:
            >>> runner = VarianceRunner(model, dao)
            >>> session_id = runner.run_session("Geworfenheit", runs=1000)
            >>> print(f"Session: {session_id}")
        """
        session_id = uuid.uuid4()
        console.print(f"\n[bold cyan]🔬 Variance Collection Session[/bold cyan]")
        console.print(f"Term: [bold]{term}[/bold]")
        console.print(f"Runs: {runs}")
        console.print(f"Session ID: {session_id}\n")

        # `disable` lets one loop serve both modes instead of duplicating the
        # whole run-and-insert body for the no-progress branch.
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            console=console,
            disable=not show_progress,
        ) as progress:
            task = progress.add_task(f"Probing '{term}'...", total=runs)
            for run_number in range(1, runs + 1):
                self._run_and_store(session_id, term, run_number)
                progress.update(task, advance=1)

        console.print(f"\n✅ [bold green]Session complete![/bold green]")
        console.print(f"Stored {runs} runs in phoebe")
        console.print(f"Session ID: [bold]{session_id}[/bold]\n")
        return session_id

    def run_batch(
        self,
        terms: List[str],
        runs_per_term: int = 1000,
        show_progress: bool = True,
    ) -> Dict[str, uuid.UUID]:
        """
        Run variance collection on multiple terms.

        Args:
            terms: List of terms to probe
            runs_per_term: Number of runs per term
            show_progress: Show progress bar

        Returns:
            Dictionary mapping term -> session_id

        Example:
            >>> runner = VarianceRunner(model, dao)
            >>> sessions = runner.run_batch(["Geworfenheit", "Vernunft"], runs_per_term=1000)
        """
        console.print(f"\n[bold cyan]🔬 Batch Variance Collection[/bold cyan]")
        console.print(f"Terms: {len(terms)}")
        console.print(f"Runs per term: {runs_per_term}")
        console.print(f"Total runs: {len(terms) * runs_per_term}\n")

        sessions = {}
        for idx, term in enumerate(terms, 1):
            console.print(f"[bold]Term {idx}/{len(terms)}:[/bold] {term}")
            session_id = self.run_session(term, runs=runs_per_term, show_progress=show_progress)
            sessions[term] = session_id

        console.print(f"\n✅ [bold green]Batch complete![/bold green]")
        console.print(f"Collected variance for {len(terms)} terms")
        return sessions

    def get_session_summary(self, session_id: uuid.UUID) -> Dict[str, Any]:
        """
        Get summary statistics for a session.

        Args:
            session_id: Session UUID

        Returns:
            Dictionary with statistics

        Example:
            >>> summary = runner.get_session_summary(session_id)
            >>> print(f"Average depth: {summary['avg_depth']}")
        """
        return self.dao.get_session_stats(session_id)

    def display_session_stats(self, session_id: uuid.UUID) -> None:
        """
        Display session statistics to console.

        Args:
            session_id: Session UUID
        """
        stats = self.get_session_summary(session_id)
        console.print(f"\n[bold cyan]📊 Session Statistics[/bold cyan]")
        console.print(f"Session ID: {session_id}")
        console.print(f"Term: [bold]{stats['term']}[/bold]")
        console.print(f"Total runs: {stats['total_runs']}")
        console.print(f"Average depth: {stats['avg_depth']:.2f}")
        console.print(f"Average rounds: {stats['avg_rounds']:.2f}")
        console.print("\n[bold]Depth Distribution:[/bold]")
        dist = stats['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = dist.get(depth_val, 0)
            pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")
        console.print("\n[bold]Most Common Echo Types:[/bold]")
        for echo_info in stats['most_common_echo_types'][:5]:
            console.print(f"  {echo_info['type']:12s}: {echo_info['count']:4d}")
        console.print()

View File

@@ -23,6 +23,7 @@ dependencies = [
"pyyaml>=6.0.0",
"python-dotenv>=1.0.0",
"numpy>=1.24.0",
"nyx-substrate>=0.1.0",
]
[project.optional-dependencies]