feat: implement Phase 1B - variance collection automation
Extend nyx-probing with automated variance collection using nyx-substrate for database persistence. Adds 4 new CLI commands for collecting and analyzing variance in Echo Probe measurements. New Features: - VarianceRunner: Automated 1000x probe collection with progress bars - 4 new CLI commands: - variance collect: Single term variance collection - variance batch: Batch collection from glossary files - variance stats: View session statistics - variance analyze: Cross-session variance analysis - Full integration with nyx-substrate database layer Files Added: - nyx_probing/runners/__init__.py: Runner module initialization - nyx_probing/runners/variance_runner.py: VarianceRunner class (~200 LOC) - nyx_probing/cli/variance.py: CLI commands (~250 LOC) Files Modified: - pyproject.toml: Added nyx-substrate>=0.1.0 dependency - nyx_probing/cli/probe.py: Registered variance command group - data/glossary/master.json: Updated from scanning Integration: - nyx-probing → nyx-substrate → phoebe (PostgreSQL) - Stores each probe run in variance_probe_runs table - Rich progress bars and statistics display - Session-based tracking with UUIDs Usage Examples: nyx-probe variance collect "Geworfenheit" --runs 1000 nyx-probe variance batch depth_3_champions.json nyx-probe variance stats <SESSION_ID> nyx-probe variance analyze "Geworfenheit" Status: Phase 1B complete, ready for baseline collection 🌙💜 Generated with Claude Code Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -439,4 +439,4 @@
|
||||
"grounding": 0.517723474475107
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ from nyx_probing.core.model import NyxModel
|
||||
from nyx_probing.probes.surface_probe import SurfaceProbe
|
||||
from nyx_probing.probes.echo_probe import EchoProbe
|
||||
from nyx_probing.analysis.readiness_scorer import ReadinessScorer
|
||||
from nyx_probing.cli.variance import variance
|
||||
|
||||
console = Console()
|
||||
|
||||
@@ -69,12 +70,16 @@ def detect_category(completions: list) -> str:
|
||||
def cli():
|
||||
"""
|
||||
🌙 nyx-probe: Probe the Young Mind's conceptual topology.
|
||||
|
||||
|
||||
Explore how Qwen2.5-7B-Base understands and associates concepts.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Register variance command group
|
||||
cli.add_command(variance)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("term")
|
||||
@click.option("-n", "--runs", default=3, help="Number of completion runs")
|
||||
|
||||
247
nyx_probing/cli/variance.py
Normal file
247
nyx_probing/cli/variance.py
Normal file
@@ -0,0 +1,247 @@
|
||||
"""
|
||||
CLI commands for variance collection.
|
||||
|
||||
Commands:
|
||||
- nyx-probe variance collect <TERM> --runs 1000
|
||||
- nyx-probe variance batch <FILE> --runs 1000
|
||||
- nyx-probe variance stats <SESSION_ID>
|
||||
- nyx-probe variance analyze <TERM>
|
||||
"""
|
||||
|
||||
import click
|
||||
import uuid
|
||||
import json
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from ..core.model import NyxModel
|
||||
from ..runners.variance_runner import VarianceRunner
|
||||
from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO
|
||||
|
||||
console = Console()
|
||||
|
||||
# Global model instance (lazy loaded)
|
||||
_model = None
|
||||
|
||||
|
||||
def get_model() -> NyxModel:
    """Return the shared NyxModel instance, loading it on first use."""
    global _model
    # Fast path: model already loaded by a previous command invocation.
    if _model is not None:
        return _model
    console.print("[yellow]Loading model...[/yellow]")
    _model = NyxModel()
    _model.load()
    console.print("[green]✓ Model loaded[/green]")
    return _model
|
||||
|
||||
|
||||
@click.group()
def variance():
    """Variance collection commands."""
    # Container group only; subcommands (collect/batch/stats/analyze)
    # attach themselves via @variance.command() below.
    pass
|
||||
|
||||
|
||||
@variance.command()
@click.argument("term")
@click.option("--runs", "-n", default=1000, help="Number of runs (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def collect(term: str, runs: int, max_rounds: int, temperature: float, max_tokens: int):
    """
    Collect variance data for a single term.

    Runs Echo Probe N times and stores results in phoebe.

    Example:
        nyx-probe variance collect "Geworfenheit" --runs 1000
    """
    # Load the (cached) model before opening a DB connection so a model
    # failure doesn't leave a dangling connection.
    model = get_model()
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)

        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            max_new_tokens=max_tokens,
            temperature=temperature,
        )

        # Run the full session, then show its summary statistics.
        session_id = runner.run_session(term, runs=runs)
        runner.display_session_stats(session_id)
    finally:
        # Always release the connection, even if a probe run fails mid-session
        # (previously the close was skipped on any exception).
        conn.close()
|
||||
|
||||
|
||||
@variance.command()
@click.argument("glossary_file", type=click.Path(exists=True))
@click.option("--runs", "-n", default=1000, help="Number of runs per term (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def batch(glossary_file: str, runs: int, max_rounds: int, temperature: float, max_tokens: int):
    """
    Collect variance data from a glossary file.

    Glossary file should be JSON with a "terms" key containing list of strings.

    Example:
        nyx-probe variance batch data/glossary/philosophical.json --runs 1000
    """
    # Load and validate the glossary before touching model or database.
    with open(glossary_file, 'r') as f:
        data = json.load(f)

    terms = data.get("terms", [])
    if not terms:
        console.print("[red]Error: No terms found in glossary file[/red]")
        return

    console.print(f"[cyan]Loaded {len(terms)} terms from {glossary_file}[/cyan]\n")

    model = get_model()
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)

        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            # Previously hard-coded to 50; now configurable via --max-tokens
            # (default unchanged), matching the `collect` command.
            max_new_tokens=max_tokens,
            temperature=temperature,
        )

        sessions = runner.run_batch(terms, runs_per_term=runs)

        # Summary table: one row per term with its session UUID.
        table = Table(title="Batch Summary")
        table.add_column("Term", style="cyan")
        table.add_column("Session ID", style="yellow")
        for term, session_id in sessions.items():
            table.add_row(term, str(session_id))
        console.print(table)
    finally:
        # Always release the connection, even if the batch aborts partway.
        conn.close()
|
||||
|
||||
|
||||
@variance.command()
@click.argument("session_id")
def stats(session_id: str):
    """
    Display statistics for a variance collection session.

    Example:
        nyx-probe variance stats 550e8400-e29b-41d4-a716-446655440000
    """
    # Validate the UUID before opening any database connection.
    try:
        session_uuid = uuid.UUID(session_id)
    except ValueError:
        console.print(f"[red]Error: Invalid UUID: {session_id}[/red]")
        return

    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)

        # The previous version wrapped this in a throwaway StatsRunner class
        # whose get_session_summary read the closed-over `dao` instead of
        # `self.dao`; the class added nothing, so the display is inlined.
        stats = dao.get_session_stats(session_uuid)

        if stats['total_runs'] == 0:
            console.print(f"[yellow]No runs found for session {session_uuid}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Session Statistics[/bold cyan]")
        console.print(f"Session ID: {session_uuid}")
        console.print(f"Term: [bold]{stats['term']}[/bold]")
        console.print(f"Total runs: {stats['total_runs']}")
        console.print(f"Average depth: {stats['avg_depth']:.2f}")
        console.print(f"Average rounds: {stats['avg_rounds']:.2f}")

        console.print("\n[bold]Depth Distribution:[/bold]")
        dist = stats['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = dist.get(depth_val, 0)
            pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")

        console.print("\n[bold]Most Common Echo Types:[/bold]")
        for echo_info in stats['most_common_echo_types'][:5]:
            console.print(f"  {echo_info['type']:12s}: {echo_info['count']:4d}")

        console.print()
    finally:
        # Always release the connection, even if the stats query fails.
        conn.close()
|
||||
|
||||
|
||||
@variance.command()
@click.argument("term")
@click.option("--limit", "-l", default=1000, help="Max runs to analyze (default: 1000)")
def analyze(term: str, limit: int):
    """
    Analyze variance across ALL sessions for a term.

    Shows aggregate statistics across all variance collections.

    Example:
        nyx-probe variance analyze "Geworfenheit" --limit 1000
    """
    conn = PhoebeConnection()
    try:
        dao = VarianceProbeDAO(conn)

        # Aggregate distribution across every stored session for this term.
        dist = dao.get_term_distribution(term, limit=limit)

        if dist['total_runs'] == 0:
            console.print(f"[yellow]No variance data found for term: {term}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Variance Analysis: {term}[/bold cyan]")
        console.print(f"Total runs: {dist['total_runs']}")
        console.print(f"Total sessions: {dist['total_sessions']}")
        console.print(f"Average depth: {dist['avg_depth']:.2f}")
        console.print(f"Depth std dev: {dist['depth_std_dev']:.2f}")
        console.print(f"Average rounds: {dist['avg_rounds']:.2f}")
        console.print(f"Rounds std dev: {dist['rounds_std_dev']:.2f}")

        console.print("\n[bold]Depth Distribution:[/bold]")
        depth_dist = dist['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = depth_dist.get(depth_val, 0)
            pct = (count / dist['total_runs'] * 100) if dist['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")

        console.print()
    finally:
        # Single close site replaces the two scattered conn.close() calls and
        # also covers the exception path (previously a leak).
        conn.close()
|
||||
|
||||
|
||||
# Export for registration
|
||||
__all__ = ["variance"]
|
||||
10
nyx_probing/runners/__init__.py
Normal file
10
nyx_probing/runners/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Automated runners for probe collection.
|
||||
|
||||
This module provides automation for running probes multiple times
|
||||
and storing results in phoebe for variance analysis.
|
||||
"""
|
||||
|
||||
from .variance_runner import VarianceRunner
|
||||
|
||||
__all__ = ["VarianceRunner"]
|
||||
229
nyx_probing/runners/variance_runner.py
Normal file
229
nyx_probing/runners/variance_runner.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""
|
||||
Variance collection runner for Echo Probe.
|
||||
|
||||
Automates running Echo Probe 1000x to measure variance in depth,
|
||||
echo types, and chain patterns for baseline characterization.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import List, Dict, Any
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
|
||||
from rich.console import Console
|
||||
|
||||
from ..core.model import NyxModel
|
||||
from ..probes.echo_probe import EchoProbe
|
||||
from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO
|
||||
from nyx_substrate.schemas import VarianceProbeRun
|
||||
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
class VarianceRunner:
    """
    Automated variance collection for Echo Probe.

    Runs Echo Probe N times on a term, storing each result in phoebe
    for variance analysis.
    """

    def __init__(
        self,
        model: NyxModel,
        dao: VarianceProbeDAO,
        max_rounds: int = 3,
        max_new_tokens: int = 50,
        temperature: float = 0.8,
    ):
        """
        Initialize VarianceRunner.

        Args:
            model: Loaded NyxModel
            dao: VarianceProbeDAO for database storage
            max_rounds: Max echo rounds per probe
            max_new_tokens: Max tokens per generation
            temperature: Sampling temperature
        """
        self.model = model
        self.dao = dao
        self.probe = EchoProbe(
            model=model,
            max_rounds=max_rounds,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
        # Kept so every stored run records the exact probe configuration.
        self.max_rounds = max_rounds
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def _probe_and_store(self, session_id: uuid.UUID, term: str, run_number: int) -> None:
        """Run one Echo Probe pass on *term* and persist the result in phoebe."""
        result = self.probe.probe(term)

        # Echo types are enum members; store their names as plain strings.
        echo_types_str = [et.name for et in result.echo_types]

        self.dao.insert_run(
            session_id=session_id,
            term=term,
            run_number=run_number,
            depth=result.depth,
            rounds=result.rounds,
            echo_types=echo_types_str,
            chain=result.chain,
            model_name=self.model.model_name,
            temperature=self.temperature,
            max_rounds=self.max_rounds,
            max_new_tokens=self.max_new_tokens,
        )

    def run_session(
        self,
        term: str,
        runs: int = 1000,
        show_progress: bool = True,
    ) -> uuid.UUID:
        """
        Run variance collection session on a single term.

        Args:
            term: Term to probe
            runs: Number of runs (default: 1000)
            show_progress: Show progress bar

        Returns:
            session_id UUID

        Example:
            >>> runner = VarianceRunner(model, dao)
            >>> session_id = runner.run_session("Geworfenheit", runs=1000)
            >>> print(f"Session: {session_id}")
        """
        session_id = uuid.uuid4()

        console.print(f"\n[bold cyan]🔬 Variance Collection Session[/bold cyan]")
        console.print(f"Term: [bold]{term}[/bold]")
        console.print(f"Runs: {runs}")
        console.print(f"Session ID: {session_id}\n")

        # Both branches share _probe_and_store; previously the whole
        # probe+insert body was duplicated in each branch, inviting drift.
        if show_progress:
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                TaskProgressColumn(),
                console=console,
            ) as progress:
                task = progress.add_task(f"Probing '{term}'...", total=runs)
                for run_number in range(1, runs + 1):
                    self._probe_and_store(session_id, term, run_number)
                    progress.update(task, advance=1)
        else:
            for run_number in range(1, runs + 1):
                self._probe_and_store(session_id, term, run_number)

        console.print(f"\n✅ [bold green]Session complete![/bold green]")
        console.print(f"Stored {runs} runs in phoebe")
        console.print(f"Session ID: [bold]{session_id}[/bold]\n")

        return session_id

    def run_batch(
        self,
        terms: List[str],
        runs_per_term: int = 1000,
        show_progress: bool = True,
    ) -> Dict[str, uuid.UUID]:
        """
        Run variance collection on multiple terms.

        Args:
            terms: List of terms to probe
            runs_per_term: Number of runs per term
            show_progress: Show progress bar

        Returns:
            Dictionary mapping term -> session_id

        Example:
            >>> runner = VarianceRunner(model, dao)
            >>> sessions = runner.run_batch(["Geworfenheit", "Vernunft"], runs_per_term=1000)
        """
        console.print(f"\n[bold cyan]🔬 Batch Variance Collection[/bold cyan]")
        console.print(f"Terms: {len(terms)}")
        console.print(f"Runs per term: {runs_per_term}")
        console.print(f"Total runs: {len(terms) * runs_per_term}\n")

        sessions = {}

        for idx, term in enumerate(terms, 1):
            console.print(f"[bold]Term {idx}/{len(terms)}:[/bold] {term}")
            session_id = self.run_session(term, runs=runs_per_term, show_progress=show_progress)
            sessions[term] = session_id

        console.print(f"\n✅ [bold green]Batch complete![/bold green]")
        console.print(f"Collected variance for {len(terms)} terms")

        return sessions

    def get_session_summary(self, session_id: uuid.UUID) -> Dict[str, Any]:
        """
        Get summary statistics for a session.

        Args:
            session_id: Session UUID

        Returns:
            Dictionary with statistics

        Example:
            >>> summary = runner.get_session_summary(session_id)
            >>> print(f"Average depth: {summary['avg_depth']}")
        """
        return self.dao.get_session_stats(session_id)

    def display_session_stats(self, session_id: uuid.UUID) -> None:
        """
        Display session statistics to console.

        Args:
            session_id: Session UUID
        """
        stats = self.get_session_summary(session_id)

        # Guard empty sessions: formatting avg_depth/avg_rounds with :.2f
        # would crash on the null aggregates a zero-run session yields
        # (the CLI stats command already had this guard; this copy didn't).
        if stats['total_runs'] == 0:
            console.print(f"[yellow]No runs found for session {session_id}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Session Statistics[/bold cyan]")
        console.print(f"Session ID: {session_id}")
        console.print(f"Term: [bold]{stats['term']}[/bold]")
        console.print(f"Total runs: {stats['total_runs']}")
        console.print(f"Average depth: {stats['avg_depth']:.2f}")
        console.print(f"Average rounds: {stats['avg_rounds']:.2f}")

        console.print("\n[bold]Depth Distribution:[/bold]")
        dist = stats['depth_distribution']
        for depth_val in ['0', '1', '2', '3']:
            count = dist.get(depth_val, 0)
            pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0
            console.print(f"  Depth {depth_val}: {count:4d} ({pct:5.1f}%)")

        console.print("\n[bold]Most Common Echo Types:[/bold]")
        for echo_info in stats['most_common_echo_types'][:5]:
            console.print(f"  {echo_info['type']:12s}: {echo_info['count']:4d}")

        console.print()
|
||||
@@ -23,6 +23,7 @@ dependencies = [
|
||||
"pyyaml>=6.0.0",
|
||||
"python-dotenv>=1.0.0",
|
||||
"numpy>=1.24.0",
|
||||
"nyx-substrate>=0.1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
Reference in New Issue
Block a user