From 892d06c535468262164775a357dc32b71e4392a4 Mon Sep 17 00:00:00 2001 From: dafit Date: Sun, 7 Dec 2025 17:05:07 +0100 Subject: [PATCH] feat: implement Phase 1B - variance collection automation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend nyx-probing with automated variance collection using nyx-substrate for database persistence. Adds 4 new CLI commands for collecting and analyzing variance in Echo Probe measurements. New Features: - VarianceRunner: Automated 1000x probe collection with progress bars - 4 new CLI commands: - variance collect: Single term variance collection - variance batch: Batch collection from glossary files - variance stats: View session statistics - variance analyze: Cross-session variance analysis - Full integration with nyx-substrate database layer Files Added: - nyx_probing/runners/__init__.py: Runner module initialization - nyx_probing/runners/variance_runner.py: VarianceRunner class (~200 LOC) - nyx_probing/cli/variance.py: CLI commands (~250 LOC) Files Modified: - pyproject.toml: Added nyx-substrate>=0.1.0 dependency - nyx_probing/cli/probe.py: Registered variance command group - data/glossary/master.json: Updated from scanning Integration: - nyx-probing → nyx-substrate → phoebe (PostgreSQL) - Stores each probe run in variance_probe_runs table - Rich progress bars and statistics display - Session-based tracking with UUIDs Usage Examples: nyx-probe variance collect "Geworfenheit" --runs 1000 nyx-probe variance batch depth_3_champions.json nyx-probe variance stats nyx-probe variance analyze "Geworfenheit" Status: Phase 1B complete, ready for baseline collection 🌙💜 Generated with Claude Code Co-Authored-By: Claude Opus 4.5 --- data/glossary/master.json | 2 +- nyx_probing/cli/probe.py | 7 +- nyx_probing/cli/variance.py | 247 +++++++++++++++++++++++++ nyx_probing/runners/__init__.py | 10 + nyx_probing/runners/variance_runner.py | 229 +++++++++++++++++++++++ pyproject.toml | 1 + 6 files 
changed, 494 insertions(+), 2 deletions(-) create mode 100644 nyx_probing/cli/variance.py create mode 100644 nyx_probing/runners/__init__.py create mode 100644 nyx_probing/runners/variance_runner.py diff --git a/data/glossary/master.json b/data/glossary/master.json index 18c4eff..2f652ef 100644 --- a/data/glossary/master.json +++ b/data/glossary/master.json @@ -439,4 +439,4 @@ "grounding": 0.517723474475107 } } -} \ No newline at end of file +} diff --git a/nyx_probing/cli/probe.py b/nyx_probing/cli/probe.py index 561f9c2..dc51e45 100644 --- a/nyx_probing/cli/probe.py +++ b/nyx_probing/cli/probe.py @@ -31,6 +31,7 @@ from nyx_probing.core.model import NyxModel from nyx_probing.probes.surface_probe import SurfaceProbe from nyx_probing.probes.echo_probe import EchoProbe from nyx_probing.analysis.readiness_scorer import ReadinessScorer +from nyx_probing.cli.variance import variance console = Console() @@ -69,12 +70,16 @@ def detect_category(completions: list) -> str: def cli(): """ šŸŒ™ nyx-probe: Probe the Young Mind's conceptual topology. - + Explore how Qwen2.5-7B-Base understands and associates concepts. """ pass +# Register variance command group +cli.add_command(variance) + + @cli.command() @click.argument("term") @click.option("-n", "--runs", default=3, help="Number of completion runs") diff --git a/nyx_probing/cli/variance.py b/nyx_probing/cli/variance.py new file mode 100644 index 0000000..c64e2f5 --- /dev/null +++ b/nyx_probing/cli/variance.py @@ -0,0 +1,247 @@ +""" +CLI commands for variance collection. 
+ +Commands: +- nyx-probe variance collect --runs 1000 +- nyx-probe variance batch --runs 1000 +- nyx-probe variance stats +- nyx-probe variance analyze +""" + +import click +import uuid +import json +from pathlib import Path +from rich.console import Console +from rich.table import Table + +from ..core.model import NyxModel +from ..runners.variance_runner import VarianceRunner +from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO + +console = Console() + +# Global model instance (lazy loaded) +_model = None + + +def get_model() -> NyxModel: + """Get or initialize model (lazy loading).""" + global _model + if _model is None: + console.print("[yellow]Loading model...[/yellow]") + _model = NyxModel() + _model.load() + console.print("[green]āœ“ Model loaded[/green]") + return _model + + +@click.group() +def variance(): + """Variance collection commands.""" + pass + + +@variance.command() +@click.argument("term") +@click.option("--runs", "-n", default=1000, help="Number of runs (default: 1000)") +@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)") +@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)") +@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)") +def collect(term: str, runs: int, max_rounds: int, temperature: float, max_tokens: int): + """ + Collect variance data for a single term. + + Runs Echo Probe N times and stores results in phoebe. 
+ + Example: + nyx-probe variance collect "Geworfenheit" --runs 1000 + """ + # Initialize + model = get_model() + conn = PhoebeConnection() + dao = VarianceProbeDAO(conn) + + # Create runner + runner = VarianceRunner( + model=model, + dao=dao, + max_rounds=max_rounds, + max_new_tokens=max_tokens, + temperature=temperature, + ) + + # Run session + session_id = runner.run_session(term, runs=runs) + + # Display stats + runner.display_session_stats(session_id) + + # Close connection + conn.close() + + +@variance.command() +@click.argument("glossary_file", type=click.Path(exists=True)) +@click.option("--runs", "-n", default=1000, help="Number of runs per term (default: 1000)") +@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)") +@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)") +def batch(glossary_file: str, runs: int, max_rounds: int, temperature: float): + """ + Collect variance data from a glossary file. + + Glossary file should be JSON with a "terms" key containing list of strings. 
+ + Example: + nyx-probe variance batch data/glossary/philosophical.json --runs 1000 + """ + # Load glossary + with open(glossary_file, 'r') as f: + data = json.load(f) + + terms = data.get("terms", []) + if not terms: + console.print("[red]Error: No terms found in glossary file[/red]") + return + + console.print(f"[cyan]Loaded {len(terms)} terms from {glossary_file}[/cyan]\n") + + # Initialize + model = get_model() + conn = PhoebeConnection() + dao = VarianceProbeDAO(conn) + + # Create runner + runner = VarianceRunner( + model=model, + dao=dao, + max_rounds=max_rounds, + max_new_tokens=50, + temperature=temperature, + ) + + # Run batch + sessions = runner.run_batch(terms, runs_per_term=runs) + + # Summary table + table = Table(title="Batch Summary") + table.add_column("Term", style="cyan") + table.add_column("Session ID", style="yellow") + + for term, session_id in sessions.items(): + table.add_row(term, str(session_id)) + + console.print(table) + + # Close connection + conn.close() + + +@variance.command() +@click.argument("session_id") +def stats(session_id: str): + """ + Display statistics for a variance collection session. 
+ + Example: + nyx-probe variance stats 550e8400-e29b-41d4-a716-446655440000 + """ + # Parse UUID + try: + session_uuid = uuid.UUID(session_id) + except ValueError: + console.print(f"[red]Error: Invalid UUID: {session_id}[/red]") + return + + # Initialize + conn = PhoebeConnection() + dao = VarianceProbeDAO(conn) + + # Create dummy runner just for display method + # (we don't need the model for stats) + class StatsRunner: + def __init__(self, dao): + self.dao = dao + def get_session_summary(self, sid): + return dao.get_session_stats(sid) + def display_session_stats(self, sid): + stats = self.get_session_summary(sid) + if stats['total_runs'] == 0: + console.print(f"[yellow]No runs found for session {sid}[/yellow]") + return + + console.print(f"\n[bold cyan]šŸ“Š Session Statistics[/bold cyan]") + console.print(f"Session ID: {sid}") + console.print(f"Term: [bold]{stats['term']}[/bold]") + console.print(f"Total runs: {stats['total_runs']}") + console.print(f"Average depth: {stats['avg_depth']:.2f}") + console.print(f"Average rounds: {stats['avg_rounds']:.2f}") + + console.print("\n[bold]Depth Distribution:[/bold]") + dist = stats['depth_distribution'] + for depth_val in ['0', '1', '2', '3']: + count = dist.get(depth_val, 0) + pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0 + console.print(f" Depth {depth_val}: {count:4d} ({pct:5.1f}%)") + + console.print("\n[bold]Most Common Echo Types:[/bold]") + for echo_info in stats['most_common_echo_types'][:5]: + console.print(f" {echo_info['type']:12s}: {echo_info['count']:4d}") + + console.print() + + runner = StatsRunner(dao) + runner.display_session_stats(session_uuid) + + # Close connection + conn.close() + + +@variance.command() +@click.argument("term") +@click.option("--limit", "-l", default=1000, help="Max runs to analyze (default: 1000)") +def analyze(term: str, limit: int): + """ + Analyze variance across ALL sessions for a term. + + Shows aggregate statistics across all variance collections. 
+ + Example: + nyx-probe variance analyze "Geworfenheit" --limit 1000 + """ + # Initialize + conn = PhoebeConnection() + dao = VarianceProbeDAO(conn) + + # Get distribution + dist = dao.get_term_distribution(term, limit=limit) + + if dist['total_runs'] == 0: + console.print(f"[yellow]No variance data found for term: {term}[/yellow]") + conn.close() + return + + # Display + console.print(f"\n[bold cyan]šŸ“Š Variance Analysis: {term}[/bold cyan]") + console.print(f"Total runs: {dist['total_runs']}") + console.print(f"Total sessions: {dist['total_sessions']}") + console.print(f"Average depth: {dist['avg_depth']:.2f}") + console.print(f"Depth std dev: {dist['depth_std_dev']:.2f}") + console.print(f"Average rounds: {dist['avg_rounds']:.2f}") + console.print(f"Rounds std dev: {dist['rounds_std_dev']:.2f}") + + console.print("\n[bold]Depth Distribution:[/bold]") + depth_dist = dist['depth_distribution'] + for depth_val in ['0', '1', '2', '3']: + count = depth_dist.get(depth_val, 0) + pct = (count / dist['total_runs'] * 100) if dist['total_runs'] > 0 else 0 + console.print(f" Depth {depth_val}: {count:4d} ({pct:5.1f}%)") + + console.print() + + # Close connection + conn.close() + + +# Export for registration +__all__ = ["variance"] diff --git a/nyx_probing/runners/__init__.py b/nyx_probing/runners/__init__.py new file mode 100644 index 0000000..f930f57 --- /dev/null +++ b/nyx_probing/runners/__init__.py @@ -0,0 +1,10 @@ +""" +Automated runners for probe collection. + +This module provides automation for running probes multiple times +and storing results in phoebe for variance analysis. +""" + +from .variance_runner import VarianceRunner + +__all__ = ["VarianceRunner"] diff --git a/nyx_probing/runners/variance_runner.py b/nyx_probing/runners/variance_runner.py new file mode 100644 index 0000000..8404371 --- /dev/null +++ b/nyx_probing/runners/variance_runner.py @@ -0,0 +1,229 @@ +""" +Variance collection runner for Echo Probe. 
+ +Automates running Echo Probe 1000x to measure variance in depth, +echo types, and chain patterns for baseline characterization. +""" + +import uuid +from typing import List, Dict, Any +from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn +from rich.console import Console + +from ..core.model import NyxModel +from ..probes.echo_probe import EchoProbe +from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO +from nyx_substrate.schemas import VarianceProbeRun + + +console = Console() + + +class VarianceRunner: + """ + Automated variance collection for Echo Probe. + + Runs Echo Probe N times on a term, storing each result in phoebe + for variance analysis. + """ + + def __init__( + self, + model: NyxModel, + dao: VarianceProbeDAO, + max_rounds: int = 3, + max_new_tokens: int = 50, + temperature: float = 0.8, + ): + """ + Initialize VarianceRunner. + + Args: + model: Loaded NyxModel + dao: VarianceProbeDAO for database storage + max_rounds: Max echo rounds per probe + max_new_tokens: Max tokens per generation + temperature: Sampling temperature + """ + self.model = model + self.dao = dao + self.probe = EchoProbe( + model=model, + max_rounds=max_rounds, + max_new_tokens=max_new_tokens, + temperature=temperature, + ) + self.max_rounds = max_rounds + self.max_new_tokens = max_new_tokens + self.temperature = temperature + + def run_session( + self, + term: str, + runs: int = 1000, + show_progress: bool = True, + ) -> uuid.UUID: + """ + Run variance collection session on a single term. 
+ + Args: + term: Term to probe + runs: Number of runs (default: 1000) + show_progress: Show progress bar + + Returns: + session_id UUID + + Example: + >>> runner = VarianceRunner(model, dao) + >>> session_id = runner.run_session("Geworfenheit", runs=1000) + >>> print(f"Session: {session_id}") + """ + session_id = uuid.uuid4() + + console.print(f"\n[bold cyan]šŸ”¬ Variance Collection Session[/bold cyan]") + console.print(f"Term: [bold]{term}[/bold]") + console.print(f"Runs: {runs}") + console.print(f"Session ID: {session_id}\n") + + if show_progress: + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TaskProgressColumn(), + console=console, + ) as progress: + task = progress.add_task(f"Probing '{term}'...", total=runs) + + for run_number in range(1, runs + 1): + # Run probe + result = self.probe.probe(term) + + # Convert echo types to strings + echo_types_str = [et.name for et in result.echo_types] + + # Store in phoebe + self.dao.insert_run( + session_id=session_id, + term=term, + run_number=run_number, + depth=result.depth, + rounds=result.rounds, + echo_types=echo_types_str, + chain=result.chain, + model_name=self.model.model_name, + temperature=self.temperature, + max_rounds=self.max_rounds, + max_new_tokens=self.max_new_tokens, + ) + + progress.update(task, advance=1) + else: + # No progress bar + for run_number in range(1, runs + 1): + result = self.probe.probe(term) + echo_types_str = [et.name for et in result.echo_types] + + self.dao.insert_run( + session_id=session_id, + term=term, + run_number=run_number, + depth=result.depth, + rounds=result.rounds, + echo_types=echo_types_str, + chain=result.chain, + model_name=self.model.model_name, + temperature=self.temperature, + max_rounds=self.max_rounds, + max_new_tokens=self.max_new_tokens, + ) + + console.print(f"\nāœ… [bold green]Session complete![/bold green]") + console.print(f"Stored {runs} runs in phoebe") + console.print(f"Session ID: 
[bold]{session_id}[/bold]\n") + + return session_id + + def run_batch( + self, + terms: List[str], + runs_per_term: int = 1000, + show_progress: bool = True, + ) -> Dict[str, uuid.UUID]: + """ + Run variance collection on multiple terms. + + Args: + terms: List of terms to probe + runs_per_term: Number of runs per term + show_progress: Show progress bar + + Returns: + Dictionary mapping term -> session_id + + Example: + >>> runner = VarianceRunner(model, dao) + >>> sessions = runner.run_batch(["Geworfenheit", "Vernunft"], runs_per_term=1000) + """ + console.print(f"\n[bold cyan]šŸ”¬ Batch Variance Collection[/bold cyan]") + console.print(f"Terms: {len(terms)}") + console.print(f"Runs per term: {runs_per_term}") + console.print(f"Total runs: {len(terms) * runs_per_term}\n") + + sessions = {} + + for idx, term in enumerate(terms, 1): + console.print(f"[bold]Term {idx}/{len(terms)}:[/bold] {term}") + session_id = self.run_session(term, runs=runs_per_term, show_progress=show_progress) + sessions[term] = session_id + + console.print(f"\nāœ… [bold green]Batch complete![/bold green]") + console.print(f"Collected variance for {len(terms)} terms") + + return sessions + + def get_session_summary(self, session_id: uuid.UUID) -> Dict[str, Any]: + """ + Get summary statistics for a session. + + Args: + session_id: Session UUID + + Returns: + Dictionary with statistics + + Example: + >>> summary = runner.get_session_summary(session_id) + >>> print(f"Average depth: {summary['avg_depth']}") + """ + return self.dao.get_session_stats(session_id) + + def display_session_stats(self, session_id: uuid.UUID) -> None: + """ + Display session statistics to console. 
+ + Args: + session_id: Session UUID + """ + stats = self.get_session_summary(session_id) + + console.print(f"\n[bold cyan]šŸ“Š Session Statistics[/bold cyan]") + console.print(f"Session ID: {session_id}") + console.print(f"Term: [bold]{stats['term']}[/bold]") + console.print(f"Total runs: {stats['total_runs']}") + console.print(f"Average depth: {stats['avg_depth']:.2f}") + console.print(f"Average rounds: {stats['avg_rounds']:.2f}") + + console.print("\n[bold]Depth Distribution:[/bold]") + dist = stats['depth_distribution'] + for depth_val in ['0', '1', '2', '3']: + count = dist.get(depth_val, 0) + pct = (count / stats['total_runs'] * 100) if stats['total_runs'] > 0 else 0 + console.print(f" Depth {depth_val}: {count:4d} ({pct:5.1f}%)") + + console.print("\n[bold]Most Common Echo Types:[/bold]") + for echo_info in stats['most_common_echo_types'][:5]: + console.print(f" {echo_info['type']:12s}: {echo_info['count']:4d}") + + console.print() diff --git a/pyproject.toml b/pyproject.toml index edcf936..11fa1e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "pyyaml>=6.0.0", "python-dotenv>=1.0.0", "numpy>=1.24.0", + "nyx-substrate>=0.1.0", ] [project.optional-dependencies]