"""
CLI commands for variance collection.

Commands:
- nyx-probe variance collect TERM --runs 1000
- nyx-probe variance batch GLOSSARY_FILE --runs 1000
- nyx-probe variance stats SESSION_ID
- nyx-probe variance analyze TERM
"""

import json
import uuid
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator

import click
from rich.console import Console
from rich.table import Table

from ..core.model import NyxModel
from ..runners.variance_runner import VarianceRunner
from nyx_substrate.database import PhoebeConnection, VarianceProbeDAO

console = Console()

# Global model instance (lazy loaded)
_model = None

# Depth buckets shown in distribution tables. The distribution dicts are
# looked up with string keys, so the buckets are kept as strings.
_DEPTH_LEVELS = ("0", "1", "2", "3")


def get_model() -> NyxModel:
    """Return the shared NyxModel, loading it on first use.

    The instance is cached in the module-level ``_model`` so that repeated
    calls within one process do not reload it.
    """
    global _model
    if _model is None:
        console.print("[yellow]Loading model...[/yellow]")
        _model = NyxModel()
        _model.load()
        console.print("[green]✓ Model loaded[/green]")
    return _model


@contextmanager
def _open_dao() -> Iterator[VarianceProbeDAO]:
    """Yield a VarianceProbeDAO and close its connection on exit.

    The connection is closed even when the body raises, so a failing run
    does not leave a PhoebeConnection open.
    """
    conn = PhoebeConnection()
    try:
        yield VarianceProbeDAO(conn)
    finally:
        conn.close()


def _print_depth_distribution(distribution: dict, total_runs: int) -> None:
    """Print count and percentage for each bucket in ``_DEPTH_LEVELS``.

    Buckets missing from ``distribution`` are shown with a count of 0.
    """
    console.print("\n[bold]Depth Distribution:[/bold]")
    for depth_val in _DEPTH_LEVELS:
        count = distribution.get(depth_val, 0)
        # Guard against division by zero for callers that pass an empty set.
        pct = (count / total_runs * 100) if total_runs > 0 else 0
        console.print(f" Depth {depth_val}: {count:4d} ({pct:5.1f}%)")


def _print_session_stats(dao: VarianceProbeDAO, session_id: uuid.UUID) -> None:
    """Fetch one session's statistics from ``dao`` and print them.

    Prints a notice and returns early when the session has no runs.
    """
    summary = dao.get_session_stats(session_id)
    total_runs = summary['total_runs']

    if total_runs == 0:
        console.print(f"[yellow]No runs found for session {session_id}[/yellow]")
        return

    console.print("\n[bold cyan]📊 Session Statistics[/bold cyan]")
    console.print(f"Session ID: {session_id}")
    console.print(f"Term: [bold]{summary['term']}[/bold]")
    console.print(f"Total runs: {total_runs}")
    console.print(f"Average depth: {summary['avg_depth']:.2f}")
    console.print(f"Average rounds: {summary['avg_rounds']:.2f}")

    _print_depth_distribution(summary['depth_distribution'], total_runs)

    console.print("\n[bold]Most Common Echo Types:[/bold]")
    # Only the first five entries are shown.
    for echo_info in summary['most_common_echo_types'][:5]:
        console.print(f" {echo_info['type']:12s}: {echo_info['count']:4d}")

    console.print()


@click.group()
def variance():
    """Variance collection commands."""


@variance.command()
@click.argument("term")
@click.option("--runs", "-n", default=1000, help="Number of runs (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def collect(term: str, runs: int, max_rounds: int, temperature: float, max_tokens: int):
    """
    Collect variance data for a single term.

    Runs Echo Probe N times and stores results in phoebe.

    Example:
        nyx-probe variance collect "Geworfenheit" --runs 1000
    """
    # Load the model before opening the database connection.
    model = get_model()

    with _open_dao() as dao:
        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            max_new_tokens=max_tokens,
            temperature=temperature,
        )

        # Run the session, then show its statistics.
        session_id = runner.run_session(term, runs=runs)
        runner.display_session_stats(session_id)


@variance.command()
@click.argument("glossary_file", type=click.Path(exists=True))
@click.option("--runs", "-n", default=1000, help="Number of runs per term (default: 1000)")
@click.option("--max-rounds", "-r", default=3, help="Max echo rounds (default: 3)")
@click.option("--temperature", "-t", default=0.8, help="Sampling temperature (default: 0.8)")
@click.option("--max-tokens", default=50, help="Max new tokens (default: 50)")
def batch(glossary_file: str, runs: int, max_rounds: int, temperature: float, max_tokens: int = 50):
    """
    Collect variance data from a glossary file.

    Glossary file should be JSON with a "terms" key containing list of strings.

    Example:
        nyx-probe variance batch data/glossary/philosophical.json --runs 1000
    """
    # JSON is UTF-8; decode explicitly instead of relying on the locale.
    data = json.loads(Path(glossary_file).read_text(encoding="utf-8"))

    # A glossary whose top level is not an object has no "terms" key either,
    # so report it the same way instead of failing on ``.get``.
    terms = data.get("terms", []) if isinstance(data, dict) else []

    if not terms:
        console.print("[red]Error: No terms found in glossary file[/red]")
        return

    console.print(f"[cyan]Loaded {len(terms)} terms from {glossary_file}[/cyan]\n")

    model = get_model()

    with _open_dao() as dao:
        runner = VarianceRunner(
            model=model,
            dao=dao,
            max_rounds=max_rounds,
            max_new_tokens=max_tokens,
            temperature=temperature,
        )

        # run_batch's result is iterated as term -> session id pairs.
        sessions = runner.run_batch(terms, runs_per_term=runs)

        table = Table(title="Batch Summary")
        table.add_column("Term", style="cyan")
        table.add_column("Session ID", style="yellow")

        for term, session_id in sessions.items():
            table.add_row(term, str(session_id))

        console.print(table)


@variance.command()
@click.argument("session_id")
def stats(session_id: str):
    """
    Display statistics for a variance collection session.

    Example:
        nyx-probe variance stats 550e8400-e29b-41d4-a716-446655440000
    """
    # Validate the argument before touching the database.
    try:
        session_uuid = uuid.UUID(session_id)
    except ValueError:
        console.print(f"[red]Error: Invalid UUID: {session_id}[/red]")
        return

    # Only the DAO is needed here; the model is never loaded for stats.
    with _open_dao() as dao:
        _print_session_stats(dao, session_uuid)


@variance.command()
@click.argument("term")
@click.option("--limit", "-l", default=1000, help="Max runs to analyze (default: 1000)")
def analyze(term: str, limit: int):
    """
    Analyze variance across ALL sessions for a term.

    Shows aggregate statistics across all variance collections.

    Example:
        nyx-probe variance analyze "Geworfenheit" --limit 1000
    """
    with _open_dao() as dao:
        dist = dao.get_term_distribution(term, limit=limit)
        total_runs = dist['total_runs']

        if total_runs == 0:
            console.print(f"[yellow]No variance data found for term: {term}[/yellow]")
            return

        console.print(f"\n[bold cyan]📊 Variance Analysis: {term}[/bold cyan]")
        console.print(f"Total runs: {total_runs}")
        console.print(f"Total sessions: {dist['total_sessions']}")
        console.print(f"Average depth: {dist['avg_depth']:.2f}")
        console.print(f"Depth std dev: {dist['depth_std_dev']:.2f}")
        console.print(f"Average rounds: {dist['avg_rounds']:.2f}")
        console.print(f"Rounds std dev: {dist['rounds_std_dev']:.2f}")

        _print_depth_distribution(dist['depth_distribution'], total_runs)

        console.print()


# Export for registration
__all__ = ["variance"]