#!/usr/bin/env python3
"""
nyx-probe CLI: Interactive probing of the Young Mind.

Commands:
    surface   - Probe immediate associations
    echo      - Measure conceptual depth
    readiness - Full curriculum assessment
    tokens    - Token analysis
    glossary  - Batch probe from JSON file
    scan      - Multilingual vocabulary scan with incremental testing
"""
import sys
import json
from pathlib import Path
from typing import Optional, List
from datetime import datetime
import os

import click
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich import box

# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from nyx_probing.core.model import NyxModel
from nyx_probing.probes.surface_probe import SurfaceProbe
from nyx_probing.probes.echo_probe import EchoProbe
from nyx_probing.analysis.readiness_scorer import ReadinessScorer
from nyx_probing.cli.variance import variance

console = Console()

# Global model instance (lazy loaded).
_model: Optional[NyxModel] = None

# Rich markup colors for readiness levels, shared by several commands.
LEVEL_COLORS = {"HIGH": "green", "MEDIUM": "yellow", "LOW": "red"}

# Location of the incremental scan ledger, relative to the package root.
_MASTER_PATH = Path(__file__).parent.parent.parent / "data" / "glossary" / "master.json"


def get_model() -> NyxModel:
    """Return the shared NyxModel instance, loading it on first use."""
    global _model
    if _model is None:
        with console.status("[bold cyan]Loading Qwen2.5-7B...", spinner="dots"):
            _model = NyxModel()
            _model.load()
        console.print("[green]βœ“ Model loaded[/green]")
    return _model


def detect_category(completions: list) -> str:
    """Heuristically classify completions as CODE, LIST, or PROSE.

    Joins all completions into one lowercase string and checks for
    code-like tokens first, then list markers; falls back to PROSE.
    """
    text = " ".join(completions).lower()
    code_indicators = ["def ", "class ", "function", "import ", "return ",
                       "{", "}", ";", "=>", "()"]
    if any(ind in text for ind in code_indicators):
        return "CODE"
    list_indicators = ["1.", "2.", "- ", "β€’ ", "* "]
    if any(ind in text for ind in list_indicators):
        return "LIST"
    return "PROSE"


def _term_name(entry) -> str:
    """Canonical term string for a glossary entry (plain str or dict)."""
    if isinstance(entry, str):
        return entry
    if isinstance(entry, dict):
        return entry.get("term", entry.get("en", str(entry)))
    return str(entry)


def _extract_terms(data):
    """Pull the raw term entries out of a parsed glossary payload.

    Accepts {"terms": [...]}, {"terms": {...}}, a bare dict keyed by
    term, or a bare list. (A bare-list payload previously crashed on
    `data.get(...)`, even though the docstrings advertise it.)
    """
    terms = data.get("terms", data) if isinstance(data, dict) else data
    if isinstance(terms, dict):
        terms = list(terms.keys())
    return terms


@click.group()
@click.version_option(version="0.1.0", prog_name="nyx-probe")
def cli():
    """
    πŸŒ™ nyx-probe: Probe the Young Mind's conceptual topology.

    Explore how Qwen2.5-7B-Base understands and associates concepts.
    """
    pass


# Register variance command group
cli.add_command(variance)


@cli.command()
@click.argument("term")
@click.option("-n", "--runs", default=3, help="Number of completion runs")
@click.option("-t", "--tokens", default=50, help="Max tokens per completion")
@click.option("--temperature", default=0.8, help="Sampling temperature")
def surface(term: str, runs: int, tokens: int, temperature: float):
    """
    Probe surface associations of a term.

    Shows what the model completes when given a word - reveals which
    "valley" (code, prose, philosophy) the term lives in.
    """
    model = get_model()
    probe = SurfaceProbe(
        model,
        num_runs=runs,
        max_new_tokens=tokens,
        temperature=temperature,
    )

    console.print(f"\n[bold cyan]πŸ”¬ Surface Probe:[/bold cyan] [yellow]{term}[/yellow]\n")

    with console.status("[bold cyan]Probing...", spinner="dots"):
        result = probe.probe(term)

    # Display completions (first 5, truncated to 80 chars, newlines folded)
    table = Table(title="Completions", box=box.ROUNDED)
    table.add_column("#", style="dim", width=3)
    table.add_column("Completion", style="white")
    table.add_column("EOS", style="green", width=5)

    for i, comp in enumerate(result.completions[:5], 1):
        preview = comp[:80] + "..." if len(comp) > 80 else comp
        preview = preview.replace("\n", " ↡ ")
        # NOTE(review): hit_eos_count is an aggregate, so this marks every
        # row identically; per-row EOS flags would need per-completion data.
        table.add_row(str(i), preview, "βœ“" if result.hit_eos_count > 0 else "")

    console.print(table)

    # Detect category
    category = detect_category(result.completions)
    coherence = result.coherence_score or 0.0

    # Summary panel
    summary = f"""
[bold]Category:[/bold] {category}
[bold]Coherence:[/bold] {coherence:.2f}
[bold]Avg Tokens:[/bold] {result.avg_tokens:.1f}
[bold]EOS Rate:[/bold] {result.hit_eos_count}/{len(result.completions)}
"""
    console.print(Panel(summary, title="πŸ“Š Analysis", border_style="cyan"))


@cli.command()
@click.argument("term")
@click.option("-r", "--rounds", default=3, help="Echo rounds")
@click.option("-t", "--tokens", default=50, help="Max tokens per round")
def echo(term: str, rounds: int, tokens: int):
    """
    Measure conceptual depth through iterative echoing.

    Feeds completions back to measure how deep the concept goes.
    Classifications: EXPANDS, CONFIRMS, CIRCULAR, DIVERGENT, COLLAPSE
    """
    model = get_model()
    probe = EchoProbe(
        model,
        max_rounds=rounds,
        max_new_tokens=tokens,
    )

    console.print(f"\n[bold cyan]πŸ”„ Echo Probe:[/bold cyan] [yellow]{term}[/yellow]\n")

    with console.status("[bold cyan]Echoing...", spinner="dots"):
        result = probe.probe(term)

    # Display chain: round 0 is the seed term, later rounds are echoes
    table = Table(title="Echo Chain", box=box.ROUNDED)
    table.add_column("Round", style="dim", width=6)
    table.add_column("Type", style="bold", width=12)
    table.add_column("Content", style="white")

    table.add_row("0", "[cyan]SEED[/cyan]", term)

    type_colors = {
        "EXPANDS": "green",
        "CONFIRMS": "yellow",
        "CIRCULAR": "red",
        "DIVERGENT": "magenta",
        "COLLAPSE": "dim red",
    }

    for i, (echo_type, content) in enumerate(zip(result.echo_types, result.chain[1:]), 1):
        color = type_colors.get(echo_type.value, "white")
        preview = content[:60] + "..." if len(content) > 60 else content
        preview = preview.replace("\n", " ↡ ")
        table.add_row(str(i), f"[{color}]{echo_type.value}[/{color}]", preview)

    console.print(table)

    # Depth indicator: clamp so a depth above the 3-round scale cannot
    # produce a malformed bar ("β–‘" * negative was silently empty before).
    depth = result.depth
    shown = min(depth, 3)
    depth_bar = "β–ˆ" * shown + "β–‘" * (3 - shown)
    colors = ["red", "yellow", "green", "cyan"]
    console.print(f"\n[bold]Depth Score:[/bold] [{colors[shown]}]{depth_bar}[/] {depth}/3")


@cli.command()
@click.argument("term")
def readiness(term: str):
    """
    Full curriculum readiness assessment.

    Combines surface + echo probes to determine if a concept
    is ready for training: HIGH, MEDIUM, or LOW.
    """
    model = get_model()
    scorer = ReadinessScorer(model)

    console.print(f"\n[bold cyan]πŸ“‹ Readiness Assessment:[/bold cyan] [yellow]{term}[/yellow]\n")

    with console.status("[bold cyan]Assessing...", spinner="dots"):
        result = scorer.score(term)

    # Level colors and badge emoji keyed by readiness level
    level_styles = {
        "HIGH": ("green", "🟒"),
        "MEDIUM": ("yellow", "🟑"),
        "LOW": ("red", "πŸ”΄"),
    }
    color, emoji = level_styles.get(result.level.value, ("white", "βšͺ"))

    # Get category and metrics (sub-probes may be absent on the result)
    category = detect_category(result.surface.completions) if result.surface else "UNKNOWN"
    coherence = result.surface.coherence_score if result.surface else 0.0
    depth = result.echo.depth if result.echo else 0

    # Main panel
    content = f"""
{emoji} [bold {color}]{result.level.value}[/bold {color}]

[bold]Valley:[/bold] {category}
[bold]Coherence:[/bold] {coherence:.2f}
[bold]Depth:[/bold] {depth}/3
[bold]Action:[/bold] {result.action}
"""
    console.print(Panel(content, title=f"Readiness: {term}", border_style=color))

    # Recommendations
    if result.level.value == "HIGH":
        console.print("[green]βœ“ Ready for direct training or state machine implementation[/green]")
    elif result.level.value == "MEDIUM":
        console.print("[yellow]⚠ Consider scaffolding or bridging concepts[/yellow]")
    else:
        console.print("[red]βœ— Requires foundational work before training[/red]")


@cli.command()
@click.argument("term")
def tokens(term: str):
    """
    Analyze tokenization of a term.

    Shows how the model breaks down the term into tokens -
    critical for understanding valley access (single vs multi-token).
    """
    model = get_model()

    console.print(f"\n[bold cyan]πŸ”€ Token Analysis:[/bold cyan] [yellow]{term}[/yellow]\n")

    token_list = model.tokenize(term)
    count = len(token_list)

    # Token display
    token_display = " | ".join([f"[cyan]{t}[/cyan]" for t in token_list])
    console.print(f"Tokens: {token_display}")
    console.print(f"Count: [bold]{count}[/bold]")

    # Interpretation
    if count == 1:
        console.print("\n[red]⚠ Single token - likely CODE valley (high activation spike)[/red]")
    elif count <= 2:
        console.print("\n[yellow]β†’ Few tokens - may be efficient but limited valley access[/yellow]")
    else:
        console.print("\n[green]βœ“ Multi-token - distributed signal, better valley access[/green]")


@cli.command()
@click.argument("glossary_file", type=click.Path(exists=True))
@click.option("-o", "--output", type=click.Path(), help="Output JSON file")
@click.option("--surface-only", is_flag=True, help="Only run surface probe")
def glossary(glossary_file: str, output: Optional[str], surface_only: bool):
    """
    Batch probe terms from a glossary JSON file.

    Expected format: {"terms": [{"term": "...", "translations": {...}}, ...]}
    or simple: {"terms": ["term1", "term2", ...]}
    """
    model = get_model()

    # Load glossary (explicit encoding so multilingual terms survive Windows)
    with open(glossary_file, encoding="utf-8") as f:
        data = json.load(f)

    # Normalize to a list of term strings
    term_list = [_term_name(t) for t in _extract_terms(data)
                 if isinstance(t, (str, dict))]

    console.print(f"\n[bold cyan]πŸ“š Glossary Probe:[/bold cyan] {len(term_list)} terms\n")

    results = []

    if surface_only:
        # Cheap pass: surface probe only (no echo / readiness scoring)
        probe = SurfaceProbe(model, num_runs=3)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Probing...", total=len(term_list))
            for term in term_list:
                progress.update(task, description=f"Probing: {term}")
                result = probe.probe(term)
                category = detect_category(result.completions)
                results.append({
                    "term": term,
                    "category": category,
                    "coherence": result.coherence_score or 0.0,
                    "tokens": model.token_count(term),
                })
                progress.advance(task)
    else:
        # Full pass: readiness scoring (surface + echo)
        scorer = ReadinessScorer(model)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Assessing...", total=len(term_list))
            for term in term_list:
                progress.update(task, description=f"Assessing: {term}")
                result = scorer.score(term)
                category = detect_category(result.surface.completions) if result.surface else "UNKNOWN"
                coherence = result.surface.coherence_score if result.surface else 0.0
                depth = result.echo.depth if result.echo else 0
                results.append({
                    "term": term,
                    "level": result.level.value,
                    "valley": category,
                    "coherence": coherence,
                    "depth": depth,
                    "action": result.action,
                    "tokens": model.token_count(term),
                })
                progress.advance(task)

    # Display results table (columns depend on probe mode)
    table = Table(title="Glossary Results", box=box.ROUNDED)
    table.add_column("Term", style="yellow")
    table.add_column("Tokens", style="dim", width=6)

    if surface_only:
        table.add_column("Category", style="cyan")
        table.add_column("Coherence", style="white")
        for r in results:
            table.add_row(
                r["term"],
                str(r["tokens"]),
                r["category"],
                f"{r['coherence']:.2f}",
            )
    else:
        table.add_column("Level", style="bold")
        table.add_column("Valley", style="cyan")
        table.add_column("Depth", style="white")
        for r in results:
            color = LEVEL_COLORS.get(r["level"], "white")
            table.add_row(
                r["term"],
                str(r["tokens"]),
                f"[{color}]{r['level']}[/{color}]",
                r["valley"],
                f"{r['depth']}/3",
            )

    console.print(table)

    # Save if output specified
    if output:
        with open(output, "w", encoding="utf-8") as f:
            json.dump({"glossary": glossary_file, "results": results}, f, indent=2)
        console.print(f"\n[green]βœ“ Results saved to {output}[/green]")

    # Summary
    if not surface_only:
        high = sum(1 for r in results if r["level"] == "HIGH")
        med = sum(1 for r in results if r["level"] == "MEDIUM")
        low = sum(1 for r in results if r["level"] == "LOW")
        console.print(f"\n[bold]Summary:[/bold] 🟒 {high} HIGH | 🟑 {med} MEDIUM | πŸ”΄ {low} LOW")


def load_glossary_files(paths: List[str]) -> tuple[list, dict]:
    """Load terms from files or directories, tracking source collection.

    Returns (terms, sources): terms is a list of dict entries (plain
    strings are normalized to {"term": t, "translations": {"EN": t}});
    sources maps term name -> collection name (the JSON file's stem).
    Unreadable files are skipped with a warning rather than aborting.
    """
    terms = []
    sources = {}  # term -> collection name

    for path_str in paths:
        path = Path(path_str)
        if path.is_dir():
            # Load all JSON files from directory (sorted for determinism)
            json_files = sorted(path.glob("*.json"))
        else:
            json_files = [path]

        for json_file in json_files:
            collection_name = json_file.stem
            try:
                with open(json_file, encoding="utf-8") as f:
                    data = json.load(f)
                for t in _extract_terms(data):
                    if isinstance(t, str):
                        term_data = {"term": t, "translations": {"EN": t}}
                    elif isinstance(t, dict):
                        term_data = t
                    else:
                        continue
                    terms.append(term_data)
                    sources[_term_name(term_data)] = collection_name
            except Exception as e:
                console.print(f"[yellow]Warning: Could not load {json_file}: {e}[/yellow]")

    return terms, sources


def load_master_json() -> dict:
    """Load master.json if it exists, else return an empty skeleton."""
    if _MASTER_PATH.exists():
        with open(_MASTER_PATH, encoding="utf-8") as f:
            return json.load(f)
    return {"last_scan": None, "total_terms": 0, "collections_loaded": [], "terms": {}}


def save_master_json(master: dict):
    """Persist master.json, creating its parent directory if needed."""
    # Fix: first save on a fresh checkout used to fail if data/glossary/
    # did not already exist.
    _MASTER_PATH.parent.mkdir(parents=True, exist_ok=True)
    with open(_MASTER_PATH, "w", encoding="utf-8") as f:
        json.dump(master, f, indent=2)


@cli.command()
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@click.option("--summary/--full", default=True, help="Show summary (default) or full table")
@click.option("--delta", is_flag=True, help="Only test new/untested terms")
@click.option("--force", is_flag=True, help="Re-test all terms even if already in master.json")
@click.option("-o", "--output", type=click.Path(), help="Output JSON file")
def scan(paths: tuple, summary: bool, delta: bool, force: bool, output: Optional[str]):
    """
    Multilingual vocabulary scan with incremental testing.

    Scans terms using surface + echo probes and tracks results in master.json.

    Examples:
        nyx-probe scan data/glossary/collections/     # Scan all collections
        nyx-probe scan collections/philosophical.json # Scan specific file
        nyx-probe scan collections/ --delta           # Only test new terms
        nyx-probe scan collections/ --full            # Full detailed output
    """
    if not paths:
        console.print("[red]Error: Please provide at least one file or directory path[/red]")
        return

    model = get_model()

    # Load terms from all paths
    all_terms, sources = load_glossary_files(list(paths))
    console.print(f"\n[bold cyan]πŸ”¬ Vocabulary Scan:[/bold cyan] {len(all_terms)} terms from {len(set(sources.values()))} collection(s)\n")

    # Load master.json for delta mode
    master = load_master_json()

    # Filter terms if delta mode (--force overrides the skip)
    if delta and not force:
        tested_terms = set(master.get("terms", {}).keys())
        original_count = len(all_terms)
        all_terms = [t for t in all_terms if _term_name(t) not in tested_terms]
        skipped = original_count - len(all_terms)
        if skipped > 0:
            console.print(f"[dim]Skipping {skipped} already-tested terms (use --force to re-test)[/dim]")
        if not all_terms:
            console.print("[green]All terms already tested! Use --force to re-test.[/green]")
            return

    # Run probes
    scorer = ReadinessScorer(model)
    results = []
    # Guard against a hand-edited master.json missing the "terms" key
    master_terms = master.setdefault("terms", {})

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        task = progress.add_task("Scanning...", total=len(all_terms))
        for term_data in all_terms:
            term = _term_name(term_data)
            progress.update(task, description=f"Probing: {term}")

            result = scorer.score(term)
            category = detect_category(result.surface.completions) if result.surface else "UNKNOWN"
            coherence = result.surface.coherence_score if result.surface else 0.0
            depth = result.echo.depth if result.echo else 0

            entry = {
                "term": term,
                "source": sources.get(term, "unknown"),
                "level": result.level.value,
                "valley": category,
                "coherence": coherence,
                "depth": depth,
                "action": result.action,
                "tokens": model.token_count(term),
            }
            results.append(entry)

            # Update master.json entry
            master_terms[term] = {
                "source": sources.get(term, "unknown"),
                "tested": datetime.now().strftime("%Y-%m-%d"),
                "depth": depth,
                "valley": category,
                "transfer": False,  # Would need triangulation
                "grounding": coherence,
            }
            progress.advance(task)

    # Update master.json metadata
    master["last_scan"] = datetime.now().isoformat()
    master["total_terms"] = len(master_terms)
    collections = set(master.get("collections_loaded", []))
    collections.update(sources.values())
    master["collections_loaded"] = list(collections)
    save_master_json(master)

    # Display results
    high = sum(1 for r in results if r["level"] == "HIGH")
    med = sum(1 for r in results if r["level"] == "MEDIUM")
    low = sum(1 for r in results if r["level"] == "LOW")

    if summary:
        # Summary mode - lean output
        depth_hits = [r for r in results if r["depth"] >= 2]
        console.print(f"\n[bold]🌍 Scanned {len(results)} terms | Depthβ‰₯2: {len(depth_hits)} | 🟒{high} 🟑{med} πŸ”΄{low}[/bold]\n")

        if depth_hits:
            console.print("[bold cyan]DEPTH HITS (β‰₯2/3):[/bold cyan]")
            for r in depth_hits:
                color = LEVEL_COLORS.get(r["level"], "white")
                console.print(f"  [{color}]{r['term']:20}[/{color}] {r['depth']}/3  {r['valley']:10} ({r['source']})")

        high_grounding = [r for r in results if r["coherence"] > 0.7]
        if high_grounding:
            console.print(f"\n[bold cyan]BEST GROUNDING (>0.7):[/bold cyan]")
            for r in high_grounding[:5]:
                console.print(f"  {r['term']:20} {r['coherence']:.2f}")

        console.print(f"\n[dim]Run with --full for complete table[/dim]")
    else:
        # Full mode - detailed table
        table = Table(title="Scan Results", box=box.ROUNDED)
        table.add_column("Term", style="yellow")
        table.add_column("Source", style="dim", width=12)
        table.add_column("Tokens", style="dim", width=6)
        table.add_column("Level", style="bold")
        table.add_column("Valley", style="cyan")
        table.add_column("Depth", style="white")
        table.add_column("Coherence", style="white")

        for r in results:
            color = LEVEL_COLORS.get(r["level"], "white")
            table.add_row(
                r["term"],
                r["source"],
                str(r["tokens"]),
                f"[{color}]{r['level']}[/{color}]",
                r["valley"],
                f"{r['depth']}/3",
                f"{r['coherence']:.2f}",
            )

        console.print(table)
        console.print(f"\n[bold]Summary:[/bold] 🟒 {high} HIGH | 🟑 {med} MEDIUM | πŸ”΄ {low} LOW")

    # Save output if specified
    if output:
        with open(output, "w", encoding="utf-8") as f:
            json.dump({"scan_time": datetime.now().isoformat(), "results": results}, f, indent=2)
        console.print(f"\n[green]βœ“ Results saved to {output}[/green]")

    console.print(f"\n[green]βœ“ master.json updated ({master['total_terms']} total terms)[/green]")


def main():
    """Entry point."""
    cli()


if __name__ == "__main__":
    main()