feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure

- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-06 22:39:03 +01:00
parent 9853f4767b
commit f640dbdd65
29 changed files with 6164 additions and 1 deletions
--- a/nyx_probing/core/probe_result.py
+++ b/nyx_probing/core/probe_result.py
@@ -0,0 +1,97 @@
+"""
+Result dataclasses for probing operations.
+
+These structures capture what we learn about each term.
+"""
+from dataclasses import dataclass, field
+from typing import List, Optional, Literal
+from datetime import datetime
+from enum import Enum
+
+
+class EchoType(str, Enum):
+    """Classification of one echo-probe round (str mixin: members equal their values)."""
+
+    EXPANDS = "EXPANDS"      # Real depth - adds new information
+    CONFIRMS = "CONFIRMS"    # Shallow but solid - reinforces without adding
+    CIRCULAR = "CIRCULAR"    # Surface only - returns to original term
+    DIVERGENT = "DIVERGENT"  # Wrong direction - unrelated tangent
+    COLLAPSE = "COLLAPSE"    # Nothing there - incoherent or empty
+
+
+class ReadinessLevel(str, Enum):
+    """Readiness tier for curriculum design; serialized via ``.value`` in ReadinessResult."""
+
+    HIGH = "HIGH"      # Ready for state machine / direct training
+    MEDIUM = "MEDIUM"  # Needs scaffolding / bridging concepts
+    LOW = "LOW"        # Requires foundational work first
+
+
+@dataclass
+class SurfaceProbeResult:
+    """Result from a surface probe (single word → completions)."""
+
+    term: str  # The word that was probed
+    completions: List[str]  # Completions produced for the term
+    hit_eos_count: int  # How many completions ended with EOS
+    avg_tokens: float   # Average completion length (presumably in tokens - confirm)
+
+    # Optional analysis; stays None unless the caller supplies a score
+    coherence_score: Optional[float] = None  # 0-1, how related are completions
+
+    timestamp: datetime = field(default_factory=datetime.now)  # default: naive local now
+
+
+@dataclass
+class EchoProbeResult:
+    """Result from an echo probe (iterative depth measurement)."""
+
+    term: str  # The word that was probed
+    rounds: int  # Presumably the number of echo rounds run - confirm against the prober
+    chain: List[str]  # The sequence of prompts/completions
+    echo_types: List[EchoType]  # Classification of each round
+
+    # Derived metrics (plain defaults: nothing in this class computes them)
+    depth: int = 0  # How many EXPANDS before plateau; 0 unless the caller sets it
+
+    timestamp: datetime = field(default_factory=datetime.now)  # default: naive local now
+
+
+@dataclass
+class ReadinessResult:
+    """Combined analysis for curriculum readiness, with optional probe evidence."""
+
+    term: str
+    level: ReadinessLevel
+    action: str  # Recommended curriculum action
+
+    # Supporting evidence (each is None when not supplied)
+    surface: Optional[SurfaceProbeResult] = None
+    echo: Optional[EchoProbeResult] = None
+
+    # Free-text justification for the chosen level
+    reasoning: str = ""
+
+    timestamp: datetime = field(default_factory=datetime.now)  # default: naive local now
+
+    def to_dict(self) -> dict:
+        """Return a JSON-serializable dict; a missing probe serializes as None."""
+        return {  # note: surface.avg_tokens and echo.rounds are not exported
+            "term": self.term,
+            "readiness": {
+                "level": self.level.value,
+                "action": self.action,
+                "reasoning": self.reasoning,
+            },
+            "surface": {  # evaluated only when a surface result is attached
+                "completions": self.surface.completions,
+                "coherence": self.surface.coherence_score,
+                "hit_eos_count": self.surface.hit_eos_count,
+            } if self.surface else None,
+            "echo": {  # evaluated only when an echo result is attached
+                "depth": self.echo.depth,
+                "types": [t.value for t in self.echo.echo_types],
+                "chain": self.echo.chain,
+            } if self.echo else None,
+            "timestamp": self.timestamp.isoformat(),
+        }