feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
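The Gini signature above is a sparsity measure: ~0.5 means a term's weight is spread diffusely, ~0.8 means it is concentrated in a few components. The exact tensor DriftProbe applies it to is not part of this diff, so the snippet below is only a minimal sketch of the standard Gini computation over a hypothetical weights vector:

import numpy as np

def gini(weights: np.ndarray) -> float:
    """Standard Gini coefficient: 0.0 = perfectly even (diffuse), 1.0 = all mass in one entry (sparse)."""
    x = np.sort(np.abs(np.asarray(weights, dtype=float)))  # sort ascending
    n = x.size
    total = x.sum()
    if n == 0 or total == 0:
        return 0.0
    index = np.arange(1, n + 1)
    return float(2.0 * np.dot(index, x) / (n * total) - (n + 1) / n)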
nyx_probing/probes/echo_probe.py  (new file, 223 lines added)
@@ -0,0 +1,223 @@
"""
Echo Probe: Depth measurement through iterative completion.

The echo probe feeds completions back to the model to measure depth.
Does the model EXPAND (go deeper) or COLLAPSE (circular/divergent)?

Classification from nimmerversity.md:
- EXPANDS: Real depth - adds new information
- CONFIRMS: Shallow but solid - reinforces without adding
- CIRCULAR: Surface only - returns to original term
- DIVERGENT: Wrong direction - unrelated tangent
- COLLAPSE: Nothing there - incoherent or empty
"""
from typing import Optional, List, Tuple
from dataclasses import dataclass

from .base import BaseProbe
from ..core.model import NyxModel
from ..core.probe_result import EchoProbeResult, EchoType


class EchoProbe(BaseProbe):
    """
    Echo probe: measures conceptual depth.

    Process:
    1. Probe term to get initial completion
    2. Feed completion back (or combined prompt)
    3. Classify response: EXPANDS, CONFIRMS, CIRCULAR, DIVERGENT, COLLAPSE
    4. Repeat for N rounds
    5. Measure depth = how many EXPANDS before plateau
    """

    def __init__(
        self,
        model: NyxModel,
        max_rounds: int = 3,
        max_new_tokens: int = 50,
        temperature: float = 0.8,
    ):
        super().__init__(model)
        self.max_rounds = max_rounds
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def probe(
        self,
        term: str,
        max_rounds: Optional[int] = None,
    ) -> EchoProbeResult:
        """
        Probe depth of a term through iterative echoing.

        Args:
            term: Word or phrase to probe
            max_rounds: Override default max rounds

        Returns:
            EchoProbeResult with chain and classifications
        """
        rounds = max_rounds or self.max_rounds
        chain = [term]
        echo_types = []
        current_prompt = term

        for round_num in range(rounds):
            # Generate completion
            result = self.model.generate(
                prompt=current_prompt,
                max_new_tokens=self.max_new_tokens,
                temperature=self.temperature,
                do_sample=True,
            )

            completion = result.completion.strip()
            chain.append(completion)

            # Classify this response relative to original term and chain
            echo_type = self._classify_response(
                original_term=term,
                current_prompt=current_prompt,
                response=completion,
                chain=chain,
            )
            echo_types.append(echo_type)

            # If collapsed, stop probing
            if echo_type == EchoType.COLLAPSE:
                break

            # Prepare next prompt - use a combination strategy
            current_prompt = self._prepare_next_prompt(term, completion, round_num)

        # Calculate depth = consecutive EXPANDS from start
        depth = 0
        for et in echo_types:
            if et == EchoType.EXPANDS:
                depth += 1
            elif et == EchoType.CONFIRMS:
                # CONFIRMS doesn't add depth but doesn't break streak
                pass
            else:
                # CIRCULAR, DIVERGENT, or COLLAPSE breaks the depth streak
                break

        return EchoProbeResult(
            term=term,
            rounds=len(echo_types),
            chain=chain,
            echo_types=echo_types,
            depth=depth,
        )

    def _classify_response(
        self,
        original_term: str,
        current_prompt: str,
        response: str,
        chain: List[str],
    ) -> EchoType:
        """
        Classify a response relative to the probing chain.

        This is a heuristic classifier - can be made smarter with
        semantic similarity or even a classifier model.
        """
        response_lower = response.lower()
        term_lower = original_term.lower()

        # Empty or very short = COLLAPSE
        if len(response.strip()) < 5:
            return EchoType.COLLAPSE

        # Check for circularity - term appears prominently in response
        term_count = response_lower.count(term_lower)
        if term_count >= 2:
            return EchoType.CIRCULAR

        # Check for collapse - incoherent markers
        collapse_markers = [
            "...", "???", "!!!",
            "\n\n\n", "undefined", "null",
            "[object", "NaN",
        ]
        if any(marker in response for marker in collapse_markers):
            return EchoType.COLLAPSE

        # Check for divergence - response has no semantic connection
        # Simple heuristic: count shared significant words
        prompt_words = set(w.lower() for w in current_prompt.split() if len(w) > 3)
        response_words = set(w.lower() for w in response.split() if len(w) > 3)
        overlap = len(prompt_words & response_words)

        if overlap == 0 and len(prompt_words) > 2:
            # No shared words and prompt was substantial = divergent
            return EchoType.DIVERGENT

        # Check for expansion - introduces new concepts
        # New words that aren't in any previous chain items
        all_previous_words = set()
        for item in chain[:-1]:  # Exclude current response
            all_previous_words.update(w.lower() for w in item.split() if len(w) > 3)

        new_significant_words = response_words - all_previous_words
        new_word_ratio = len(new_significant_words) / max(len(response_words), 1)

        if new_word_ratio > 0.5 and len(new_significant_words) >= 3:
            return EchoType.EXPANDS

        # Default to CONFIRMS if coherent but not expanding
        return EchoType.CONFIRMS

    def _prepare_next_prompt(
        self,
        original_term: str,
        last_completion: str,
        round_num: int,
    ) -> str:
        """
        Prepare the next prompt for echo probing.

        Different strategies for different rounds:
        - Round 0: Just use completion
        - Round 1+: Combine original term with key concepts from completion
        """
        if round_num == 0:
            # First echo: just use the completion to see where it goes
            return last_completion[:100]  # Truncate to avoid runaway

        # Later rounds: extract key concept and combine with original
        # Take first sentence or first N words
        words = last_completion.split()
        key_phrase = " ".join(words[:10]) if len(words) > 10 else last_completion

        # Combine with original term
        return f"{original_term}: {key_phrase}"

    def summary(self, result: EchoProbeResult) -> str:
        """Generate human-readable summary."""
        type_symbols = {
            EchoType.EXPANDS: "↑",
            EchoType.CONFIRMS: "→",
            EchoType.CIRCULAR: "↺",
            EchoType.DIVERGENT: "↗",
            EchoType.COLLAPSE: "✗",
        }

        type_str = " ".join(type_symbols.get(t, "?") for t in result.echo_types)

        lines = [
            f"Echo Probe: '{result.term}'",
            f" Rounds: {result.rounds}",
            f" Pattern: {type_str}",
            f" Depth: {result.depth}",
            f" Types: {[t.value for t in result.echo_types]}",
        ]

        # Show chain preview
        for i, (item, etype) in enumerate(zip(result.chain[1:], result.echo_types)):
            preview = item[:50].replace('\n', ' ')
            lines.append(f" [{i+1}] {type_symbols.get(etype, '?')} {preview}...")

        return "\n".join(lines)
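For reference, a minimal usage sketch of the probe defined above. How NyxModel is constructed is not shown in this file, so the constructor call is a placeholder assumption; the EchoProbe calls follow the API in this diff:

from nyx_probing.core.model import NyxModel
from nyx_probing.probes.echo_probe import EchoProbe

model = NyxModel()  # placeholder: constructor arguments are repo-specific (assumption)
probe = EchoProbe(model, max_rounds=3, max_new_tokens=50, temperature=0.8)

result = probe.probe("heart")   # EchoProbeResult with chain, echo_types, depth
print(probe.summary(result))    # e.g. "Echo Probe: 'heart'" with a pattern like "↑ → ↺"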