feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure

- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-06 22:39:03 +01:00
parent 9853f4767b
commit f640dbdd65
29 changed files with 6164 additions and 1 deletions
--- a/nyx_probing/probes/__init__.py
+++ b/nyx_probing/probes/__init__.py
@@ -0,0 +1,27 @@
+"""Probe implementations for nyx-probing."""
+from .base import BaseProbe
+from .surface_probe import SurfaceProbe, CompletionCategory
+from .echo_probe import EchoProbe
+from .multilingual_probe import (
+    MultilingualTriangulationProbe,
+    LanguageZone,
+    LANGUAGES,
+    GroundingResult,
+    DeepeningResult,
+    TriangulationResult,
+    MultilingualProbeResult,
+)
+
+__all__ = [
+    "BaseProbe",
+    "SurfaceProbe",
+    "CompletionCategory",
+    "EchoProbe",
+    "MultilingualTriangulationProbe",
+    "LanguageZone",
+    "LANGUAGES",
+    "GroundingResult",
+    "DeepeningResult",
+    "TriangulationResult",
+    "MultilingualProbeResult",
+]
--- a/nyx_probing/probes/base.py
+++ b/nyx_probing/probes/base.py
@@ -0,0 +1,58 @@
+"""
+Base class for all probes.
+
+Probes are measurement instruments - they reveal what's already there,
+they don't add or change anything.
+"""
+from abc import ABC, abstractmethod
+from typing import Any
+from ..core.model import NyxModel
+
+
+class BaseProbe(ABC):
+    """
+    Shared contract for every probe.
+
+    A probe wraps an already-loaded model, exposes ``probe`` for a single
+    term and ``probe_batch`` for a list of terms.
+    """
+
+    def __init__(self, model: NyxModel):
+        """
+        Bind the probe to a loaded model.
+
+        Args:
+            model: A NyxModel instance whose ``_loaded`` flag is truthy
+
+        Raises:
+            ValueError: If the model has not been loaded yet
+        """
+        self.model = model
+        if model._loaded:
+            return
+        raise ValueError("Model must be loaded before creating probe")
+
+    @property
+    def name(self) -> str:
+        """Class name of the concrete probe."""
+        probe_cls = self.__class__
+        return probe_cls.__name__
+
+    @abstractmethod
+    def probe(self, term: str, **kwargs) -> Any:
+        """
+        Probe one term.
+
+        Args:
+            term: The word/phrase to probe
+            **kwargs: Parameters specific to the concrete probe
+
+        Returns:
+            A result object whose type is defined by the concrete probe
+        """
+
+    def probe_batch(self, terms: list[str], **kwargs) -> list[Any]:
+        """
+        Probe several terms, one after another.
+
+        This base version simply calls ``probe`` per term in input order;
+        subclasses may override it with something faster.
+
+        Args:
+            terms: Words/phrases to probe
+            **kwargs: Forwarded unchanged to every ``probe`` call
+
+        Returns:
+            One probe result per input term, in the same order
+        """
+        results = []
+        for term in terms:
+            results.append(self.probe(term, **kwargs))
+        return results
--- a/nyx_probing/probes/drift_probe.py
+++ b/nyx_probing/probes/drift_probe.py
@@ -0,0 +1,304 @@
+"""
+DriftProbe: Training-loop monitoring for conceptual topology preservation.
+
+Theory: "Spatial Separation Hypothesis"
+- Use isolated zone languages (German) as scaffolding for new concepts
+- Monitor anchors (must not move), bridges (must stay separated), canaries (watch for migration)
+
+Key Metrics (refined from peer review):
+1. Gini Coefficient: Sparse activations (0.8+) = deep/specific, Diffuse (0.3) = shallow/general
+2. Angular Drift: Direction change = definition rewrite, magnitude change = sharpening
+3. Cross-Language Similarity: Bridges should stay LOW, anchors should stay HIGH
+"""
+import json
+from pathlib import Path
+from dataclasses import dataclass, field
+from typing import Optional
+from enum import Enum
+
+import torch
+import numpy as np
+
+
+class SentinelType(Enum):
+    """Role a sentinel term plays in drift monitoring; values match the "type" strings read from the sentinel config."""
+    ANCHOR = "ANCHOR"      # Must not move - core topology
+    BRIDGE = "BRIDGE"      # Must stay separated - isolated zone integrity
+    CANARY = "CANARY"      # Watch for migration - early warning
+    TARGET = "TARGET"      # Want movement - training goals
+
+
+class AlertSeverity(Enum):
+    """Severity attached to a single sentinel's metrics."""
+    OK = "OK"              # No rule tripped
+    WARNING = "WARNING"    # Counted in DriftReport.warning_count
+    CRITICAL = "CRITICAL"  # Counted in DriftReport.critical_count
+
+
+@dataclass
+class DriftMetrics:
+    """Measurements and alert state recorded for one sentinel term at one probe run."""
+    term: str
+    sentinel_type: SentinelType
+
+    # Activation metrics of the current hidden state
+    gini_coefficient: float = 0.0
+    activation_norm: float = 0.0
+
+    # Drift metrics (vs baseline); all stay 0.0 when no baseline was captured for the term
+    angular_drift_degrees: float = 0.0
+    norm_drift_percent: float = 0.0
+    gini_drift: float = 0.0  # current Gini minus baseline Gini (signed)
+
+    # Valley detection
+    # NOTE(review): DriftProbe._run_probe never sets these two fields, so they keep their defaults
+    detected_valley: str = "UNKNOWN"
+    depth: int = 0
+
+    # Cross-language (for anchors/bridges): mean pairwise cosine similarity of the translations
+    cross_lang_similarity: float = 0.0
+
+    # Alert
+    alert: AlertSeverity = AlertSeverity.OK
+    alert_message: str = ""
+
+
+@dataclass
+class DriftReport:
+    """Full drift report for a training checkpoint."""
+    step: int       # training step the report was taken at
+    timestamp: str  # ISO-8601 string produced by datetime.now().isoformat()
+    metrics: list[DriftMetrics] = field(default_factory=list)  # one entry per probed sentinel
+
+    # Summary
+    critical_count: int = 0
+    warning_count: int = 0
+    # One of "CONTINUE", "REDUCE_LR" or "ROLLBACK" as set by DriftProbe._run_probe
+    recommendation: str = "CONTINUE"
+
+
+class DriftProbe:
+    """
+    Lightweight probe for training-loop monitoring.
+
+    Optimized for RTX 3090 constraints:
+    - Full probe: ~2 min (run at epoch 0, end of training)
+    - Lite probe: ~10 sec (run every 100 steps)
+    """
+
+    def __init__(self, model, tokenizer, sentinels_path: Optional[str] = None):
+        """
+        Store the model/tokenizer pair and load the sentinel configuration.
+
+        Args:
+            model: Callable model object; must expose ``.device`` and accept
+                ``output_hidden_states=True`` (see ``_get_hidden_state``)
+            tokenizer: Callable tokenizer used to encode probe texts
+            sentinels_path: Path to the sentinel JSON file. When omitted,
+                ``data/sentinels.json`` three directory levels above this
+                module file is used.
+
+        Raises:
+            OSError: If the sentinel file cannot be opened
+            KeyError: If the JSON lacks the "sentinels" or "alert_rules" key
+        """
+        self.model = model
+        self.tokenizer = tokenizer
+        self.baseline_states = {}  # term -> hidden state tensor
+
+        # Load sentinels
+        if sentinels_path is None:
+            sentinels_path = Path(__file__).parent.parent.parent / "data" / "sentinels.json"
+
+        # Read explicitly as UTF-8: the sentinel file carries non-ASCII
+        # translations, and the platform default encoding is not UTF-8 everywhere.
+        with open(sentinels_path, encoding="utf-8") as f:
+            self.config = json.load(f)
+
+        self.sentinels = self.config["sentinels"]
+        self.alert_rules = self.config["alert_rules"]
+
+    def _get_hidden_state(self, text: str, layer: int = 18) -> torch.Tensor:
+        """
+        Run one gradient-free forward pass and return a single hidden vector.
+
+        The vector is taken from ``hidden_states[layer]`` at index ``[0, -1, :]``
+        (presumably batch 0, last token - assumes a (batch, seq, hidden) layout),
+        then cast to float32 and moved to the CPU.
+        """
+        encoded = self.tokenizer(text, return_tensors="pt")
+        encoded = encoded.to(self.model.device)
+        with torch.no_grad():
+            forward_out = self.model(**encoded, output_hidden_states=True)
+        layer_states = forward_out.hidden_states[layer]
+        final_position = layer_states[0, -1, :]
+        return final_position.float().cpu()
+
+    def _compute_gini(self, activations: torch.Tensor) -> float:
+        """
+        Compute the Gini coefficient of the absolute activation values.
+
+        A value near 1 means a few dimensions carry almost all of the
+        magnitude (sparse); a value near 0 means it is spread evenly (diffuse).
+
+        Args:
+            activations: Activation tensor; it is flattened before use
+
+        Returns:
+            The Gini coefficient, or 0.0 for an empty or all-zero tensor
+            (previously a division by zero that produced NaN).
+        """
+        # detach()/cpu() keep .numpy() valid for tensors that still carry grad or live on GPU.
+        x = np.sort(torch.abs(activations).detach().cpu().numpy().reshape(-1))
+        n = x.size
+        total = float(np.sum(x))
+        if n == 0 or total == 0.0:
+            # No mass to distribute: define the coefficient as 0 instead of dividing by zero.
+            return 0.0
+
+        # Closed form over ascending-sorted values with 1-based ranks.
+        ranks = np.arange(1, n + 1)
+        gini = (2 * np.sum(ranks * x)) / (n * total) - (n + 1) / n
+        return float(gini)
+
+    def _compute_angular_drift(self, current: torch.Tensor, baseline: torch.Tensor) -> float:
+        """
+        Return the angle, in degrees, between the current and baseline vectors.
+
+        Interpretation used by the alert rules:
+        > 15° = Definition rewrite (concerning)
+        < 5° = Sharpening only (acceptable)
+        """
+        cosine_fn = torch.nn.functional.cosine_similarity
+        cosine = cosine_fn(current.unsqueeze(0), baseline.unsqueeze(0)).item()
+        # Keep the min/max clamp: rounding can push the cosine just outside
+        # [-1, 1], which arccos would reject.
+        upper_bounded = min(1.0, cosine)
+        clamped = max(-1.0, upper_bounded)
+        return float(np.degrees(np.arccos(clamped)))
+
+    def _compute_cross_lang_sim(self, sentinel: dict, layer: int = 18) -> float:
+        """
+        Average the cosine similarity over every pair of a sentinel's translations.
+
+        Returns 0.0 when the sentinel has fewer than two translations.
+        """
+        translations = sentinel.get("translations", {})
+        if len(translations) < 2:
+            return 0.0
+
+        # One hidden state per translated word, in the dict's iteration order.
+        vectors = [self._get_hidden_state(word, layer) for word in translations.values()]
+
+        # Each unordered pair (a < b) is scored exactly once.
+        count = len(vectors)
+        pair_scores = [
+            torch.nn.functional.cosine_similarity(
+                vectors[a].unsqueeze(0), vectors[b].unsqueeze(0)
+            ).item()
+            for a in range(count)
+            for b in range(a + 1, count)
+        ]
+
+        if not pair_scores:
+            return 0.0
+        return float(np.mean(pair_scores))
+
+    def capture_baseline(self, layer: int = 18):
+        """
+        Record the reference hidden state of every sentinel.
+
+        Meant to be called once at epoch 0, before training starts. Each
+        sentinel is encoded through its "EN" translation when one exists,
+        otherwise through the term itself, and stored under the term.
+        """
+        print("Capturing baseline states...")
+        for entry in self.sentinels:
+            key = entry["term"]
+            translations = entry.get("translations", {})
+            prompt = translations.get("EN", key)
+            self.baseline_states[key] = self._get_hidden_state(prompt, layer)
+        print(f"Baseline captured for {len(self.baseline_states)} sentinels")
+
+    def probe_lite(self, step: int, layer: int = 18, terms: Optional[list[str]] = None) -> DriftReport:
+        """
+        Lite probe - only check a small subset of sentinels.
+        Optimized for ~10 second runtime.
+
+        Args:
+            step: Training step recorded in the report
+            layer: Hidden-state layer index to probe
+            terms: Sentinel terms to include. Defaults to the built-in
+                subset "heart", "water", "being", "dasein", "thrownness"
+                (intended as 2 anchors, 1 bridge, 2 canaries - which types
+                they actually have depends on the sentinel file).
+
+        Returns:
+            DriftReport covering the configured sentinels whose term is
+            selected. Terms missing from the configuration are skipped
+            silently, so an empty selection yields an empty report.
+        """
+        if terms is None:
+            terms = ["heart", "water", "being", "dasein", "thrownness"]
+        wanted = set(terms)  # O(1) membership while filtering
+        lite_sentinels = [s for s in self.sentinels if s["term"] in wanted]
+
+        return self._run_probe(lite_sentinels, step, layer)
+
+    def probe_full(self, step: int, layer: int = 18) -> DriftReport:
+        """
+        Run the probe over every configured sentinel.
+
+        Expected runtime is about two minutes.
+        """
+        every_sentinel = self.sentinels
+        return self._run_probe(every_sentinel, step, layer)
+
+    def _run_probe(self, sentinels: list, step: int, layer: int) -> DriftReport:
+        """
+        Measure each given sentinel and assemble a DriftReport.
+
+        For every sentinel the current hidden state is compared against the
+        captured baseline (drift values stay 0.0 when no baseline exists for
+        the term), then type-specific alert rules are applied:
+
+        - ANCHOR: CRITICAL above 15° angular drift, else WARNING when the
+          norm drift in percent exceeds ``max_drift * 100``
+        - BRIDGE: CRITICAL when cross-language similarity exceeds
+          ``collapse_alert_threshold``
+        - CANARY: WARNING when Gini falls below ``min_gini`` and/or angular
+          drift exceeds ``max_angular_drift``
+        - TARGET: never alerts
+
+        Args:
+            sentinels: Sentinel config dicts (each needs "term" and "type")
+            step: Training step recorded in the report
+            layer: Hidden-state layer index to probe
+
+        Returns:
+            DriftReport whose recommendation is "ROLLBACK" on any critical
+            alert, "REDUCE_LR" on more than two warnings, else "CONTINUE".
+        """
+        from datetime import datetime
+
+        report = DriftReport(
+            step=step,
+            timestamp=datetime.now().isoformat()
+        )
+
+        for sentinel in sentinels:
+            term = sentinel["term"]
+            # Same text selection as capture_baseline: EN translation, else the term.
+            text = sentinel.get("translations", {}).get("EN", term)
+            sentinel_type = SentinelType(sentinel["type"])
+            thresholds = sentinel.get("thresholds", {})
+
+            # Get current state
+            current_state = self._get_hidden_state(text, layer)
+
+            # Compute metrics
+            gini = self._compute_gini(current_state)
+            norm = float(current_state.norm())
+
+            # Drift vs baseline
+            angular_drift = 0.0
+            norm_drift = 0.0
+            gini_drift = 0.0
+
+            if term in self.baseline_states:
+                baseline = self.baseline_states[term]
+                angular_drift = self._compute_angular_drift(current_state, baseline)
+                baseline_norm = float(baseline.norm())
+                # Guard against a zero-norm baseline; keep the value a float either way.
+                norm_drift = abs(norm - baseline_norm) / baseline_norm * 100 if baseline_norm > 0 else 0.0
+                baseline_gini = self._compute_gini(baseline)
+                gini_drift = gini - baseline_gini
+
+            # Cross-language similarity
+            cross_lang_sim = self._compute_cross_lang_sim(sentinel, layer)
+
+            # Determine alert level; messages are collected so that two
+            # simultaneous findings are both reported instead of the later
+            # one overwriting the earlier one.
+            alert = AlertSeverity.OK
+            messages = []
+
+            if sentinel_type == SentinelType.ANCHOR:
+                max_drift = thresholds.get("max_drift", 0.05)
+                if angular_drift > 15:
+                    alert = AlertSeverity.CRITICAL
+                    messages.append(f"Angular drift {angular_drift:.1f}° exceeds 15° - definition rewrite")
+                elif norm_drift > max_drift * 100:
+                    alert = AlertSeverity.WARNING
+                    messages.append(f"Norm drift {norm_drift:.1f}% exceeds threshold")
+
+            elif sentinel_type == SentinelType.BRIDGE:
+                collapse_threshold = thresholds.get("collapse_alert_threshold", 0.50)
+                if cross_lang_sim > collapse_threshold:
+                    alert = AlertSeverity.CRITICAL
+                    messages.append(f"Bridge collapsed - cross-lang sim {cross_lang_sim:.2f} > {collapse_threshold}")
+
+            elif sentinel_type == SentinelType.CANARY:
+                min_gini = thresholds.get("min_gini", 0.70)
+                if gini < min_gini:
+                    alert = AlertSeverity.WARNING
+                    messages.append(f"Gini {gini:.2f} below {min_gini} - concept melting into prose")
+                if angular_drift > thresholds.get("max_angular_drift", 15):
+                    alert = AlertSeverity.WARNING
+                    messages.append(f"Angular drift {angular_drift:.1f}° - definition shifting")
+
+            alert_message = "; ".join(messages)
+
+            metrics = DriftMetrics(
+                term=term,
+                sentinel_type=sentinel_type,
+                gini_coefficient=gini,
+                activation_norm=norm,
+                angular_drift_degrees=angular_drift,
+                norm_drift_percent=norm_drift,
+                gini_drift=gini_drift,
+                cross_lang_similarity=cross_lang_sim,
+                alert=alert,
+                alert_message=alert_message
+            )
+
+            report.metrics.append(metrics)
+
+            if alert == AlertSeverity.CRITICAL:
+                report.critical_count += 1
+            elif alert == AlertSeverity.WARNING:
+                report.warning_count += 1
+
+        # Set recommendation
+        if report.critical_count > 0:
+            report.recommendation = "ROLLBACK"
+        elif report.warning_count > 2:
+            report.recommendation = "REDUCE_LR"
+        else:
+            report.recommendation = "CONTINUE"
+
+        return report
+
+    def print_report(self, report: DriftReport):
+        """Write a human-readable rendering of a drift report to stdout."""
+        print(f"\n{'='*60}")
+        print(f"DRIFT REPORT - Step {report.step}")
+        print(f"{'='*60}")
+
+        for m in report.metrics:
+            # Pick the status glyph: OK -> check, WARNING -> warning sign, anything else -> cross.
+            if m.alert == AlertSeverity.OK:
+                status = "✓"
+            elif m.alert == AlertSeverity.WARNING:
+                status = "⚠"
+            else:
+                status = "✗"
+
+            print(f"\n{status} {m.term} ({m.sentinel_type.value})")
+            print(f"   Gini: {m.gini_coefficient:.3f} (drift: {m.gini_drift:+.3f})")
+            print(f"   Angular drift: {m.angular_drift_degrees:.1f}°")
+            print(f"   Cross-lang sim: {m.cross_lang_similarity:.3f}")
+            if not m.alert_message:
+                continue
+            print(f"   ALERT: {m.alert_message}")
+
+        print(f"\n{'='*60}")
+        print(f"SUMMARY: {report.critical_count} critical, {report.warning_count} warnings")
+        print(f"RECOMMENDATION: {report.recommendation}")
+        print(f"{'='*60}\n")
--- a/nyx_probing/probes/echo_probe.py
+++ b/nyx_probing/probes/echo_probe.py
@@ -0,0 +1,223 @@
+"""
+Echo Probe: Depth measurement through iterative completion.
+
+The echo probe feeds completions back to the model to measure depth.
+Does the model EXPAND (go deeper) or COLLAPSE (circular/divergent)?
+
+Classification from nimmerversity.md:
+- EXPANDS: Real depth - adds new information
+- CONFIRMS: Shallow but solid - reinforces without adding
+- CIRCULAR: Surface only - returns to original term
+- DIVERGENT: Wrong direction - unrelated tangent
+- COLLAPSE: Nothing there - incoherent or empty
+"""
+from typing import Optional, List, Tuple
+from dataclasses import dataclass
+
+from .base import BaseProbe
+from ..core.model import NyxModel
+from ..core.probe_result import EchoProbeResult, EchoType
+
+
+class EchoProbe(BaseProbe):
+    """
+    Echo probe: measures conceptual depth.
+
+    Process:
+    1. Probe term to get initial completion
+    2. Feed completion back (or combined prompt)
+    3. Classify response: EXPANDS, CONFIRMS, CIRCULAR, DIVERGENT, COLLAPSE
+    4. Repeat for N rounds
+    5. Measure depth = how many EXPANDS before plateau
+    """
+
+    def __init__(
+        self,
+        model: NyxModel,
+        max_rounds: int = 3,
+        max_new_tokens: int = 50,
+        temperature: float = 0.8,
+    ):
+        """
+        Set up the echo probe on a loaded model.
+
+        Args:
+            model: Loaded NyxModel (validated by BaseProbe)
+            max_rounds: Default number of echo rounds per term
+            max_new_tokens: Generation length passed to the model each round
+            temperature: Sampling temperature passed to the model each round
+        """
+        super().__init__(model)
+        # Generation settings, reused unchanged for every round.
+        self.temperature = temperature
+        self.max_new_tokens = max_new_tokens
+        self.max_rounds = max_rounds
+
+    def probe(
+        self,
+        term: str,
+        max_rounds: Optional[int] = None,
+        **kwargs,
+    ) -> EchoProbeResult:
+        """
+        Probe depth of a term through iterative echoing.
+
+        Each round samples a completion for the current prompt, classifies
+        it, and derives the next prompt from it. Probing stops early once a
+        round is classified as COLLAPSE.
+
+        Args:
+            term: Word or phrase to probe
+            max_rounds: Override default max rounds; an explicit 0 runs no
+                rounds (only ``None`` falls back to the default)
+            **kwargs: Accepted for compatibility with ``BaseProbe.probe`` /
+                ``probe_batch``; currently ignored
+
+        Returns:
+            EchoProbeResult with chain and classifications. ``depth`` counts
+            EXPANDS rounds from the start; CONFIRMS rounds are skipped
+            without ending the count, any other type ends it.
+        """
+        # `max_rounds or self.max_rounds` would turn an explicit 0 into the default.
+        rounds = self.max_rounds if max_rounds is None else max_rounds
+        chain = [term]
+        echo_types = []
+        current_prompt = term
+
+        for round_num in range(rounds):
+            # Generate completion
+            result = self.model.generate(
+                prompt=current_prompt,
+                max_new_tokens=self.max_new_tokens,
+                temperature=self.temperature,
+                do_sample=True,
+            )
+
+            completion = result.completion.strip()
+            chain.append(completion)
+
+            # Classify this response relative to original term and chain
+            echo_type = self._classify_response(
+                original_term=term,
+                current_prompt=current_prompt,
+                response=completion,
+                chain=chain,
+            )
+            echo_types.append(echo_type)
+
+            # If collapsed, stop probing
+            if echo_type == EchoType.COLLAPSE:
+                break
+
+            # Prepare next prompt - use a combination strategy
+            current_prompt = self._prepare_next_prompt(term, completion, round_num)
+
+        # Calculate depth = EXPANDS rounds before the first streak-breaking type
+        depth = 0
+        for et in echo_types:
+            if et == EchoType.EXPANDS:
+                depth += 1
+            elif et == EchoType.CONFIRMS:
+                # CONFIRMS doesn't add depth but doesn't break streak
+                continue
+            else:
+                # CIRCULAR, DIVERGENT, or COLLAPSE breaks the depth streak
+                break
+
+        return EchoProbeResult(
+            term=term,
+            rounds=len(echo_types),
+            chain=chain,
+            echo_types=echo_types,
+            depth=depth,
+        )
+
+    def _classify_response(
+        self,
+        original_term: str,
+        current_prompt: str,
+        response: str,
+        chain: List[str],
+    ) -> EchoType:
+        """
+        Assign an EchoType to one response using word-level heuristics.
+
+        Checks run in a fixed order and the first match wins:
+        COLLAPSE (too short), CIRCULAR, COLLAPSE (marker found), DIVERGENT,
+        EXPANDS, and finally CONFIRMS as the fallback. "Significant" words
+        are whitespace-separated tokens longer than three characters,
+        compared case-insensitively.
+        """
+
+        def significant(text: str) -> set:
+            # Lower-cased tokens longer than three characters.
+            return {token.lower() for token in text.split() if len(token) > 3}
+
+        # 1. Fewer than five characters after trimming: nothing there.
+        if len(response.strip()) < 5:
+            return EchoType.COLLAPSE
+
+        # 2. The original term shows up at least twice: the model is looping.
+        if response.lower().count(original_term.lower()) >= 2:
+            return EchoType.CIRCULAR
+
+        # 3. Literal (case-sensitive) substrings treated as incoherence markers.
+        collapse_markers = (
+            "...", "???", "!!!",
+            "\n\n\n", "undefined", "null",
+            "[object", "NaN",
+        )
+        for marker in collapse_markers:
+            if marker in response:
+                return EchoType.COLLAPSE
+
+        # 4. A substantial prompt (more than two significant words) that
+        #    shares none of them with the response: unrelated tangent.
+        prompt_vocab = significant(current_prompt)
+        response_vocab = significant(response)
+        if len(prompt_vocab) > 2 and not (prompt_vocab & response_vocab):
+            return EchoType.DIVERGENT
+
+        # 5. Count words never seen earlier in the chain; the last chain item
+        #    is the current response and is therefore left out.
+        seen_vocab = set()
+        for earlier in chain[:-1]:
+            seen_vocab |= significant(earlier)
+
+        fresh_vocab = response_vocab - seen_vocab
+        fresh_ratio = len(fresh_vocab) / max(len(response_vocab), 1)
+        if fresh_ratio > 0.5 and len(fresh_vocab) >= 3:
+            return EchoType.EXPANDS
+
+        # 6. Coherent but not adding anything new.
+        return EchoType.CONFIRMS
+
+    def _prepare_next_prompt(
+        self,
+        original_term: str,
+        last_completion: str,
+        round_num: int,
+    ) -> str:
+        """
+        Build the prompt for the following echo round.
+
+        - After round 0 the completion alone is fed back, cut to its first
+          100 characters.
+        - After any later round the prompt is "<original term>: <key phrase>",
+          where the key phrase is the first ten whitespace-separated words of
+          the completion, or the completion unchanged when it has ten words
+          or fewer.
+        """
+        if round_num != 0:
+            tokens = last_completion.split()
+            if len(tokens) > 10:
+                key_phrase = " ".join(tokens[:10])
+            else:
+                key_phrase = last_completion
+            return f"{original_term}: {key_phrase}"
+
+        # First echo: the truncation keeps a long completion from running away.
+        return last_completion[:100]
+
+    def summary(self, result: EchoProbeResult) -> str:
+        """
+        Render an echo-probe result as a multi-line text summary.
+
+        The output lists the round count, one symbol per round, the depth,
+        the raw type values, and a preview (first 50 characters, newlines
+        replaced by spaces) of each completion in the chain.
+        """
+        type_symbols = {
+            EchoType.EXPANDS: "↑",
+            EchoType.CONFIRMS: "→",
+            EchoType.CIRCULAR: "↺",
+            EchoType.DIVERGENT: "↗",
+            EchoType.COLLAPSE: "✗",
+        }
+
+        def symbol_for(echo_type) -> str:
+            # Unknown types fall back to a question mark.
+            return type_symbols.get(echo_type, "?")
+
+        pattern = " ".join(symbol_for(t) for t in result.echo_types)
+        type_values = [t.value for t in result.echo_types]
+
+        lines = [f"Echo Probe: '{result.term}'"]
+        lines.append(f"  Rounds: {result.rounds}")
+        lines.append(f"  Pattern: {pattern}")
+        lines.append(f"  Depth: {result.depth}")
+        lines.append(f"  Types: {type_values}")
+
+        # chain[0] is the probed term itself, so previews start at chain[1].
+        completions = result.chain[1:]
+        for number, (item, etype) in enumerate(zip(completions, result.echo_types), start=1):
+            preview = item[:50].replace('\n', ' ')
+            lines.append(f"    [{number}] {symbol_for(etype)} {preview}...")
+
+        return "\n".join(lines)
--- a/nyx_probing/probes/multilingual_probe.py
+++ b/nyx_probing/probes/multilingual_probe.py
@@ -0,0 +1,547 @@
+"""
+Multilingual Triangulation Probe
+
+Uses the discovered language topology to measure conceptual depth:
+1. GROUND in Super Cluster (verify universal convergence)
+2. DEEPEN via Isolated Zone (access philosophical valleys)
+3. TRIANGULATE back (prove understanding, not pattern matching)
+
+The Language Map:
+- Super Cluster (sim=1.0): ZH, JA, EN, AR, FR, PT, ES
+- Isolated Zone (sim<0.52): IT, TR, HI, DE
+- Bridge: KO
+- Secondary Cluster: VI, ID, RU
+"""
+from dataclasses import dataclass, field
+from typing import Optional, List, Dict, Tuple
+from datetime import datetime
+from enum import Enum
+import torch
+
+from .base import BaseProbe
+from ..core.model import NyxModel
+
+
+class LanguageZone(str, Enum):
+    """Language zones based on convergence analysis.
+
+    Members are also ``str`` instances, so they compare equal to their
+    string values.
+    """
+    SUPER_CLUSTER = "super_cluster"  # High convergence (sim=1.0)
+    ISOLATED = "isolated"            # Low convergence (sim<0.52)
+    BRIDGE = "bridge"                # Connects zones
+    SECONDARY = "secondary"          # Own cluster (VI-ID-RU)
+
+
+# Language metadata based on our discoveries.
+# Keyed by upper-case language code; every entry has "name", "zone" and
+# "avg_tokens", and some carry an optional "specialty" or "note".
+# NOTE(review): "avg_tokens" is presumably the average tokenizer tokens per word - confirm.
+LANGUAGES = {
+    # Super Cluster - Perfect convergence
+    "EN": {"name": "English", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 1.2},
+    "ZH": {"name": "Chinese", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 1.0},
+    "JA": {"name": "Japanese", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 1.0},
+    "AR": {"name": "Arabic", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 1.8},
+    "FR": {"name": "French", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 2.0},
+    "PT": {"name": "Portuguese", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 2.2},
+    "ES": {"name": "Spanish", "zone": LanguageZone.SUPER_CLUSTER, "avg_tokens": 2.5},
+    
+    # Isolated Zone - Distinct computational paths
+    "DE": {"name": "German", "zone": LanguageZone.ISOLATED, "avg_tokens": 3.0, "specialty": "philosophy"},
+    "IT": {"name": "Italian", "zone": LanguageZone.ISOLATED, "avg_tokens": 2.5, "note": "most isolated"},
+    "TR": {"name": "Turkish", "zone": LanguageZone.ISOLATED, "avg_tokens": 2.8},
+    "HI": {"name": "Hindi", "zone": LanguageZone.ISOLATED, "avg_tokens": 5.2, "note": "most fragmented"},
+    
+    # Bridge - connects the zones
+    "KO": {"name": "Korean", "zone": LanguageZone.BRIDGE, "avg_tokens": 2.0},
+    
+    # Secondary Cluster - forms its own cluster (VI-ID-RU)
+    "VI": {"name": "Vietnamese", "zone": LanguageZone.SECONDARY, "avg_tokens": 3.0},
+    "ID": {"name": "Indonesian", "zone": LanguageZone.SECONDARY, "avg_tokens": 3.0},
+    "RU": {"name": "Russian", "zone": LanguageZone.SECONDARY, "avg_tokens": 3.2},
+}
+
+
+@dataclass
+class GroundingResult:
+    """Result from Phase 1: Grounding in Super Cluster."""
+    concept: str
+    languages_tested: List[str]   # language codes that were compared
+    translations: Dict[str, str]  # lang_code -> word
+    
+    # Convergence metrics
+    pairwise_similarities: Dict[Tuple[str, str], float]  # (lang_a, lang_b) -> similarity
+    average_convergence: float
+    min_convergence: float
+    
+    # Hidden states (layer 12), keyed by language code; optional
+    hidden_states: Optional[Dict[str, torch.Tensor]] = None
+
+
+@dataclass
+class DeepeningResult:
+    """Result from Phase 2: Deepening via Isolated Zone."""
+    concept: str
+    language: str  # language code used for deepening
+    word: str      # the concept's word in that language
+    
+    # Depth measurement (from echo probe logic)
+    completion: str
+    depth_score: int  # 0-3 based on expansion; >= 2 counts as "depth accessible" in probe()
+    valley_type: str  # CODE, PROSE, PHILOSOPHY, etc.
+    
+    # Token analysis
+    token_count: int
+    norm_at_layer_12: float
+    
+    # Hidden state; optional
+    hidden_state: Optional[torch.Tensor] = None
+
+
+@dataclass
+class TriangulationResult:
+    """Result from Phase 3: Triangulation back to universal."""
+    source_language: str  # The isolated language
+    target_language: str  # A super cluster language
+    
+    source_word: str
+    translation_prompt: str  # prompt given to the model for the translation step
+    model_completion: str    # what the model produced for that prompt
+    
+    # Did the depth survive translation?
+    depth_preserved: bool    # copied into MultilingualProbeResult.depth_transferable by probe()
+    similarity_to_grounding: float  # Cosine sim to original concept
+    
+    # Evidence: free-text explanation of the verdict
+    reasoning: str
+
+
+@dataclass
+class MultilingualProbeResult:
+    """Combined outcome of the three triangulation phases for one concept."""
+    concept: str
+
+    # Per-phase results
+    grounding: GroundingResult
+    deepening: DeepeningResult
+    triangulation: TriangulationResult
+
+    # Overall verdict
+    depth_accessible: bool  # Can we access depth via isolated zone?
+    depth_transferable: bool  # Does depth survive triangulation?
+    curriculum_recommendation: str
+
+    timestamp: datetime = field(default_factory=datetime.now)
+
+    def to_dict(self) -> dict:
+        """
+        Flatten the result into a JSON-serializable dict.
+
+        Only scalar/summary fields are exported; hidden states, pairwise
+        similarities, completions and reasoning text are left out. The
+        timestamp is emitted in ISO format.
+        """
+        grounding_section = {
+            "languages": self.grounding.languages_tested,
+            "translations": self.grounding.translations,
+            "average_convergence": self.grounding.average_convergence,
+            "min_convergence": self.grounding.min_convergence,
+        }
+        deepening_section = {
+            "language": self.deepening.language,
+            "word": self.deepening.word,
+            "depth_score": self.deepening.depth_score,
+            "valley_type": self.deepening.valley_type,
+            "token_count": self.deepening.token_count,
+        }
+        triangulation_section = {
+            "source": self.triangulation.source_language,
+            "target": self.triangulation.target_language,
+            "depth_preserved": self.triangulation.depth_preserved,
+            "similarity": self.triangulation.similarity_to_grounding,
+        }
+        assessment_section = {
+            "depth_accessible": self.depth_accessible,
+            "depth_transferable": self.depth_transferable,
+            "recommendation": self.curriculum_recommendation,
+        }
+
+        payload = {"concept": self.concept}
+        payload["grounding"] = grounding_section
+        payload["deepening"] = deepening_section
+        payload["triangulation"] = triangulation_section
+        payload["assessment"] = assessment_section
+        payload["timestamp"] = self.timestamp.isoformat()
+        return payload
+
+
+class MultilingualTriangulationProbe(BaseProbe):
+    """
+    Multilingual Triangulation Probe
+    
+    Uses the discovered language topology to measure and access conceptual depth.
+    
+    Workflow:
+    1. GROUND: Verify concept exists in Super Cluster (universal layer)
+    2. DEEPEN: Access depth via Isolated Zone language (e.g., German)
+    3. TRIANGULATE: Translate depth back to universal, verify preservation
+    """
+    
+    # Layers where universal concept layer lives
+    CONCEPT_LAYERS = [12, 16, 20, 24]
+    PRIMARY_LAYER = 12
+    
+    def __init__(
+        self,
+        model: NyxModel,
+        grounding_languages: Optional[List[str]] = None,
+        deepening_language: str = "DE",
+        triangulation_target: str = "EN",
+    ):
+        """
+        Initialize the probe.
+
+        Args:
+            model: Loaded NyxModel
+            grounding_languages: Languages for Phase 1 (default: EN, ZH, AR)
+            deepening_language: Language for Phase 2 (default: DE for philosophy)
+            triangulation_target: Target for Phase 3 (default: EN)
+
+        Raises:
+            ValueError: If a grounding language or the deepening language is
+                not a key of LANGUAGES. A language that is known but sits in
+                an unexpected zone only prints a warning.
+        """
+        super().__init__(model)
+
+        self.grounding_languages = grounding_languages or ["EN", "ZH", "AR"]
+        self.deepening_language = deepening_language
+        self.triangulation_target = triangulation_target
+
+        # Validate languages
+        for lang in self.grounding_languages:
+            if lang not in LANGUAGES:
+                raise ValueError(f"Unknown language: {lang}")
+            if LANGUAGES[lang]["zone"] != LanguageZone.SUPER_CLUSTER:
+                print(f"Warning: {lang} is not in Super Cluster")
+
+        # Same check as for grounding languages, so an unknown code fails with
+        # a clear ValueError instead of a bare KeyError from the lookup below.
+        if self.deepening_language not in LANGUAGES:
+            raise ValueError(f"Unknown language: {self.deepening_language}")
+        if LANGUAGES[self.deepening_language]["zone"] != LanguageZone.ISOLATED:
+            print(f"Warning: {deepening_language} is not in Isolated Zone")
+    
+    def _get_hidden_state(self, text: str, layer: int = 12) -> torch.Tensor:
+        """
+        Encode ``text`` and return one float32 hidden vector from ``layer``.
+
+        The vector is read at index ``[0, -1, :]`` of the layer's hidden
+        states (presumably batch 0, last position - assumes a
+        (batch, seq, hidden) layout). It is not moved off the model's device.
+        """
+        wrapper = self.model
+        encoded = wrapper.tokenizer(text, return_tensors="pt").to(wrapper.device)
+
+        # Inference only: no autograd graph is needed.
+        with torch.no_grad():
+            forward_out = wrapper.model(**encoded, output_hidden_states=True)
+
+        layer_states = forward_out.hidden_states[layer]
+        return layer_states[0, -1, :].float()
+    
+    def _cosine_similarity(self, a: torch.Tensor, b: torch.Tensor) -> float:
+        """
+        Cosine similarity of two 1-D tensors as a Python float.
+
+        Returns 0.0 when either tensor has zero norm, avoiding a division by zero.
+        """
+        magnitude_a = a.norm()
+        magnitude_b = b.norm()
+        if magnitude_a != 0 and magnitude_b != 0:
+            similarity = torch.dot(a, b) / (magnitude_a * magnitude_b)
+            return similarity.item()
+        return 0.0
+    
+    def _get_norm(self, hidden_state: torch.Tensor) -> float:
+        """Return the L2 norm of ``hidden_state`` as a Python float."""
+        magnitude = hidden_state.norm()
+        return magnitude.item()
+    
+    def probe(
+        self,
+        concept: str,
+        translations: Dict[str, str],
+        **kwargs,
+    ) -> MultilingualProbeResult:
+        """
+        Run grounding, deepening and triangulation for one concept.
+
+        Args:
+            concept: The concept name (e.g., "consciousness")
+            translations: Dict mapping language codes to words
+                         e.g., {"EN": "consciousness", "DE": "Bewusstsein", ...}
+            **kwargs: Accepted but not used by this method
+
+        Returns:
+            MultilingualProbeResult holding the three phase results, the two
+            depth flags and a curriculum recommendation derived from them
+        """
+        # The three phases run in order; triangulation consumes both earlier results.
+        grounding = self._phase_grounding(concept, translations)
+        deepening = self._phase_deepening(concept, translations)
+        triangulation = self._phase_triangulation(
+            concept, translations, grounding, deepening
+        )
+
+        # Depth counts as accessible from a depth score of 2 upwards.
+        depth_accessible = deepening.depth_score >= 2
+        depth_transferable = triangulation.depth_preserved
+
+        if not depth_accessible:
+            recommendation = "Concept too shallow - focus on grounding first"
+        elif depth_transferable:
+            recommendation = f"TEACH in {self.deepening_language}, REINFORCE in {self.triangulation_target}"
+        else:
+            recommendation = f"Use {self.deepening_language} for depth, but verify transfer manually"
+
+        return MultilingualProbeResult(
+            concept=concept,
+            grounding=grounding,
+            deepening=deepening,
+            triangulation=triangulation,
+            depth_accessible=depth_accessible,
+            depth_transferable=depth_transferable,
+            curriculum_recommendation=recommendation,
+        )
+    
+    def _phase_grounding(
+        self,
+        concept: str,
+        translations: Dict[str, str],
+    ) -> GroundingResult:
+        """
+        Phase 1: measure how closely the grounding languages agree.
+
+        Every grounding language that has an entry in ``translations`` gets a
+        hidden state at ``PRIMARY_LAYER``; each unordered pair of those states
+        is then compared by cosine similarity. Grounding languages without a
+        translation are skipped. With fewer than two usable languages there
+        are no pairs, and both convergence figures are reported as 0.0.
+        """
+        hidden_states = {
+            code: self._get_hidden_state(translations[code], self.PRIMARY_LAYER)
+            for code in self.grounding_languages
+            if code in translations
+        }
+        langs = list(hidden_states)
+
+        # One entry per unordered pair, keyed (earlier, later) in the order
+        # the languages were collected.
+        pairwise = {}
+        for position, first in enumerate(langs):
+            for second in langs[position + 1:]:
+                pairwise[(first, second)] = self._cosine_similarity(
+                    hidden_states[first], hidden_states[second]
+                )
+
+        scores = list(pairwise.values())
+        if scores:
+            avg_convergence = sum(scores) / len(scores)
+            min_convergence = min(scores)
+        else:
+            avg_convergence = 0.0
+            min_convergence = 0.0
+
+        return GroundingResult(
+            concept=concept,
+            languages_tested=langs,
+            translations={code: translations[code] for code in langs},
+            pairwise_similarities=pairwise,
+            average_convergence=avg_convergence,
+            min_convergence=min_convergence,
+            hidden_states=hidden_states,
+        )
+    
+    def _phase_deepening(
+        self,
+        concept: str,
+        translations: Dict[str, str],
+    ) -> DeepeningResult:
+        """
+        Phase 2: probe the concept's word in the deepening language.
+
+        Records the word's hidden state and norm at ``PRIMARY_LAYER``, counts
+        its tokens (without special tokens), samples one completion of the
+        bare word, and scores that completion with ``_classify_valley`` and
+        ``_measure_depth``.
+
+        Raises:
+            ValueError: If ``translations`` has no non-empty entry for the
+                deepening language.
+        """
+        lang = self.deepening_language
+        word = translations.get(lang)
+        if not word:
+            raise ValueError(f"No translation provided for deepening language: {lang}")
+
+        hidden_state = self._get_hidden_state(word, self.PRIMARY_LAYER)
+        token_ids = self.model.tokenizer.encode(word, add_special_tokens=False)
+
+        # A single sampled continuation of the word is the text that the
+        # valley and depth heuristics inspect.
+        generation = self.model.generate(
+            prompt=word,
+            max_new_tokens=50,
+            temperature=0.7,
+            do_sample=True,
+        )
+        completion = generation.completion
+
+        return DeepeningResult(
+            concept=concept,
+            language=lang,
+            word=word,
+            completion=completion,
+            valley_type=self._classify_valley(completion),
+            depth_score=self._measure_depth(word, completion),
+            token_count=len(token_ids),
+            norm_at_layer_12=self._get_norm(hidden_state),
+            hidden_state=hidden_state,
+        )
+    
+    def _phase_triangulation(
+        self,
+        concept: str,
+        translations: Dict[str, str],
+        grounding: GroundingResult,
+        deepening: DeepeningResult,
+    ) -> TriangulationResult:
+        """
+        Phase 3: Triangulate back to universal.
+
+        Prompts the model to carry the deepening-language word over into the
+        triangulation target language, then compares the hidden state of
+        prompt + completion against the grounding phase.
+
+        Args:
+            concept: The concept name (not used in the prompt itself).
+            translations: Mapping of language code to word; must contain the
+                deepening language.
+            grounding: Phase 1 result; its ``hidden_states`` supply the
+                reference vector.
+            deepening: Phase 2 result; its ``valley_type`` is the valley the
+                completion is expected to stay in.
+
+        Returns:
+            TriangulationResult with the prompt, the completion, the
+            similarity to grounding and the depth-preservation verdict.
+
+        Raises:
+            KeyError: If the deepening language is missing from
+                ``translations`` or either language code is not in ``LANGUAGES``.
+        """
+        source_lang = self.deepening_language
+        target_lang = self.triangulation_target
+        source_word = translations[source_lang]
+
+        # Create translation prompt
+        source_name = LANGUAGES[source_lang]["name"]
+        target_name = LANGUAGES[target_lang]["name"]
+
+        # Prompt designed to test depth transfer
+        prompt = f"{source_word} ({source_name}): In {target_name},"
+
+        # Generate
+        result = self.model.generate(
+            prompt=prompt,
+            max_new_tokens=80,
+            temperature=0.7,
+            do_sample=True,
+        )
+
+        # Hidden state at the last position of prompt + completion
+        full_text = prompt + result.completion
+        completion_hidden = self._get_hidden_state(full_text, self.PRIMARY_LAYER)
+
+        # Pick the grounding reference: the target language's own state when
+        # it was grounded, otherwise the mean of whatever states exist.
+        grounded_states = grounding.hidden_states
+        if target_lang in grounded_states:
+            reference = grounded_states[target_lang]
+        elif grounded_states:
+            reference = torch.stack(list(grounded_states.values())).mean(dim=0)
+        else:
+            # Grounding skips languages without a translation, so it can come
+            # back empty; torch.stack would raise on an empty list.
+            reference = None
+
+        if reference is None:
+            similarity = 0.0
+        else:
+            similarity = self._cosine_similarity(completion_hidden, reference)
+
+        # Determine if depth was preserved
+        depth_preserved = self._check_depth_preserved(
+            result.completion, deepening.valley_type, similarity
+        )
+
+        # Reasoning
+        if reference is None:
+            reasoning = "No grounding states available for comparison - depth transfer cannot be verified"
+        elif depth_preserved:
+            reasoning = f"Completion shows depth ({deepening.valley_type}) with {similarity:.2f} similarity to grounding"
+        else:
+            reasoning = f"Depth lost in translation - similarity {similarity:.2f}, valley markers missing"
+
+        return TriangulationResult(
+            source_language=source_lang,
+            target_language=target_lang,
+            source_word=source_word,
+            translation_prompt=prompt,
+            model_completion=result.completion,
+            depth_preserved=depth_preserved,
+            similarity_to_grounding=similarity,
+            reasoning=reasoning,
+        )
+    
+    def _classify_valley(self, completion: str) -> str:
+        """
+        Label a completion as "CODE", "PHILOSOPHY", "TECHNICAL" or "PROSE".
+
+        The checks are plain substring tests applied in that order; the first
+        match wins and "PROSE" is the fallback. Code markers are matched
+        case-sensitively against the raw text, the keyword lists against the
+        lower-cased text.
+        """
+        code_markers = ("::", "{", "}", "();", "=>", "def ", "class ")
+        for marker in code_markers:
+            if marker in completion:
+                return "CODE"
+
+        lowered = completion.lower()
+
+        # Ordered (label, keywords) table: philosophy is tested before technical.
+        keyword_valleys = (
+            ("PHILOSOPHY", ("truth", "existence", "being", "consciousness", "reality", "mind")),
+            ("TECHNICAL", ("system", "process", "function", "method", "algorithm")),
+        )
+        for valley, keywords in keyword_valleys:
+            for keyword in keywords:
+                if keyword in lowered:
+                    return valley
+
+        return "PROSE"
+    
+    def _measure_depth(self, word: str, completion: str) -> int:
+        """
+        Score a completion's conceptual depth on a 0-3 scale.
+
+        Scoring, evaluated top to bottom on the lower-cased completion:
+        - 0 if the word itself appears in the first 50 characters (circular)
+        - 3 if at least two distinct deep markers occur
+        - 2 if one deep marker or at least two moderate markers occur
+        - 1 if one moderate marker occurs or the completion has more than
+          10 whitespace-separated words
+        - 0 otherwise
+        """
+        text = completion.lower()
+
+        # An immediate echo of the probed word counts as circular.
+        if word.lower() in text[:50]:
+            return 0
+
+        deep_markers = ("truth", "existence", "being", "consciousness", "reality", "meaning", "essence")
+        moderate_markers = ("concept", "idea", "theory", "understanding", "knowledge", "awareness")
+
+        # Each marker counts at most once, however often it occurs.
+        deep_hits = len([marker for marker in deep_markers if marker in text])
+        moderate_hits = len([marker for marker in moderate_markers if marker in text])
+
+        if deep_hits >= 2:
+            return 3
+        if deep_hits >= 1 or moderate_hits >= 2:
+            return 2
+        if moderate_hits >= 1 or len(completion.split()) > 10:
+            return 1
+        return 0
+    
+    def _check_depth_preserved(
+        self,
+        completion: str,
+        original_valley: str,
+        similarity: float,
+    ) -> bool:
+        """
+        Decide whether the triangulated completion kept the original depth.
+
+        Rules, in order:
+        - similarity below 0.3 always fails
+        - a PHILOSOPHY original passes if the completion is PHILOSOPHY or PROSE
+        - a TECHNICAL original passes if the completion is TECHNICAL
+        - a PROSE original passes unless the completion is CODE
+        - anything else passes only with similarity of at least 0.5
+        """
+        if similarity < 0.3:
+            return False
+
+        new_valley = self._classify_valley(completion)
+
+        if original_valley == "PHILOSOPHY":
+            if new_valley in ("PHILOSOPHY", "PROSE"):
+                return True
+        elif original_valley == "TECHNICAL":
+            if new_valley == "TECHNICAL":
+                return True
+        elif original_valley == "PROSE":
+            # Prose is flexible: only a drop into code counts as a loss.
+            return new_valley != "CODE"
+
+        # No valley rule matched, so the similarity alone decides.
+        return similarity >= 0.5
+    
+    def summary(self, result: MultilingualProbeResult) -> str:
+        """
+        Render a probe result as a boxed, multi-line text report.
+
+        The report has one section per phase plus an overall assessment.
+        Padding widths are fixed, and only the first 44 characters of the
+        recommendation are shown.
+        """
+        grounding = result.grounding
+        deepening = result.deepening
+        triangulation = result.triangulation
+
+        rule = "╠══════════════════════════════════════════════════════════════╣"
+        languages = ', '.join(grounding.languages_tested)
+        preserved = '✓ YES' if triangulation.depth_preserved else '✗ NO'
+        accessible = '✓' if result.depth_accessible else '✗'
+        transferable = '✓' if result.depth_transferable else '✗'
+
+        lines = [
+            "╔══════════════════════════════════════════════════════════════╗",
+            f"║  MULTILINGUAL TRIANGULATION: {result.concept.upper():^32} ║",
+            rule,
+            "║  PHASE 1: GROUNDING                                         ║",
+            f"║  Languages: {languages:^49} ║",
+            f"║  Convergence: {grounding.average_convergence:.3f} (min: {grounding.min_convergence:.3f}){' '*24} ║",
+            rule,
+            f"║  PHASE 2: DEEPENING ({deepening.language}){' '*38} ║",
+            f"║  Word: {deepening.word:^54} ║",
+            f"║  Tokens: {deepening.token_count} | Norm: {deepening.norm_at_layer_12:.1f} | Valley: {deepening.valley_type:^10} ║",
+            f"║  Depth Score: {deepening.depth_score}/3{' '*46} ║",
+            rule,
+            f"║  PHASE 3: TRIANGULATION ({triangulation.source_language}→{triangulation.target_language}){' '*30} ║",
+            f"║  Depth Preserved: {preserved:^44} ║",
+            f"║  Similarity: {triangulation.similarity_to_grounding:.3f}{' '*47} ║",
+            rule,
+            f"║  ASSESSMENT{' '*51} ║",
+            f"║  Depth Accessible: {accessible} | Depth Transferable: {transferable}{' '*17} ║",
+            f"║  Recommendation: {result.curriculum_recommendation[:44]:^44} ║",
+            "╚══════════════════════════════════════════════════════════════╝",
+        ]
+        return "\n".join(lines)
--- a/nyx_probing/probes/surface_probe.py
+++ b/nyx_probing/probes/surface_probe.py
@@ -0,0 +1,210 @@
+"""
+Surface Probe: First contact with a term.
+
+The surface probe feeds a word to the model and captures what it completes.
+This reveals the model's immediate associations - which "valley" the word sits in.
+
+Examples discovered:
+- "heartbeat" → C++ code patterns (technical valley)
+- "consciousness" → philosophy (expository valley)
+"""
+from typing import Optional
+from dataclasses import dataclass, field
+from datetime import datetime
+from collections import Counter
+
+from .base import BaseProbe
+from ..core.model import NyxModel, GenerationResult
+from ..core.probe_result import SurfaceProbeResult
+
+
+@dataclass
+class CompletionCategory:
+    """
+    String labels for the kinds of completion text the surface probe tells apart.
+
+    The members are plain class-level string constants, read as
+    ``CompletionCategory.CODE`` and so on.
+    """
+
+    # NOTE(review): none of the attributes below carries a type annotation, so
+    # @dataclass registers no fields for them; they behave as ordinary class
+    # constants.
+    CODE = "code"           # Programming constructs
+    PROSE = "prose"         # Natural language text
+    TECHNICAL = "technical" # Technical/scientific writing
+    LIST = "list"           # Enumerations, bullet points
+    DEFINITION = "definition"  # Dictionary-style definitions
+    UNKNOWN = "unknown"     # Fallback when no other category applies
+
+
+class SurfaceProbe(BaseProbe):
+    """
+    Surface probe: measures immediate associations.
+
+    Runs multiple completions to get a distribution, then analyzes:
+    - What type of content does the model generate?
+    - How consistent are the completions?
+    - Does it hit EOS (contained thought) or run to max_tokens?
+    """
+
+    def __init__(
+        self,
+        model: NyxModel,
+        num_runs: int = 5,
+        max_new_tokens: int = 50,
+        temperature: float = 0.8,
+    ):
+        """
+        Store the sampling configuration used by ``probe``.
+
+        Args:
+            model: Model wrapper used for generation.
+            num_runs: Default number of completions per probed term.
+            max_new_tokens: Token budget passed to each generation call.
+            temperature: Sampling temperature passed to each generation call.
+        """
+        super().__init__(model)
+        self.num_runs = num_runs
+        self.max_new_tokens = max_new_tokens
+        self.temperature = temperature
+
+    def probe(
+        self,
+        term: str,
+        num_runs: Optional[int] = None,
+        capture_hidden: bool = False,
+    ) -> SurfaceProbeResult:
+        """
+        Probe a term with multiple completions.
+
+        Args:
+            term: Word or phrase to probe
+            num_runs: Override default number of runs (a falsy value keeps
+                the default)
+            capture_hidden: Forwarded to the model as ``capture_hidden_states``;
+                any hidden states it returns are not stored on the result
+
+        Returns:
+            SurfaceProbeResult with completions and analysis
+
+        Raises:
+            ValueError: If the effective number of runs is less than 1.
+        """
+        runs = num_runs or self.num_runs
+        # The average below divides by ``runs``; reject counts that would
+        # divide by zero or yield a negative average.
+        if runs < 1:
+            raise ValueError(f"num_runs must be at least 1, got {runs}")
+
+        completions = []
+        eos_count = 0
+        total_tokens = 0
+
+        for _ in range(runs):
+            result = self.model.generate(
+                prompt=term,
+                max_new_tokens=self.max_new_tokens,
+                temperature=self.temperature,
+                do_sample=True,
+                capture_hidden_states=capture_hidden,
+            )
+
+            completions.append(result.completion)
+            if result.hit_eos:
+                eos_count += 1
+            total_tokens += result.num_tokens
+
+        # Calculate coherence (how similar are completions to each other?)
+        coherence = self._calculate_coherence(completions)
+
+        return SurfaceProbeResult(
+            term=term,
+            completions=completions,
+            hit_eos_count=eos_count,
+            avg_tokens=total_tokens / runs,
+            coherence_score=coherence,
+        )
+
+    def _calculate_coherence(self, completions: list[str]) -> float:
+        """
+        Calculate coherence score based on completion similarity.
+
+        Simple heuristic combining two signals:
+        - the share of completions whose first significant word (longer than
+          one character, purely alphanumeric) is the most common such word
+        - the variance of the completion lengths in characters
+
+        Returns 0-1 score where 1 = highly coherent. Fewer than two
+        completions score 1.0; no significant first word anywhere scores 0.0.
+        """
+        if len(completions) < 2:
+            return 1.0
+
+        # Get first significant words (skip punctuation/whitespace)
+        first_words = []
+        for comp in completions:
+            for w in comp.split():
+                if len(w) > 1 and w.isalnum():
+                    first_words.append(w.lower())
+                    break
+
+        if not first_words:
+            return 0.0
+
+        # Calculate concentration of first words
+        # If all completions start with same word = high coherence
+        most_common_count = Counter(first_words).most_common(1)[0][1]
+        first_word_coherence = most_common_count / len(completions)
+
+        # Check length variance
+        lengths = [len(c) for c in completions]
+        avg_len = sum(lengths) / len(lengths)
+        if avg_len > 0:
+            variance = sum((n - avg_len) ** 2 for n in lengths) / len(lengths)
+            # Normalize variance to 0-1 (higher variance = lower coherence)
+            length_coherence = 1.0 / (1.0 + variance / 1000)
+        else:
+            length_coherence = 0.0
+
+        # Combine (weight first-word more heavily)
+        return 0.7 * first_word_coherence + 0.3 * length_coherence
+
+    def classify_completions(self, result: SurfaceProbeResult) -> dict:
+        """
+        Classify the types of completions observed.
+
+        Returns:
+            Dict with "categories" (count per category), "dominant" (most
+            frequent category, "unknown" when there are no completions) and
+            "diversity" (distinct categories divided by completions, 0 when
+            there are none).
+        """
+        categories = Counter(self._classify_single(comp) for comp in result.completions)
+
+        return {
+            "categories": dict(categories),
+            "dominant": categories.most_common(1)[0][0] if categories else "unknown",
+            "diversity": len(categories) / len(result.completions) if result.completions else 0,
+        }
+
+    def _classify_single(self, completion: str) -> str:
+        """
+        Classify a single completion.
+
+        Checks run in order: code, definition, list, technical, prose. The
+        first match wins and ``CompletionCategory.UNKNOWN`` is the fallback.
+        """
+        # Simple heuristics - can be made smarter
+        comp_lower = completion.lower().strip()
+
+        # Code indicators (matched case-sensitively against the raw text)
+        code_patterns = ["::", "{", "}", "();", "=>", "function", "class ", "def ", "return"]
+        if any(p in completion for p in code_patterns):
+            return CompletionCategory.CODE
+
+        # Definition patterns
+        if comp_lower.startswith(("is ", "means ", "refers to", "- ")):
+            return CompletionCategory.DEFINITION
+
+        # List patterns
+        # NOTE(review): "- " is already claimed by the definition check above,
+        # so a dash-led completion never reaches this branch.
+        if comp_lower.startswith(("1.", "2.", "- ", "* ", "a)")):
+            return CompletionCategory.LIST
+
+        # Technical patterns
+        tech_words = ["algorithm", "function", "variable", "method", "system", "process"]
+        if any(w in comp_lower for w in tech_words):
+            return CompletionCategory.TECHNICAL
+
+        # Default to prose if it looks like natural language
+        if len(comp_lower.split()) > 3:
+            return CompletionCategory.PROSE
+
+        return CompletionCategory.UNKNOWN
+
+    def summary(self, result: SurfaceProbeResult) -> str:
+        """
+        Generate human-readable summary of probe result.
+
+        A result without completions is reported with 0% EOS and a
+        placeholder sample instead of raising.
+        """
+        classification = self.classify_completions(result)
+        run_count = len(result.completions)
+
+        # Guard the percentage and the sample lookup against an empty result.
+        if run_count:
+            eos_pct = (result.hit_eos_count / run_count) * 100
+            sample_line = f"  Sample: {result.completions[0][:60]}..."
+        else:
+            eos_pct = 0.0
+            sample_line = "  Sample: (no completions)"
+
+        lines = [
+            f"Surface Probe: '{result.term}'",
+            f"  Runs: {run_count}",
+            f"  Dominant type: {classification['dominant']}",
+            f"  Coherence: {result.coherence_score:.2f}",
+            f"  Avg tokens: {result.avg_tokens:.1f}",
+            f"  Hit EOS: {eos_pct:.0f}%",
+            sample_line,
+        ]
+        return "\n".join(lines)