- CLI: nyx-probe scan with --summary/--delta/--full flags - DriftProbe: training safety with Gini coefficient + Angular Drift - Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical) - Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system Key findings: - German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse) - Super Cluster validated: heart cross-lang sim = 1.000 - Isolated Zone confirmed: being EN↔DE sim = 0.195 - Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
224 lines · 7.3 KiB · Python
"""
|
|
Echo Probe: Depth measurement through iterative completion.
|
|
|
|
The echo probe feeds completions back to the model to measure depth.
|
|
Does the model EXPAND (go deeper) or COLLAPSE (circular/divergent)?
|
|
|
|
Classification from nimmerversity.md:
|
|
- EXPANDS: Real depth - adds new information
|
|
- CONFIRMS: Shallow but solid - reinforces without adding
|
|
- CIRCULAR: Surface only - returns to original term
|
|
- DIVERGENT: Wrong direction - unrelated tangent
|
|
- COLLAPSE: Nothing there - incoherent or empty
|
|
"""
|
|
from typing import Optional, List, Tuple
|
|
from dataclasses import dataclass
|
|
|
|
from .base import BaseProbe
|
|
from ..core.model import NyxModel
|
|
from ..core.probe_result import EchoProbeResult, EchoType
|
|
|
|
|
|
class EchoProbe(BaseProbe):
    """
    Echo probe: measures conceptual depth.

    Process:
    1. Probe term to get initial completion
    2. Feed completion back (or combined prompt)
    3. Classify response: EXPANDS, CONFIRMS, CIRCULAR, DIVERGENT, COLLAPSE
    4. Repeat for N rounds
    5. Measure depth = how many EXPANDS before plateau
    """

    def __init__(
        self,
        model: NyxModel,
        max_rounds: int = 3,
        max_new_tokens: int = 50,
        temperature: float = 0.8,
    ):
        """
        Args:
            model: Model wrapper used for generation.
            max_rounds: Default number of echo iterations per probe.
            max_new_tokens: Token budget for each generated completion.
            temperature: Sampling temperature (sampling is always on).
        """
        super().__init__(model)
        self.max_rounds = max_rounds
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def probe(
        self,
        term: str,
        max_rounds: Optional[int] = None,
    ) -> EchoProbeResult:
        """
        Probe depth of a term through iterative echoing.

        Args:
            term: Word or phrase to probe
            max_rounds: Override default max rounds. An explicit 0 is
                honored (no rounds); only None falls back to the default.

        Returns:
            EchoProbeResult with chain and classifications
        """
        # BUGFIX: was `max_rounds or self.max_rounds`, which silently
        # ignored an explicit max_rounds=0. Compare against None instead.
        rounds = max_rounds if max_rounds is not None else self.max_rounds
        chain: List[str] = [term]
        echo_types: List[EchoType] = []
        current_prompt = term

        for round_num in range(rounds):
            # Generate a completion for the current prompt.
            result = self.model.generate(
                prompt=current_prompt,
                max_new_tokens=self.max_new_tokens,
                temperature=self.temperature,
                do_sample=True,
            )

            completion = result.completion.strip()
            chain.append(completion)

            # Classify this response relative to original term and chain.
            echo_type = self._classify_response(
                original_term=term,
                current_prompt=current_prompt,
                response=completion,
                chain=chain,
            )
            echo_types.append(echo_type)

            # If collapsed, there is nothing left to probe.
            if echo_type == EchoType.COLLAPSE:
                break

            # Prepare next prompt - use a combination strategy.
            current_prompt = self._prepare_next_prompt(term, completion, round_num)

        # Depth = consecutive EXPANDS from the start. CONFIRMS neither
        # adds depth nor breaks the streak; anything else ends it.
        depth = 0
        for et in echo_types:
            if et == EchoType.EXPANDS:
                depth += 1
            elif et == EchoType.CONFIRMS:
                continue
            else:
                # CIRCULAR, DIVERGENT, or COLLAPSE breaks the depth streak.
                break

        return EchoProbeResult(
            term=term,
            rounds=len(echo_types),
            chain=chain,
            echo_types=echo_types,
            depth=depth,
        )

    def _classify_response(
        self,
        original_term: str,
        current_prompt: str,
        response: str,
        chain: List[str],
    ) -> EchoType:
        """
        Classify a response relative to the probing chain.

        This is a heuristic classifier - can be made smarter with
        semantic similarity or even a classifier model.

        Args:
            original_term: The term the probe started from.
            current_prompt: The prompt that produced `response`.
            response: The model's latest completion (already stripped).
            chain: Full chain so far, with `response` as its last item.

        Returns:
            The EchoType classification for this round.
        """
        response_lower = response.lower()
        term_lower = original_term.lower()

        # Empty or very short = COLLAPSE.
        if len(response.strip()) < 5:
            return EchoType.COLLAPSE

        # Circularity: the original term appears repeatedly in the response.
        term_count = response_lower.count(term_lower)
        if term_count >= 2:
            return EchoType.CIRCULAR

        # Collapse: markers of incoherent / degenerate output.
        # NOTE(review): matched case-sensitively against the raw response,
        # so e.g. "nan" would not match "NaN" — presumably intentional.
        collapse_markers = [
            "...", "???", "!!!",
            "\n\n\n", "undefined", "null",
            "[object", "NaN",
        ]
        if any(marker in response for marker in collapse_markers):
            return EchoType.COLLAPSE

        # Divergence: no shared significant (>3 chars) words with the prompt.
        prompt_words = {w.lower() for w in current_prompt.split() if len(w) > 3}
        response_words = {w.lower() for w in response.split() if len(w) > 3}
        overlap = len(prompt_words & response_words)

        if overlap == 0 and len(prompt_words) > 2:
            # No shared words and prompt was substantial = divergent.
            return EchoType.DIVERGENT

        # Expansion: the response introduces enough words not seen anywhere
        # earlier in the chain (chain[:-1] excludes the current response).
        all_previous_words = set()
        for item in chain[:-1]:
            all_previous_words.update(w.lower() for w in item.split() if len(w) > 3)

        new_significant_words = response_words - all_previous_words
        # max(..., 1) guards against division by zero when the response
        # has no significant words at all.
        new_word_ratio = len(new_significant_words) / max(len(response_words), 1)

        if new_word_ratio > 0.5 and len(new_significant_words) >= 3:
            return EchoType.EXPANDS

        # Default to CONFIRMS if coherent but not expanding.
        return EchoType.CONFIRMS

    def _prepare_next_prompt(
        self,
        original_term: str,
        last_completion: str,
        round_num: int,
    ) -> str:
        """
        Prepare the next prompt for echo probing.

        Different strategies for different rounds:
        - Round 0: Just use the completion (truncated to 100 chars)
        - Round 1+: Combine original term with key concepts from completion

        Args:
            original_term: The term the probe started from.
            last_completion: The most recent model completion.
            round_num: Zero-based index of the round that just finished.

        Returns:
            The prompt string for the next round.
        """
        if round_num == 0:
            # First echo: just use the completion to see where it goes.
            return last_completion[:100]  # Truncate to avoid runaway

        # Later rounds: take the first ~10 words as the key phrase and
        # re-anchor it to the original term.
        words = last_completion.split()
        key_phrase = " ".join(words[:10]) if len(words) > 10 else last_completion

        return f"{original_term}: {key_phrase}"

    def summary(self, result: EchoProbeResult) -> str:
        """Generate human-readable summary.

        Args:
            result: A completed echo-probe result.

        Returns:
            Multi-line summary string with per-round symbols and previews.
        """
        type_symbols = {
            EchoType.EXPANDS: "↑",
            EchoType.CONFIRMS: "→",
            EchoType.CIRCULAR: "↺",
            EchoType.DIVERGENT: "↗",
            EchoType.COLLAPSE: "✗",
        }

        type_str = " ".join(type_symbols.get(t, "?") for t in result.echo_types)

        lines = [
            f"Echo Probe: '{result.term}'",
            f"  Rounds: {result.rounds}",
            f"  Pattern: {type_str}",
            f"  Depth: {result.depth}",
            f"  Types: {[t.value for t in result.echo_types]}",
        ]

        # Show chain preview (chain[0] is the original term, so completions
        # pair 1:1 with echo_types via zip).
        for i, (item, etype) in enumerate(zip(result.chain[1:], result.echo_types)):
            preview = item[:50].replace('\n', ' ')
            lines.append(f"  [{i+1}] {type_symbols.get(etype, '?')} {preview}...")

        return "\n".join(lines)