feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
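The Gini signature above is a sparsity measure: ~0.5 means a term's weight is spread diffusely, ~0.8 means it is concentrated in a few components. The exact tensor DriftProbe applies it to is not part of this diff, so the snippet below is only a minimal sketch of the standard Gini computation over a hypothetical weights vector:

import numpy as np

def gini(weights: np.ndarray) -> float:
    """Standard Gini coefficient: 0.0 = perfectly even (diffuse), 1.0 = all mass in one entry (sparse)."""
    x = np.sort(np.abs(np.asarray(weights, dtype=float)))  # sort ascending
    n = x.size
    total = x.sum()
    if n == 0 or total == 0:
        return 0.0
    index = np.arange(1, n + 1)
    return float(2.0 * np.dot(index, x) / (n * total) - (n + 1) / n)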
nyx_probing/probes/echo_probe.py  (new file, 223 lines added)
@@ -0,0 +1,223 @@
"""
Echo Probe: Depth measurement through iterative completion.

The echo probe feeds completions back to the model to measure depth.
Does the model EXPAND (go deeper) or COLLAPSE (circular/divergent)?

Classification from nimmerversity.md:
- EXPANDS: Real depth - adds new information
- CONFIRMS: Shallow but solid - reinforces without adding
- CIRCULAR: Surface only - returns to original term
- DIVERGENT: Wrong direction - unrelated tangent
- COLLAPSE: Nothing there - incoherent or empty
"""
from typing import Optional, List, Tuple
from dataclasses import dataclass

from .base import BaseProbe
from ..core.model import NyxModel
from ..core.probe_result import EchoProbeResult, EchoType


class EchoProbe(BaseProbe):
    """
    Echo probe: measures conceptual depth.

    Process:
    1. Probe term to get initial completion
    2. Feed completion back (or combined prompt)
    3. Classify response: EXPANDS, CONFIRMS, CIRCULAR, DIVERGENT, COLLAPSE
    4. Repeat for N rounds
    5. Measure depth = how many EXPANDS before plateau
    """

    def __init__(
        self,
        model: NyxModel,
        max_rounds: int = 3,
        max_new_tokens: int = 50,
        temperature: float = 0.8,
    ):
        super().__init__(model)
        self.max_rounds = max_rounds
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature

    def probe(
        self,
        term: str,
        max_rounds: Optional[int] = None,
    ) -> EchoProbeResult:
        """
        Probe depth of a term through iterative echoing.

        Args:
            term: Word or phrase to probe
            max_rounds: Override default max rounds

        Returns:
            EchoProbeResult with chain and classifications
        """
        rounds = max_rounds or self.max_rounds
        chain = [term]
        echo_types = []
        current_prompt = term

        for round_num in range(rounds):
            # Generate completion
            result = self.model.generate(
                prompt=current_prompt,
                max_new_tokens=self.max_new_tokens,
                temperature=self.temperature,
                do_sample=True,
            )

            completion = result.completion.strip()
            chain.append(completion)

            # Classify this response relative to original term and chain
            echo_type = self._classify_response(
                original_term=term,
                current_prompt=current_prompt,
                response=completion,
                chain=chain,
            )
            echo_types.append(echo_type)

            # If collapsed, stop probing
            if echo_type == EchoType.COLLAPSE:
                break

            # Prepare next prompt - use a combination strategy
            current_prompt = self._prepare_next_prompt(term, completion, round_num)

        # Calculate depth = consecutive EXPANDS from start
        depth = 0
        for et in echo_types:
            if et == EchoType.EXPANDS:
                depth += 1
            elif et == EchoType.CONFIRMS:
                # CONFIRMS doesn't add depth but doesn't break streak
                pass
            else:
                # CIRCULAR, DIVERGENT, or COLLAPSE breaks the depth streak
                break

        return EchoProbeResult(
            term=term,
            rounds=len(echo_types),
            chain=chain,
            echo_types=echo_types,
            depth=depth,
        )

    def _classify_response(
        self,
        original_term: str,
        current_prompt: str,
        response: str,
        chain: List[str],
    ) -> EchoType:
        """
        Classify a response relative to the probing chain.

        This is a heuristic classifier - can be made smarter with
        semantic similarity or even a classifier model.
        """
        response_lower = response.lower()
        term_lower = original_term.lower()

        # Empty or very short = COLLAPSE
        if len(response.strip()) < 5:
            return EchoType.COLLAPSE

        # Check for circularity - term appears prominently in response
        term_count = response_lower.count(term_lower)
        if term_count >= 2:
            return EchoType.CIRCULAR

        # Check for collapse - incoherent markers
        collapse_markers = [
            "...", "???", "!!!",
            "\n\n\n", "undefined", "null",
            "[object", "NaN",
        ]
        if any(marker in response for marker in collapse_markers):
            return EchoType.COLLAPSE

        # Check for divergence - response has no semantic connection
        # Simple heuristic: count shared significant words
        prompt_words = set(w.lower() for w in current_prompt.split() if len(w) > 3)
        response_words = set(w.lower() for w in response.split() if len(w) > 3)
        overlap = len(prompt_words & response_words)

        if overlap == 0 and len(prompt_words) > 2:
            # No shared words and prompt was substantial = divergent
            return EchoType.DIVERGENT

        # Check for expansion - introduces new concepts
        # New words that aren't in any previous chain items
        all_previous_words = set()
        for item in chain[:-1]:  # Exclude current response
            all_previous_words.update(w.lower() for w in item.split() if len(w) > 3)

        new_significant_words = response_words - all_previous_words
        new_word_ratio = len(new_significant_words) / max(len(response_words), 1)

        if new_word_ratio > 0.5 and len(new_significant_words) >= 3:
            return EchoType.EXPANDS

        # Default to CONFIRMS if coherent but not expanding
        return EchoType.CONFIRMS

    def _prepare_next_prompt(
        self,
        original_term: str,
        last_completion: str,
        round_num: int,
    ) -> str:
        """
        Prepare the next prompt for echo probing.

        Different strategies for different rounds:
        - Round 0: Just use completion
        - Round 1+: Combine original term with key concepts from completion
        """
        if round_num == 0:
            # First echo: just use the completion to see where it goes
            return last_completion[:100]  # Truncate to avoid runaway

        # Later rounds: extract key concept and combine with original
        # Take first sentence or first N words
        words = last_completion.split()
        key_phrase = " ".join(words[:10]) if len(words) > 10 else last_completion

        # Combine with original term
        return f"{original_term}: {key_phrase}"

    def summary(self, result: EchoProbeResult) -> str:
        """Generate human-readable summary."""
        type_symbols = {
            EchoType.EXPANDS: "↑",
            EchoType.CONFIRMS: "→",
            EchoType.CIRCULAR: "↺",
            EchoType.DIVERGENT: "↗",
            EchoType.COLLAPSE: "✗",
        }

        type_str = " ".join(type_symbols.get(t, "?") for t in result.echo_types)

        lines = [
            f"Echo Probe: '{result.term}'",
            f" Rounds: {result.rounds}",
            f" Pattern: {type_str}",
            f" Depth: {result.depth}",
            f" Types: {[t.value for t in result.echo_types]}",
        ]

        # Show chain preview
        for i, (item, etype) in enumerate(zip(result.chain[1:], result.echo_types)):
            preview = item[:50].replace('\n', ' ')
            lines.append(f" [{i+1}] {type_symbols.get(etype, '?')} {preview}...")

        return "\n".join(lines)
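For reference, a minimal usage sketch of the probe defined above. How NyxModel is constructed is not shown in this file, so the constructor call is a placeholder assumption; the EchoProbe calls follow the API in this diff:

from nyx_probing.core.model import NyxModel
from nyx_probing.probes.echo_probe import EchoProbe

model = NyxModel()  # placeholder: constructor arguments are repo-specific (assumption)
probe = EchoProbe(model, max_rounds=3, max_new_tokens=50, temperature=0.8)

result = probe.probe("heart")   # EchoProbeResult with chain, echo_types, depth
print(probe.summary(result))    # e.g. "Echo Probe: 'heart'" with a pattern like "↑ → ↺"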