feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags - DriftProbe: training safety with Gini coefficient + Angular Drift - Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical) - Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system Key findings: - German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse) - Super Cluster validated: heart cross-lang sim = 1.000 - Isolated Zone confirmed: being EN↔DE sim = 0.195 - Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
221
nyx_probing/analysis/readiness_scorer.py
Normal file
221
nyx_probing/analysis/readiness_scorer.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
Readiness Scorer: Combines surface and echo probes into curriculum guidance.
|
||||
|
||||
Outputs:
|
||||
- HIGH: Ready for direct training / state machine
|
||||
- MEDIUM: Needs scaffolding or bridging concepts
|
||||
- LOW: Requires foundational work first
|
||||
"""
|
||||
from typing import Optional, List
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ..core.model import NyxModel
|
||||
from ..core.probe_result import (
|
||||
SurfaceProbeResult,
|
||||
EchoProbeResult,
|
||||
ReadinessResult,
|
||||
ReadinessLevel,
|
||||
EchoType,
|
||||
)
|
||||
from ..probes.surface_probe import SurfaceProbe, CompletionCategory
|
||||
from ..probes.echo_probe import EchoProbe
|
||||
|
||||
|
||||
# Curriculum action recommended for each readiness level; the scorer looks
# the action up by the level it assigns to a term.
ACTIONS = {
    ReadinessLevel.HIGH: "state_machine",  # train directly
    ReadinessLevel.MEDIUM: "scaffolding",  # introduce bridging concepts
    ReadinessLevel.LOW: "foundational",  # build up from scratch
}
|
||||
|
||||
|
||||
class ReadinessScorer:
    """
    Combines surface + echo probes to assess curriculum readiness.

    A term is ready for training when:
    1. Surface: Coherent associations (not scattered/random)
    2. Echo: Can expand beyond surface (depth > 0)
    3. Valley: In a productive valley (prose/philosophy, not just code)
    """

    def __init__(
        self,
        model: NyxModel,
        surface_runs: int = 3,
        echo_rounds: int = 3,
        max_new_tokens: int = 50,
    ):
        """
        Build the surface and echo probes that share one model.

        Args:
            model: Model handed to both probes.
            surface_runs: Passed to SurfaceProbe as ``num_runs``.
            echo_rounds: Passed to EchoProbe as ``max_rounds``.
            max_new_tokens: Generation budget passed to both probes.
        """
        self.model = model
        self.surface_probe = SurfaceProbe(
            model,
            num_runs=surface_runs,
            max_new_tokens=max_new_tokens,
        )
        self.echo_probe = EchoProbe(
            model,
            max_rounds=echo_rounds,
            max_new_tokens=max_new_tokens,
        )

    def score(self, term: str) -> ReadinessResult:
        """
        Assess readiness of a term for curriculum.

        Args:
            term: Word or phrase to assess

        Returns:
            ReadinessResult with level, action, and supporting evidence
        """
        # Run both probes
        surface = self.surface_probe.probe(term)
        echo = self.echo_probe.probe(term)

        # Classify valley from surface probe
        classification = self.surface_probe.classify_completions(surface)
        dominant_valley = classification['dominant']

        # Calculate composite score
        level, reasoning = self._calculate_level(
            surface=surface,
            echo=echo,
            dominant_valley=dominant_valley,
        )

        return ReadinessResult(
            term=term,
            level=level,
            action=ACTIONS[level],
            surface=surface,
            echo=echo,
            reasoning=reasoning,
        )

    @staticmethod
    def _is_code_valley(valley) -> bool:
        """Return True when *valley* denotes the code valley.

        Both the enum member and the plain string are accepted so that every
        branch of the level heuristics treats the code valley the same way.
        """
        return valley in [CompletionCategory.CODE, 'code']

    @staticmethod
    def _truncate(text: str, limit: int = 60) -> str:
        """Return *text* cut to *limit* characters, adding "..." only if cut."""
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    def _calculate_level(
        self,
        surface: SurfaceProbeResult,
        echo: EchoProbeResult,
        dominant_valley: str,
    ) -> tuple[ReadinessLevel, str]:
        """
        Calculate readiness level based on probe results.

        Heuristics (checked in this order):
        - HIGH: depth >= 2 AND coherence >= 0.4 AND valley is not code
        - HIGH: depth >= 3, regardless of coherence or valley
        - MEDIUM: depth >= 1
        - MEDIUM: coherence >= 0.5 AND valley is not code
        - LOW: everything else

        Args:
            surface: Surface probe result; ``coherence_score`` is read.
            echo: Echo probe result; ``depth`` and ``echo_types`` are read.
            dominant_valley: Dominant completion category of the surface probe.

        Returns:
            Tuple of (level, human-readable reasoning joined with "; ").
        """
        depth = echo.depth
        # A missing coherence score is treated as 0.0.
        coherence = surface.coherence_score or 0.0
        in_code_valley = self._is_code_valley(dominant_valley)

        # Count echo types
        expands = sum(1 for t in echo.echo_types if t == EchoType.EXPANDS)
        collapses = sum(1 for t in echo.echo_types if t == EchoType.COLLAPSE)
        circulars = sum(1 for t in echo.echo_types if t == EchoType.CIRCULAR)

        # Build reasoning
        reasons = []

        # HIGH: Good depth + coherence + productive valley
        if depth >= 2 and coherence >= 0.4 and not in_code_valley:
            reasons.append(f"depth={depth} (strong conceptual expansion)")
            reasons.append(f"coherence={coherence:.2f} (consistent associations)")
            reasons.append(f"valley={dominant_valley} (productive for training)")
            return ReadinessLevel.HIGH, "; ".join(reasons)

        # HIGH: Exceptional depth even with lower coherence
        if depth >= 3:
            reasons.append(f"depth={depth} (exceptional expansion)")
            # Report the real ratio instead of asserting that every echo
            # expanded, which this branch never checks.
            reasons.append(f"{expands}/{len(echo.echo_types)} echoes expand")
            return ReadinessLevel.HIGH, "; ".join(reasons)

        # MEDIUM: Some depth or good coherence in prose
        if depth >= 1:
            reasons.append(f"depth={depth} (some expansion capability)")
            if dominant_valley in [CompletionCategory.PROSE, 'prose', 'definition']:
                reasons.append(f"valley={dominant_valley} (trainable with scaffolding)")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        if coherence >= 0.5 and not in_code_valley:
            reasons.append(f"coherence={coherence:.2f} (consistent surface)")
            reasons.append(f"valley={dominant_valley}")
            reasons.append("but limited depth - needs bridging concepts")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        # LOW: Trapped in code, circular, or incoherent
        if in_code_valley:
            reasons.append("valley=CODE (trapped in technical patterns)")
        if circulars >= 2:
            reasons.append(f"{circulars} circular echoes (surface-only knowledge)")
        if collapses >= 1:
            reasons.append(f"{collapses} collapses (unstable representations)")
        if coherence < 0.4:
            reasons.append(f"coherence={coherence:.2f} (scattered associations)")

        if not reasons:
            return ReadinessLevel.LOW, "insufficient depth and coherence"
        return ReadinessLevel.LOW, "; ".join(reasons)

    def score_batch(self, terms: List[str]) -> List[ReadinessResult]:
        """Score multiple terms, returning results in input order."""
        return [self.score(term) for term in terms]

    def summary(self, result: ReadinessResult) -> str:
        """Generate a human-readable, multi-line summary of one result."""
        symbols = {
            ReadinessLevel.HIGH: "🟢",
            ReadinessLevel.MEDIUM: "🟡",
            ReadinessLevel.LOW: "🔴",
        }

        if not result.surface:
            surface_summary = "N/A"
        elif result.surface.coherence_score is None:
            # Formatting None with ".2f" would raise TypeError.
            surface_summary = "coherence=N/A"
        else:
            surface_summary = f"coherence={result.surface.coherence_score:.2f}"
        echo_summary = f"depth={result.echo.depth}" if result.echo else "N/A"

        lines = [
            f"{symbols[result.level]} {result.term}: {result.level.value}",
            f" Action: {result.action}",
            f" Surface: {surface_summary}",
            f" Echo: {echo_summary}",
            f" Reasoning: {result.reasoning}",
        ]
        return "\n".join(lines)

    def curriculum_report(self, results: List[ReadinessResult]) -> str:
        """
        Generate curriculum planning report.

        Results are grouped by readiness level (HIGH, MEDIUM, LOW), each group
        keeping input order. MEDIUM and LOW entries include their reasoning,
        shortened to 60 characters.
        """
        high = [r for r in results if r.level == ReadinessLevel.HIGH]
        medium = [r for r in results if r.level == ReadinessLevel.MEDIUM]
        low = [r for r in results if r.level == ReadinessLevel.LOW]

        rule = "=" * 60
        lines = [
            rule,
            "CURRICULUM READINESS REPORT",
            rule,
            "",
            f"🟢 HIGH ({len(high)} terms) - Ready for state machine:",
        ]
        lines.extend(f" • {r.term}" for r in high)

        # MEDIUM and LOW share one layout: heading, then term plus reasoning.
        sections = [
            (f"🟡 MEDIUM ({len(medium)} terms) - Need scaffolding:", medium),
            (f"🔴 LOW ({len(low)} terms) - Require foundational work:", low),
        ]
        for heading, group in sections:
            lines.extend(["", heading])
            lines.extend(
                f" • {r.term}: {self._truncate(r.reasoning)}" for r in group
            )

        lines.extend([
            "",
            rule,
            f"Summary: {len(high)}/{len(results)} ready, {len(medium)} scaffolding, {len(low)} foundational",
            rule,
        ])

        return "\n".join(lines)
|
||||
Reference in New Issue
Block a user