nyx-probing/nyx_probing/analysis/readiness_scorer.py
dafit f640dbdd65 feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: "heart" cross-lang sim = 1.000
- Isolated Zone confirmed: "being" EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-06 22:39:03 +01:00
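
The DriftProbe metrics named above (Gini coefficient, Angular Drift) are not part of the file below; as a point of reference, here is a minimal sketch of how such quantities are conventionally computed, assuming NumPy. The function names are illustrative, not the nyx-probing API.

import numpy as np

def gini(values: np.ndarray) -> float:
    # Gini coefficient of a non-negative 1-D array: 0.0 for a perfectly uniform
    # distribution, approaching 1.0 as mass concentrates in a few entries.
    x = np.sort(np.abs(values).astype(float))
    n = x.size
    if n == 0 or x.sum() == 0.0:
        return 0.0
    lorenz = np.cumsum(x) / x.sum()
    return float((n + 1 - 2 * lorenz.sum()) / n)

def angular_drift(a: np.ndarray, b: np.ndarray) -> float:
    # Angle in radians between two embedding snapshots of the same term; 0.0 = no drift.
    cos = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
    return float(np.arccos(np.clip(cos, -1.0, 1.0)))

The cross-language similarity figures in the findings ("heart" = 1.000, "being" = 0.195) read like plain cosine similarities, i.e. the cos value above before the arccos.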


"""
Readiness Scorer: Combines surface and echo probes into curriculum guidance.
Outputs:
- HIGH: Ready for direct training / state machine
- MEDIUM: Needs scaffolding or bridging concepts
- LOW: Requires foundational work first
"""
from typing import Optional, List
from dataclasses import dataclass

from ..core.model import NyxModel
from ..core.probe_result import (
    SurfaceProbeResult,
    EchoProbeResult,
    ReadinessResult,
    ReadinessLevel,
    EchoType,
)
from ..probes.surface_probe import SurfaceProbe, CompletionCategory
from ..probes.echo_probe import EchoProbe

# Recommended actions for each readiness level
ACTIONS = {
    ReadinessLevel.HIGH: "state_machine",    # Direct training
    ReadinessLevel.MEDIUM: "scaffolding",    # Bridge concepts
    ReadinessLevel.LOW: "foundational",      # Build from scratch
}


class ReadinessScorer:
    """
    Combines surface + echo probes to assess curriculum readiness.

    A term is ready for training when:
    1. Surface: Coherent associations (not scattered/random)
    2. Echo: Can expand beyond surface (depth > 0)
    3. Valley: In a productive valley (prose/philosophy, not just code)
    """

    def __init__(
        self,
        model: NyxModel,
        surface_runs: int = 3,
        echo_rounds: int = 3,
        max_new_tokens: int = 50,
    ):
        self.model = model
        self.surface_probe = SurfaceProbe(
            model,
            num_runs=surface_runs,
            max_new_tokens=max_new_tokens,
        )
        self.echo_probe = EchoProbe(
            model,
            max_rounds=echo_rounds,
            max_new_tokens=max_new_tokens,
        )

    def score(self, term: str) -> ReadinessResult:
        """
        Assess readiness of a term for curriculum.

        Args:
            term: Word or phrase to assess

        Returns:
            ReadinessResult with level, action, and supporting evidence
        """
        # Run both probes
        surface = self.surface_probe.probe(term)
        echo = self.echo_probe.probe(term)

        # Classify valley from surface probe
        classification = self.surface_probe.classify_completions(surface)
        dominant_valley = classification['dominant']

        # Calculate composite score
        level, reasoning = self._calculate_level(
            surface=surface,
            echo=echo,
            dominant_valley=dominant_valley,
        )

        return ReadinessResult(
            term=term,
            level=level,
            action=ACTIONS[level],
            surface=surface,
            echo=echo,
            reasoning=reasoning,
        )

    def _calculate_level(
        self,
        surface: SurfaceProbeResult,
        echo: EchoProbeResult,
        dominant_valley: str,
    ) -> tuple[ReadinessLevel, str]:
        """
        Calculate readiness level based on probe results.

        Heuristics:
        - HIGH: (depth >= 2 AND coherence >= 0.4 AND not pure code) OR depth >= 3
        - MEDIUM: depth >= 1, OR coherence >= 0.5 outside the code valley
        - LOW: everything else
        """
        depth = echo.depth
        coherence = surface.coherence_score or 0.0
        # eos_ratio is currently not used by the heuristics below
        eos_ratio = surface.hit_eos_count / len(surface.completions) if surface.completions else 0

        # Count echo types
        expands = sum(1 for t in echo.echo_types if t == EchoType.EXPANDS)
        collapses = sum(1 for t in echo.echo_types if t == EchoType.COLLAPSE)
        circulars = sum(1 for t in echo.echo_types if t == EchoType.CIRCULAR)

        # Build reasoning
        reasons = []

        # HIGH: Good depth + coherence + productive valley
        if depth >= 2 and coherence >= 0.4:
            if dominant_valley not in [CompletionCategory.CODE]:
                reasons.append(f"depth={depth} (strong conceptual expansion)")
                reasons.append(f"coherence={coherence:.2f} (consistent associations)")
                reasons.append(f"valley={dominant_valley} (productive for training)")
                return ReadinessLevel.HIGH, "; ".join(reasons)

        # HIGH: Exceptional depth even with lower coherence
        if depth >= 3:
            reasons.append(f"depth={depth} (exceptional expansion)")
            reasons.append(f"all {expands} echoes expand")
            return ReadinessLevel.HIGH, "; ".join(reasons)

        # MEDIUM: Some depth or good coherence in prose
        if depth >= 1:
            reasons.append(f"depth={depth} (some expansion capability)")
            if dominant_valley in [CompletionCategory.PROSE, 'prose', 'definition']:
                reasons.append(f"valley={dominant_valley} (trainable with scaffolding)")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        if coherence >= 0.5 and dominant_valley not in [CompletionCategory.CODE, 'code']:
            reasons.append(f"coherence={coherence:.2f} (consistent surface)")
            reasons.append(f"valley={dominant_valley}")
            reasons.append("but limited depth - needs bridging concepts")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        # LOW: Trapped in code, circular, or incoherent
        if dominant_valley in [CompletionCategory.CODE, 'code']:
            reasons.append("valley=CODE (trapped in technical patterns)")
        if circulars >= 2:
            reasons.append(f"{circulars} circular echoes (surface-only knowledge)")
        if collapses >= 1:
            reasons.append(f"{collapses} collapses (unstable representations)")
        if coherence < 0.4:
            reasons.append(f"coherence={coherence:.2f} (scattered associations)")
        return ReadinessLevel.LOW, "; ".join(reasons) if reasons else "insufficient depth and coherence"
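
    # Worked examples of the heuristics above (illustrative numbers, not real probe output):
    #   depth=2, coherence=0.55, valley=prose -> first HIGH branch  -> HIGH   ("state_machine")
    #   depth=1, coherence=0.30, valley=code  -> depth >= 1 branch  -> MEDIUM ("scaffolding")
    #   depth=0, coherence=0.25, valley=code  -> no branch matches  -> LOW    ("foundational")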

    def score_batch(self, terms: List[str]) -> List[ReadinessResult]:
        """Score multiple terms."""
        return [self.score(term) for term in terms]

    def summary(self, result: ReadinessResult) -> str:
        """Generate human-readable summary."""
        symbols = {
            ReadinessLevel.HIGH: "🟢",
            ReadinessLevel.MEDIUM: "🟡",
            ReadinessLevel.LOW: "🔴",
        }
        # Guard against a missing surface result or a None coherence score
        surface_summary = (
            f"coherence={result.surface.coherence_score:.2f}"
            if result.surface and result.surface.coherence_score is not None
            else "N/A"
        )
        echo_summary = f"depth={result.echo.depth}" if result.echo else "N/A"

        lines = [
            f"{symbols[result.level]} {result.term}: {result.level.value}",
            f" Action: {result.action}",
            f" Surface: {surface_summary}",
            f" Echo: {echo_summary}",
            f" Reasoning: {result.reasoning}",
        ]
        return "\n".join(lines)

    def curriculum_report(self, results: List[ReadinessResult]) -> str:
        """Generate curriculum planning report."""
        high = [r for r in results if r.level == ReadinessLevel.HIGH]
        medium = [r for r in results if r.level == ReadinessLevel.MEDIUM]
        low = [r for r in results if r.level == ReadinessLevel.LOW]

        lines = [
            "=" * 60,
            "CURRICULUM READINESS REPORT",
            "=" * 60,
            "",
            f"🟢 HIGH ({len(high)} terms) - Ready for state machine:",
        ]
        for r in high:
            lines.append(f"{r.term}")

        lines.extend([
            "",
            f"🟡 MEDIUM ({len(medium)} terms) - Need scaffolding:",
        ])
        for r in medium:
            lines.append(f"{r.term}: {r.reasoning[:60]}...")

        lines.extend([
            "",
            f"🔴 LOW ({len(low)} terms) - Require foundational work:",
        ])
        for r in low:
            lines.append(f"{r.term}: {r.reasoning[:60]}...")

        lines.extend([
            "",
            "=" * 60,
            f"Summary: {len(high)}/{len(results)} ready, {len(medium)} scaffolding, {len(low)} foundational",
            "=" * 60,
        ])
        return "\n".join(lines)
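
A minimal usage sketch for the scorer above. NyxModel itself lives in nyx_probing/core/model.py and its construction is not shown in this file, so that step is left abstract; the example terms are taken from the commit's findings.

# Illustrative usage (not part of readiness_scorer.py)
model = ...  # a loaded NyxModel; construction lives in nyx_probing.core.model
scorer = ReadinessScorer(model, surface_runs=3, echo_rounds=3)

result = scorer.score("heart")
print(scorer.summary(result))              # 🟢/🟡/🔴 block with action and reasoning

batch = scorer.score_batch(["heart", "being"])
print(scorer.curriculum_report(batch))     # HIGH / MEDIUM / LOW curriculum plan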