- CLI: nyx-probe scan with --summary/--delta/--full flags - DriftProbe: training safety with Gini coefficient + Angular Drift - Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical) - Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system Key findings: - German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse) - Super Cluster validated: heart cross-lang sim = 1.000 - Isolated Zone confirmed: being EN↔DE sim = 0.195 - Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
222 lines
7.7 KiB
Python
222 lines
7.7 KiB
Python
"""
|
|
Readiness Scorer: Combines surface and echo probes into curriculum guidance.
|
|
|
|
Outputs:
|
|
- HIGH: Ready for direct training / state machine
|
|
- MEDIUM: Needs scaffolding or bridging concepts
|
|
- LOW: Requires foundational work first
|
|
"""
|
|
from typing import Optional, List
|
|
from dataclasses import dataclass
|
|
|
|
from ..core.model import NyxModel
|
|
from ..core.probe_result import (
|
|
SurfaceProbeResult,
|
|
EchoProbeResult,
|
|
ReadinessResult,
|
|
ReadinessLevel,
|
|
EchoType,
|
|
)
|
|
from ..probes.surface_probe import SurfaceProbe, CompletionCategory
|
|
from ..probes.echo_probe import EchoProbe
|
|
|
|
|
|
# Recommended actions for each readiness level
|
|
ACTIONS = {
    # Term can go straight into direct training.
    ReadinessLevel.HIGH: "state_machine",
    # Term needs bridge concepts before training.
    ReadinessLevel.MEDIUM: "scaffolding",
    # Term has to be built up from scratch.
    ReadinessLevel.LOW: "foundational",
}
|
|
|
|
|
|
class ReadinessScorer:
    """
    Combines surface + echo probes to assess curriculum readiness.

    A term is ready for training when:
    1. Surface: Coherent associations (not scattered/random)
    2. Echo: Can expand beyond surface (depth > 0)
    3. Valley: In a productive valley (prose/philosophy, not just code)
    """

    def __init__(
        self,
        model: NyxModel,
        surface_runs: int = 3,
        echo_rounds: int = 3,
        max_new_tokens: int = 50,
    ):
        """
        Create the surface and echo probes around a shared model.

        Args:
            model: Model instance handed to both probes.
            surface_runs: Passed to SurfaceProbe as ``num_runs``.
            echo_rounds: Passed to EchoProbe as ``max_rounds``.
            max_new_tokens: Passed to both probes as ``max_new_tokens``.
        """
        self.model = model
        self.surface_probe = SurfaceProbe(
            model,
            num_runs=surface_runs,
            max_new_tokens=max_new_tokens,
        )
        self.echo_probe = EchoProbe(
            model,
            max_rounds=echo_rounds,
            max_new_tokens=max_new_tokens,
        )

    @staticmethod
    def _truncate(text: str, limit: int = 60) -> str:
        """Return ``text`` cut to ``limit`` chars, adding "..." only if cut."""
        text = text or ""
        if len(text) <= limit:
            return text
        return text[:limit] + "..."

    def score(self, term: str) -> ReadinessResult:
        """
        Assess readiness of a term for curriculum.

        Args:
            term: Word or phrase to assess

        Returns:
            ReadinessResult with level, action, and supporting evidence
        """
        # Run both probes
        surface = self.surface_probe.probe(term)
        echo = self.echo_probe.probe(term)

        # Classify valley from surface probe
        classification = self.surface_probe.classify_completions(surface)
        dominant_valley = classification['dominant']

        # Calculate composite score
        level, reasoning = self._calculate_level(
            surface=surface,
            echo=echo,
            dominant_valley=dominant_valley,
        )

        return ReadinessResult(
            term=term,
            level=level,
            action=ACTIONS[level],
            surface=surface,
            echo=echo,
            reasoning=reasoning,
        )

    def _calculate_level(
        self,
        surface: SurfaceProbeResult,
        echo: EchoProbeResult,
        dominant_valley: str,
    ) -> tuple[ReadinessLevel, str]:
        """
        Calculate readiness level based on probe results.

        Heuristics (evaluated in order, first match wins):
        - HIGH: depth >= 2 AND coherence >= 0.4 AND valley is not code
        - HIGH: depth >= 3 (regardless of coherence or valley)
        - MEDIUM: depth >= 1
        - MEDIUM: coherence >= 0.5 AND valley is not code
        - LOW: everything else

        Args:
            surface: Surface probe result; a missing coherence score counts as 0.0.
            echo: Echo probe result providing ``depth`` and ``echo_types``.
            dominant_valley: Dominant completion category of the surface probe,
                given either as a CompletionCategory member or its string form.

        Returns:
            Tuple of (level, reasoning); reasoning is the "; "-joined list of
            reasons that led to the level.
        """
        depth = echo.depth
        coherence = surface.coherence_score or 0.0

        # The valley may arrive as an enum member or as a plain string, so
        # every code/prose test accepts both spellings. The first HIGH rule
        # previously checked the enum member only.
        in_code_valley = dominant_valley in (CompletionCategory.CODE, 'code')
        in_prose_valley = dominant_valley in (
            CompletionCategory.PROSE, 'prose', 'definition',
        )

        # Count echo types
        expands = sum(1 for t in echo.echo_types if t == EchoType.EXPANDS)
        collapses = sum(1 for t in echo.echo_types if t == EchoType.COLLAPSE)
        circulars = sum(1 for t in echo.echo_types if t == EchoType.CIRCULAR)

        # Build reasoning
        reasons = []

        # HIGH: Good depth + coherence + productive valley
        if depth >= 2 and coherence >= 0.4 and not in_code_valley:
            reasons.append(f"depth={depth} (strong conceptual expansion)")
            reasons.append(f"coherence={coherence:.2f} (consistent associations)")
            reasons.append(f"valley={dominant_valley} (productive for training)")
            return ReadinessLevel.HIGH, "; ".join(reasons)

        # HIGH: Exceptional depth even with lower coherence
        if depth >= 3:
            reasons.append(f"depth={depth} (exceptional expansion)")
            reasons.append(f"all {expands} echoes expand")
            return ReadinessLevel.HIGH, "; ".join(reasons)

        # MEDIUM: Some depth or good coherence in prose
        if depth >= 1:
            reasons.append(f"depth={depth} (some expansion capability)")
            if in_prose_valley:
                reasons.append(f"valley={dominant_valley} (trainable with scaffolding)")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        if coherence >= 0.5 and not in_code_valley:
            reasons.append(f"coherence={coherence:.2f} (consistent surface)")
            reasons.append(f"valley={dominant_valley}")
            reasons.append("but limited depth - needs bridging concepts")
            return ReadinessLevel.MEDIUM, "; ".join(reasons)

        # LOW: Trapped in code, circular, or incoherent
        if in_code_valley:
            reasons.append("valley=CODE (trapped in technical patterns)")
        if circulars >= 2:
            reasons.append(f"{circulars} circular echoes (surface-only knowledge)")
        if collapses >= 1:
            reasons.append(f"{collapses} collapses (unstable representations)")
        if coherence < 0.4:
            reasons.append(f"coherence={coherence:.2f} (scattered associations)")

        if not reasons:
            return ReadinessLevel.LOW, "insufficient depth and coherence"
        return ReadinessLevel.LOW, "; ".join(reasons)

    def score_batch(self, terms: List[str]) -> List[ReadinessResult]:
        """Score multiple terms, returning one result per term in input order."""
        return [self.score(term) for term in terms]

    def summary(self, result: ReadinessResult) -> str:
        """
        Generate human-readable summary.

        Args:
            result: Readiness result to render.

        Returns:
            Multi-line string with level, action, probe figures and reasoning.
            Probe figures that are unavailable are shown as "N/A".
        """
        symbols = {
            ReadinessLevel.HIGH: "🟢",
            ReadinessLevel.MEDIUM: "🟡",
            ReadinessLevel.LOW: "🔴",
        }

        # coherence_score may be None; formatting None with :.2f would raise.
        if result.surface and result.surface.coherence_score is not None:
            surface_summary = f"coherence={result.surface.coherence_score:.2f}"
        else:
            surface_summary = "N/A"
        echo_summary = f"depth={result.echo.depth}" if result.echo else "N/A"

        lines = [
            f"{symbols[result.level]} {result.term}: {result.level.value}",
            f" Action: {result.action}",
            f" Surface: {surface_summary}",
            f" Echo: {echo_summary}",
            f" Reasoning: {result.reasoning}",
        ]
        return "\n".join(lines)

    def curriculum_report(self, results: List[ReadinessResult]) -> str:
        """
        Generate curriculum planning report.

        Args:
            results: Readiness results to group by level.

        Returns:
            Multi-line report listing the terms of each level followed by a
            one-line summary. MEDIUM and LOW entries include their reasoning,
            shortened to 60 characters (with "..." only when shortened).
        """
        high = [r for r in results if r.level == ReadinessLevel.HIGH]
        medium = [r for r in results if r.level == ReadinessLevel.MEDIUM]
        low = [r for r in results if r.level == ReadinessLevel.LOW]

        lines = [
            "=" * 60,
            "CURRICULUM READINESS REPORT",
            "=" * 60,
            "",
            f"🟢 HIGH ({len(high)} terms) - Ready for state machine:",
        ]
        lines.extend(f" • {r.term}" for r in high)

        lines.extend([
            "",
            f"🟡 MEDIUM ({len(medium)} terms) - Need scaffolding:",
        ])
        lines.extend(
            f" • {r.term}: {self._truncate(r.reasoning)}" for r in medium
        )

        lines.extend([
            "",
            f"🔴 LOW ({len(low)} terms) - Require foundational work:",
        ])
        lines.extend(
            f" • {r.term}: {self._truncate(r.reasoning)}" for r in low
        )

        lines.extend([
            "",
            "=" * 60,
            f"Summary: {len(high)}/{len(results)} ready, {len(medium)} scaffolding, {len(low)} foundational",
            "=" * 60,
        ])

        return "\n".join(lines)
|