feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure

- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-06 22:39:03 +01:00
parent 9853f4767b
commit f640dbdd65
29 changed files with 6164 additions and 1 deletions

View File

@@ -0,0 +1,97 @@
"""
Result dataclasses for probing operations.
These structures capture what we learn about each term.
"""
from dataclasses import dataclass, field
from typing import List, Optional, Literal
from datetime import datetime
from enum import Enum
class EchoType(str, Enum):
    """Categories assigned to an individual echo-probe response.

    Members also subclass ``str``, so each one compares equal to its own
    string value.
    """

    # Real depth: the response adds new information.
    EXPANDS = "EXPANDS"
    # Shallow but solid: reinforces the term without adding anything.
    CONFIRMS = "CONFIRMS"
    # Surface only: the response returns to the original term.
    CIRCULAR = "CIRCULAR"
    # Wrong direction: an unrelated tangent.
    DIVERGENT = "DIVERGENT"
    # Nothing there: incoherent or empty output.
    COLLAPSE = "COLLAPSE"
class ReadinessLevel(str, Enum):
    """Readiness tiers used when designing a curriculum.

    Members also subclass ``str``, so each one compares equal to its own
    string value.
    """

    # Ready for state machine / direct training.
    HIGH = "HIGH"
    # Needs scaffolding / bridging concepts first.
    MEDIUM = "MEDIUM"
    # Foundational work is required before anything else.
    LOW = "LOW"
@dataclass
class SurfaceProbeResult:
    """Outcome of a surface probe: one word in, several completions out.

    Fields:
        term: The probed word.
        completions: The completions produced for the term.
        hit_eos_count: Number of completions that ended with EOS.
        avg_tokens: Average completion length.
        coherence_score: Optional analysis value in the 0-1 range describing
            how related the completions are; ``None`` when not computed.
        timestamp: Set from ``datetime.now()`` at construction unless the
            caller supplies a value.
    """

    # Required probe data.
    term: str
    completions: List[str]
    hit_eos_count: int
    avg_tokens: float

    # Optional analysis.
    coherence_score: Optional[float] = None

    # A fresh value per instance, hence the factory rather than a plain default.
    timestamp: datetime = field(default_factory=datetime.now)
@dataclass
class EchoProbeResult:
    """Outcome of an echo probe, which measures depth iteratively.

    Fields:
        term: The probed word.
        rounds: Number of rounds recorded for the probe.
        chain: The sequence of prompts/completions.
        echo_types: Classification of each round.
        depth: How many EXPANDS occurred before the plateau. Defaults to 0;
            this class does not compute it from ``echo_types``.
        timestamp: Set from ``datetime.now()`` at construction unless the
            caller supplies a value.
    """

    # Required probe data.
    term: str
    rounds: int
    chain: List[str]
    echo_types: List[EchoType]

    # Derived metric, supplied by the caller.
    depth: int = 0

    # A fresh value per instance, hence the factory rather than a plain default.
    timestamp: datetime = field(default_factory=datetime.now)
@dataclass
class ReadinessResult:
    """Curriculum-readiness verdict for a term, with optional probe evidence.

    Fields:
        term: The analysed word.
        level: Readiness classification.
        action: Recommended curriculum action.
        surface: Surface-probe evidence, if any.
        echo: Echo-probe evidence, if any.
        reasoning: Free-text justification; empty by default.
        timestamp: Set from ``datetime.now()`` at construction unless the
            caller supplies a value.
    """

    term: str
    level: ReadinessLevel
    action: str

    # Supporting evidence; either probe may be absent.
    surface: Optional[SurfaceProbeResult] = None
    echo: Optional[EchoProbeResult] = None

    # Reasoning behind the verdict.
    reasoning: str = ""
    timestamp: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Convert to a JSON-serializable dict.

        Returns:
            A dict with the keys ``term``, ``readiness``, ``surface``,
            ``echo`` and ``timestamp``. ``surface`` and ``echo`` are ``None``
            when the corresponding probe result is missing; otherwise each is
            a dict of selected fields. ``timestamp`` is an ISO-format string.
        """
        surface = self.surface
        echo = self.echo

        # Each sub-dict is built only when its probe result is present, so no
        # per-field fallback values are needed inside it.
        surface_payload = None
        if surface:
            surface_payload = {
                "completions": surface.completions,
                "coherence": surface.coherence_score,
                "hit_eos_count": surface.hit_eos_count,
            }

        echo_payload = None
        if echo:
            echo_payload = {
                "depth": echo.depth,
                # Enum members are emitted as their plain string values.
                "types": [echo_type.value for echo_type in echo.echo_types],
                "chain": echo.chain,
            }

        return {
            "term": self.term,
            "readiness": {
                "level": self.level.value,
                "action": self.action,
                "reasoning": self.reasoning,
            },
            "surface": surface_payload,
            "echo": echo_payload,
            "timestamp": self.timestamp.isoformat(),
        }