feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
139
test_triangulation.py
Normal file
139
test_triangulation.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test the Multilingual Triangulation Probe
|
||||
|
||||
First test: "consciousness" concept
|
||||
- Ground: EN, ZH, AR
|
||||
- Deepen: DE (Bewusstsein)
|
||||
- Triangulate: back to EN
|
||||
"""
|
||||
import sys
import traceback

# Put the current working directory first on the import path so the
# nyx_probing imports below resolve against it.
sys.path.insert(0, '.')

from nyx_probing.core.model import NyxModel
from nyx_probing.probes.multilingual_probe import (
    MultilingualTriangulationProbe,
    LANGUAGES,
    LanguageZone,
)
|
||||
|
||||
# Opening banner for the script's console output.
print("=" * 70)
print("🌙 MULTILINGUAL TRIANGULATION PROBE TEST")
print("=" * 70)
|
||||
|
||||
# Test concepts with translations.
# Outer key: concept name handed to the probe.
# Inner mapping: language code -> the word for that concept in that language.
CONCEPTS = {
    "consciousness": {
        "EN": "consciousness",
        "DE": "Bewusstsein",
        "ZH": "意识",
        "AR": "الوعي",
        "FR": "conscience",
    },
    "heart": {
        "EN": "heart",
        "DE": "Herz",
        "ZH": "心",
        "AR": "قلب",
        "FR": "cœur",
    },
    "emergence": {
        "EN": "emergence",
        "DE": "Entstehung",
        "ZH": "涌现",
        "AR": "ظهور",
        "FR": "émergence",
    },
    "being": {
        "EN": "being",
        "DE": "Sein",
        "ZH": "存在",
        "AR": "كينونة",
        "FR": "être",
    },
}
|
||||
|
||||
# Load model: build the NyxModel wrapper, then call its load() before it is
# handed to the probe.
print("\n📦 Loading model...")
model = NyxModel()
model.load()
|
||||
|
||||
# Create probe: ground in EN/ZH/AR, deepen in DE, triangulate back to EN
# (the same plan the module docstring describes).
print("\n🔬 Creating triangulation probe...")
probe = MultilingualTriangulationProbe(
    model,
    grounding_languages=["EN", "ZH", "AR"],
    deepening_language="DE",
    triangulation_target="EN",
)
|
||||
|
||||
# Test each concept
print("\n" + "=" * 70)
print("🧪 RUNNING TRIANGULATION TESTS")
print("=" * 70)


def _preview(text, limit=100):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


# Results of the concepts that probed successfully; read by the summary
# sections further down the script.
results = []
for concept_name, translations in CONCEPTS.items():
    print(f"\n{'─' * 70}")
    print(f"Testing: {concept_name.upper()}")
    print('─' * 70)

    try:
        result = probe.probe(concept_name, translations)
        results.append(result)

        # Print summary
        print(probe.summary(result))

        # Print some details
        print("\n Deepening completion preview:")
        print(f" '{_preview(result.deepening.completion)}'")

        print("\n Triangulation completion:")
        print(f" Prompt: '{result.triangulation.translation_prompt}'")
        print(f" Output: '{_preview(result.triangulation.model_completion)}'")

    except Exception as e:
        # Script-level boundary: report the failure with its traceback and
        # carry on with the remaining concepts instead of aborting the run.
        print(f" ERROR: {e}")
        traceback.print_exc()
|
||||
|
||||
# Summary table
print("\n" + "=" * 70)
print("📊 SUMMARY TABLE")
print("=" * 70)

print(f"\n{'Concept':<15} | {'Grounding':<10} | {'Depth':<6} | {'Valley':<12} | {'Transfer':<10} | {'Recommendation'}")
print("-" * 100)

for r in results:
    # Build the variable-width cells first so each one can be padded to the
    # same width as its header column.
    depth_cell = f"{r.deepening.depth_score}/3"
    transfer_cell = '✓' if r.depth_transferable else '✗'
    print(
        f"{r.concept:<15} | {r.grounding.average_convergence:<10.3f} | "
        f"{depth_cell:<6} | {r.deepening.valley_type:<12} | "
        f"{transfer_cell:<10} | {r.curriculum_recommendation[:30]}"
    )
|
||||
|
||||
# Final analysis
print("\n" + "=" * 70)
print("🌙 CURRICULUM IMPLICATIONS")
print("=" * 70)

# Group by recommendation, using the two flags on each result:
#   accessible and transferable     -> high_depth
#   accessible but not transferable -> needs_bridge
#   not accessible                  -> needs_work
high_depth = [r for r in results if r.depth_accessible and r.depth_transferable]
needs_work = [r for r in results if not r.depth_accessible]
needs_bridge = [r for r in results if r.depth_accessible and not r.depth_transferable]

if high_depth:
    print("\n✅ READY FOR MULTILINGUAL CURRICULUM:")
    for r in high_depth:
        print(f" - {r.concept}: {r.curriculum_recommendation}")

if needs_bridge:
    print("\n⚠️ NEEDS BRIDGING (depth accessible but not transferable):")
    for r in needs_bridge:
        print(f" - {r.concept}: depth={r.deepening.depth_score} but transfer failed")

if needs_work:
    print("\n❌ NEEDS GROUNDING FIRST:")
    for r in needs_work:
        print(f" - {r.concept}: depth only {r.deepening.depth_score}/3")
|
||||
|
||||
# Closing banner marking the end of the script's output.
print("\n" + "=" * 70)
print("✅ TRIANGULATION PROBE TEST COMPLETE")
print("=" * 70)
|
||||
Reference in New Issue
Block a user