Files
nyx-probing/test_triangulation.py
dafit f640dbdd65 feat: complete Phase 1 - vocabulary expansion & DriftProbe infrastructure
- CLI: nyx-probe scan with --summary/--delta/--full flags
- DriftProbe: training safety with Gini coefficient + Angular Drift
- Vocabulary: 54 terms (30 nimmerverse + 24 German philosophical)
- Sentinels: ANCHOR/BRIDGE/CANARY/TARGET monitoring system

Key findings:
- German philosophical terms: 37.5% depth≥2 hit rate (vs 3.3% nimmerverse)
- Super Cluster validated: heart cross-lang sim = 1.000
- Isolated Zone confirmed: being EN↔DE sim = 0.195
- Gini signature: Philosophy ~0.5 (diffuse), Technical ~0.8 (sparse)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-06 22:39:03 +01:00

140 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""
Test the Multilingual Triangulation Probe
First test: "consciousness" concept
- Ground: EN, ZH, AR
- Deepen: DE (Bewusstsein)
- Triangulate: back to EN
"""
import sys
sys.path.insert(0, '.')
from nyx_probing.core.model import NyxModel
from nyx_probing.probes.multilingual_probe import (
MultilingualTriangulationProbe,
LANGUAGES,
LanguageZone,
)
# Opening banner: the title framed by a 70-column rule above and below.
print(
    "=" * 70,
    "🌙 MULTILINGUAL TRIANGULATION PROBE TEST",
    "=" * 70,
    sep="\n",
)
# Test concepts with translations.
#
# Maps each concept name to its surface form per language code. Every entry
# must carry a non-empty word for each language: ZH is one of the grounding
# languages handed to the probe, so an empty string there would probe nothing.
CONCEPTS = {
    "consciousness": {
        "EN": "consciousness",
        "DE": "Bewusstsein",
        "ZH": "意识",
        "AR": "الوعي",
        "FR": "conscience",
    },
    "heart": {
        "EN": "heart",
        "DE": "Herz",
        "ZH": "心",  # was an empty string; restored the Chinese word for "heart"
        "AR": "قلب",
        "FR": "cœur",
    },
    "emergence": {
        "EN": "emergence",
        "DE": "Entstehung",
        "ZH": "涌现",
        "AR": "ظهور",
        "FR": "émergence",
    },
    "being": {
        "EN": "being",
        "DE": "Sein",
        "ZH": "存在",
        "AR": "كينونة",
        "FR": "être",
    },
}
# Instantiate the model wrapper and load it before the probe is built.
print("\n📦 Loading model...")
model = NyxModel()
model.load()

# Probe configuration: ground in EN/ZH/AR, deepen in DE, triangulate to EN.
print("\n🔬 Creating triangulation probe...")
probe_settings = {
    "grounding_languages": ["EN", "ZH", "AR"],
    "deepening_language": "DE",
    "triangulation_target": "EN",
}
probe = MultilingualTriangulationProbe(model, **probe_settings)
# Run the probe once per concept, printing a summary and short previews of
# the deepening and triangulation output. Successful results are collected
# in `results` for the reporting sections that follow.
print("\n" + "=" * 70)
print("🧪 RUNNING TRIANGULATION TESTS")
print("=" * 70)

results = []
# Per-concept divider. The original multiplied an empty string by 70, which
# always yields "" and printed blank lines instead of a rule; "-" matches the
# rule character used under the summary table header.
divider = "-" * 70

for concept_name, translations in CONCEPTS.items():
    print(f"\n{divider}")
    print(f"Testing: {concept_name.upper()}")
    print(divider)

    try:
        result = probe.probe(concept_name, translations)
        results.append(result)

        # Print summary
        print(probe.summary(result))

        # Print some details (completions truncated to 100 characters)
        print(f"\n Deepening completion preview:")
        print(f" '{result.deepening.completion[:100]}...'")
        print(f"\n Triangulation completion:")
        print(f" Prompt: '{result.triangulation.translation_prompt}'")
        print(f" Output: '{result.triangulation.model_completion[:100]}...'")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure with a full
        # traceback and carry on with the remaining concepts.
        print(f" ERROR: {e}")
        import traceback
        traceback.print_exc()
# Summary table: one row per successfully probed concept.
print("\n" + "=" * 70)
print("📊 SUMMARY TABLE")
print("=" * 70)

print(f"\n{'Concept':<15} | {'Grounding':<10} | {'Depth':<6} | {'Valley':<12} | {'Transfer':<10} | {'Recommendation'}")
print("-" * 100)
for r in results:
    # The original rendered '' for both outcomes, leaving the Transfer
    # column blank regardless of the result.
    transfer_mark = "✓" if r.depth_transferable else "✗"
    # Build the depth cell first so it can be padded as a single unit.
    depth_cell = f"{r.deepening.depth_score}/3"
    # Grounding and Depth are padded to the widths declared in the header row
    # (10 and 6) so the column separators line up.
    print(
        f"{r.concept:<15} | {r.grounding.average_convergence:<10.3f} | "
        f"{depth_cell:<6} | {r.deepening.valley_type:<12} | "
        f"{transfer_mark:<10} | {r.curriculum_recommendation[:30]}"
    )
# Final analysis: bucket the results and report each non-empty bucket.
print("\n" + "=" * 70)
print("🌙 CURRICULUM IMPLICATIONS")
print("=" * 70)

# Each result lands in exactly one bucket:
#   needs_work   - depth not accessible
#   high_depth   - depth accessible and transferable
#   needs_bridge - depth accessible but not transferable
high_depth = []
needs_work = []
needs_bridge = []
for item in results:
    if not item.depth_accessible:
        needs_work.append(item)
    elif item.depth_transferable:
        high_depth.append(item)
    else:
        needs_bridge.append(item)

if high_depth:
    print("\n✅ READY FOR MULTILINGUAL CURRICULUM:")
    for ready in high_depth:
        print(f" - {ready.concept}: {ready.curriculum_recommendation}")

if needs_bridge:
    print("\n⚠️ NEEDS BRIDGING (depth accessible but not transferable):")
    for bridged in needs_bridge:
        print(f" - {bridged.concept}: depth={bridged.deepening.depth_score} but transfer failed")

if needs_work:
    print("\n❌ NEEDS GROUNDING FIRST:")
    for shallow in needs_work:
        print(f" - {shallow.concept}: depth only {shallow.deepening.depth_score}/3")

# Closing banner.
print("\n" + "=" * 70)
print("✅ TRIANGULATION PROBE TEST COMPLETE")
print("=" * 70)