feat: Add Oghma RAG Proxy for SkyrimNet lore injection

RAG proxy that intercepts SkyrimNet LLM requests and enriches them
with relevant Tamrielic lore from CHIM's Oghma Infinium database.

Features:
- FastAPI proxy compatible with OpenAI API
- ChromaDB semantic search for lore retrieval
- NPC profile extraction from SkyrimNet prompts
- Google Sheets ingestion for CHIM's Oghma data
- Kubernetes deployment manifests
- Debug endpoint for RAG operation monitoring

Collections ingested to iris-dev ChromaDB:
- oghma_lore: 1951 entries (scholar knowledge)
- oghma_basic: 1949 entries (commoner knowledge)
- oghma_visual: 1151 entries (Omnisight perception)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
dafit
2026-03-30 23:22:46 +02:00
parent 62dcee5fbf
commit 3926ab676f
20 changed files with 2367 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
"""NPC Profile Extractor - Parses SkyrimNet prompts to extract NPC context."""
from __future__ import annotations
import re
import structlog
from .models import NPCProfile
# Module-level structlog logger; NPCExtractor emits its debug/info events through it.
logger = structlog.get_logger()
class NPCExtractor:
    """Extracts NPC profile information from SkyrimNet prompts.

    All extraction is heuristic: the chat messages are flattened to plain
    text and scanned with the regular expressions in ``PATTERNS``.
    """

    # Regex patterns for extraction
    PATTERNS = {
        # Character bio header. Name and race are restricted to a single
        # line (no ``\s``) so the capture cannot run into the next line.
        "bio_header": re.compile(
            r"## (?P<name>[\w \t'-]+) Bio\s*\n"
            r"- Gender: (?P<gender>\w+)\s*\n"
            r"- Race: (?P<race>[\w \t]+)",
            re.MULTILINE,
        ),
        # Alternative role description
        "role_intro": re.compile(
            r"You are (?P<name>[^,\n]+),?\s*(?:a |an )?(?P<descriptor>[^.\n]+)",
            re.IGNORECASE,
        ),
        # Faction membership
        "faction": re.compile(
            r"(?:member of|belongs to|joined|part of) (?:the )?(?P<faction>[\w\s]+?)(?:\.|,|\n|$)",
            re.IGNORECASE,
        ),
        # Location mentions
        "location": re.compile(
            r"(?:in|at|near|from) (?P<location>Whiterun|Windhelm|Solitude|Riften|"
            r"Markarth|Morthal|Dawnstar|Winterhold|Falkreath|Riverwood|Rorikstead|"
            r"Ivarstead|Solstheim|Raven Rock)",
            re.IGNORECASE,
        ),
        # Profession/occupation
        "occupation": re.compile(
            r"(?:works as|profession:|occupation:|is a|as a) (?P<profession>[\w\s]+?)(?:\.|,|\n|$)",
            re.IGNORECASE,
        ),
    }

    # Known professions; a candidate phrase is accepted when it contains one.
    KNOWN_PROFESSIONS = {
        "priest", "priestess", "mage", "wizard", "scholar", "blacksmith",
        "guard", "soldier", "warrior", "thief", "merchant", "innkeeper",
        "hunter", "farmer", "peasant", "noble", "jarl", "bard", "alchemist",
        "healer", "assassin", "spy", "courier", "carriage driver", "fisherman",
        "miller", "brewer", "smith", "armorer", "fletcher", "jeweler",
    }

    # Race names in priority order: the first entry found in a descriptor wins.
    _RACES = (
        "Nord", "Dunmer", "Dark Elf", "Altmer", "High Elf",
        "Bosmer", "Wood Elf", "Argonian", "Khajiit", "Breton",
        "Redguard", "Orsimer", "Orc", "Imperial",
    )
    # Each race is anchored at a word start so a name embedded inside another
    # word ("sorcerer" contains "orc") is not mistaken for a race, while
    # inflected forms such as "Nordic" or "Orcish" still match.
    _RACE_PATTERNS = tuple(
        (race, re.compile(r"\b" + re.escape(race), re.IGNORECASE))
        for race in _RACES
    )

    # Conversation-context tuning: how many trailing messages are inspected,
    # how many of them are kept, and the length at which a message is
    # treated as a prompt rather than dialogue.
    _CONTEXT_WINDOW = 6
    _CONTEXT_TURNS = 3
    _CONTEXT_MAX_CHARS = 500

    @staticmethod
    def _content_text(message: dict) -> str:
        """Return the textual content of one chat message.

        A plain string is returned as is. A list of content parts is reduced
        to its string items and the ``"text"`` values of its dict items,
        joined by newlines. Anything else (missing, ``None``, other types)
        yields an empty string.
        """
        content = message.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return "\n".join(pieces)
        return ""

    def _match_profession(self, text: str) -> str:
        """Return the known profession contained in *text*, or ``""``.

        When several known professions occur ("priestess" also contains
        "priest"), the longest one wins; ties are broken alphabetically so
        the result does not depend on set iteration order.
        """
        lowered = text.lower()
        hits = [known for known in self.KNOWN_PROFESSIONS if known in lowered]
        if not hits:
            return ""
        return min(hits, key=lambda name: (-len(name), name))

    def extract(self, messages: list[dict]) -> "NPCProfile":
        """Extract an NPC profile from chat messages.

        Args:
            messages: Chat messages as dicts; only their ``"content"`` is read.

        Returns:
            An ``NPCProfile`` with whichever of name, gender, race, location,
            factions and profession could be recognised, after
            ``compute_knowledge_classes()`` has been called on it.
        """
        # Combine all non-empty message content for analysis
        texts = (self._content_text(msg) for msg in messages)
        full_text = "\n".join(text for text in texts if text)
        profile = NPCProfile()

        # Try bio header first (most reliable)
        if match := self.PATTERNS["bio_header"].search(full_text):
            profile.name = match.group("name").strip()
            profile.gender = match.group("gender").strip()
            profile.race = match.group("race").strip()
            logger.debug("Extracted from bio header", name=profile.name, race=profile.race)
        # Fallback to role intro
        elif match := self.PATTERNS["role_intro"].search(full_text):
            profile.name = match.group("name").strip()
            descriptor = match.group("descriptor")
            # Try to parse race from descriptor
            profile.race = self._extract_race_from_descriptor(descriptor)
            logger.debug("Extracted from role intro", name=profile.name)

        # Extract location (first mention only)
        if match := self.PATTERNS["location"].search(full_text):
            profile.location = match.group("location").strip()

        # Extract factions, keeping first-seen order without duplicates
        for match in self.PATTERNS["faction"].finditer(full_text):
            faction = match.group("faction").strip()
            if faction and faction not in profile.factions:
                profile.factions.append(faction)

        # Extract profession: the occupation pattern is broad ("is a ..."),
        # so keep scanning until a candidate names a known profession
        # instead of giving up after the first unrelated hit.
        for match in self.PATTERNS["occupation"].finditer(full_text):
            if profession := self._match_profession(match.group("profession")):
                profile.profession = profession
                break

        # Compute knowledge classes
        profile.compute_knowledge_classes()
        logger.info(
            "Extracted NPC profile",
            name=profile.name,
            race=profile.race,
            profession=profile.profession,
            factions=profile.factions,
            location=profile.location,
            knowledge_classes=profile.knowledge_classes,
            education_level=profile.education_level.value,
        )
        return profile

    def _extract_race_from_descriptor(self, descriptor: str) -> str:
        """Try to extract a race from a descriptor string.

        Returns the first race (in ``_RACES`` order) that occurs at a word
        start in *descriptor*, with spaces removed, or ``"Unknown"``.
        """
        for race, pattern in self._RACE_PATTERNS:
            if pattern.search(descriptor):
                # Collapse to a single token, e.g. "Dark Elf" -> "DarkElf".
                # NOTE(review): confirm consumers expect "DarkElf" rather
                # than the equivalent "Dunmer".
                return race.replace(" ", "")
        return "Unknown"

    def extract_conversation_context(self, messages: list[dict]) -> str:
        """Extract the current conversation topic for the RAG query.

        Looks at the last ``_CONTEXT_WINDOW`` messages, keeps user/assistant
        messages shorter than ``_CONTEXT_MAX_CHARS`` characters, and returns
        the most recent ``_CONTEXT_TURNS`` of them joined by spaces in
        chronological order. Returns ``""`` when nothing qualifies.
        """
        recent = []
        for msg in messages[-self._CONTEXT_WINDOW:]:
            if msg.get("role") not in ("user", "assistant"):
                continue
            content = self._content_text(msg)
            # Skip very long content (likely system prompts)
            if content and len(content) < self._CONTEXT_MAX_CHARS:
                recent.append(content)
        # The list is oldest-to-newest, so the tail holds the latest turns.
        return " ".join(recent[-self._CONTEXT_TURNS:])