RAG proxy that intercepts SkyrimNet LLM requests and enriches them with relevant Tamrielic lore from CHIM's Oghma Infinium database. Features: - FastAPI proxy compatible with OpenAI API - ChromaDB semantic search for lore retrieval - NPC profile extraction from SkyrimNet prompts - Google Sheets ingestion for CHIM's Oghma data - Kubernetes deployment manifests - Debug endpoint for RAG operation monitoring Collections ingested to iris-dev ChromaDB: - oghma_lore: 1951 entries (scholar knowledge) - oghma_basic: 1949 entries (commoner knowledge) - oghma_visual: 1151 entries (Omnisight perception) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
148 lines
5.5 KiB
Python
148 lines
5.5 KiB
Python
"""NPC Profile Extractor - Parses SkyrimNet prompts to extract NPC context."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
import structlog
|
|
|
|
from .models import NPCProfile
|
|
|
|
# Module-level structlog logger; NPCExtractor uses it for its debug/info events.
logger = structlog.get_logger()
|
|
|
|
|
|
class NPCExtractor:
    """Extracts NPC profile information from SkyrimNet prompts.

    The extractor works on the text of chat messages (dicts with ``role`` and
    ``content`` keys). Regular expressions pull out the NPC's name, gender,
    race, location, factions and profession, and a separate method condenses
    the most recent dialogue turns into a query string for RAG.
    """

    # Regex patterns for extraction
    PATTERNS = {
        # Character bio header. Name and race are limited to a single line:
        # ``\s`` would also match newlines and let the capture run on into
        # whatever text follows the header.
        "bio_header": re.compile(
            r"## (?P<name>[\w \t'-]+) Bio\s*\n"
            r"- Gender: (?P<gender>\w+)\s*\n"
            r"- Race: (?P<race>[\w \t]+)",
            re.MULTILINE,
        ),
        # Alternative role description
        "role_intro": re.compile(
            r"You are (?P<name>[^,\n]+),?\s*(?:a |an )?(?P<descriptor>[^.\n]+)",
            re.IGNORECASE,
        ),
        # Faction membership
        "faction": re.compile(
            r"(?:member of|belongs to|joined|part of) (?:the )?(?P<faction>[\w\s]+?)(?:\.|,|\n|$)",
            re.IGNORECASE,
        ),
        # Location mentions
        "location": re.compile(
            r"(?:in|at|near|from) (?P<location>Whiterun|Windhelm|Solitude|Riften|"
            r"Markarth|Morthal|Dawnstar|Winterhold|Falkreath|Riverwood|Rorikstead|"
            r"Ivarstead|Solstheim|Raven Rock)",
            re.IGNORECASE,
        ),
        # Profession/occupation
        "occupation": re.compile(
            r"(?:works as|profession:|occupation:|is a|as a) (?P<profession>[\w\s]+?)(?:\.|,|\n|$)",
            re.IGNORECASE,
        ),
    }

    # Known professions for fuzzy matching
    KNOWN_PROFESSIONS = {
        "priest", "priestess", "mage", "wizard", "scholar", "blacksmith",
        "guard", "soldier", "warrior", "thief", "merchant", "innkeeper",
        "hunter", "farmer", "peasant", "noble", "jarl", "bard", "alchemist",
        "healer", "assassin", "spy", "courier", "carriage driver", "fisherman",
        "miller", "brewer", "smith", "armorer", "fletcher", "jeweler",
    }

    # Race names looked for in a role descriptor, in priority order: the first
    # entry of this tuple that occurs in the descriptor wins.
    _RACES = (
        "Nord", "Dunmer", "Dark Elf", "Altmer", "High Elf",
        "Bosmer", "Wood Elf", "Argonian", "Khajiit", "Breton",
        "Redguard", "Orsimer", "Orc", "Imperial",
    )

    # One pattern per race, anchored at the start of a word so that e.g.
    # "sorcerer" is not read as "Orc". The end is left open on purpose so
    # derived forms such as "Nords" or "Nordic" still count.
    _RACE_PATTERNS = tuple(
        (race, re.compile(r"\b" + re.escape(race), re.IGNORECASE))
        for race in _RACES
    )

    # Tuning knobs for extract_conversation_context
    CONTEXT_WINDOW = 6  # how many trailing messages are inspected
    CONTEXT_MAX_MESSAGES = 3  # how many of them end up in the query
    CONTEXT_MAX_CHARS = 500  # messages this long or longer are skipped

    @staticmethod
    def _content_text(content: object) -> str:
        """Return the plain text carried by a message's ``content`` value.

        A string is returned unchanged. A list is treated as OpenAI-style
        content parts: the ``text`` of every text part is joined with
        newlines. Anything else (``None``, numbers, ...) yields ``""``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return "\n".join(
                part["text"]
                for part in content
                if isinstance(part, dict)
                and part.get("type", "text") == "text"
                and isinstance(part.get("text"), str)
            )
        return ""

    def _match_known_profession(self, text: str) -> str:
        """Return the known profession contained in ``text``, or ``""``.

        ``text`` is expected to be lower-case already. When several known
        professions occur (e.g. "smith" inside "blacksmith"), the longest one
        is chosen, with ties broken alphabetically, so the result does not
        depend on set iteration order.
        """
        hits = [known for known in self.KNOWN_PROFESSIONS if known in text]
        if not hits:
            return ""
        return max(hits, key=lambda known: (len(known), known))

    def extract(self, messages: list[dict]) -> "NPCProfile":
        """Extract NPC profile from chat messages.

        Args:
            messages: Chat messages as dicts; only the ``content`` of each is
                read here. Both string content and lists of text parts are
                accepted.

        Returns:
            A new ``NPCProfile`` with every field that could be recognised
            filled in. Fields with no match are left as ``NPCProfile()``
            created them (the model lives in ``.models`` and is not visible
            from this class).
        """
        # Combine all message content for analysis
        texts = (self._content_text(msg.get("content")) for msg in messages)
        full_text = "\n".join(text for text in texts if text)

        profile = NPCProfile()

        # Try bio header first (most reliable)
        if match := self.PATTERNS["bio_header"].search(full_text):
            profile.name = match.group("name").strip()
            profile.gender = match.group("gender").strip()
            profile.race = match.group("race").strip()
            logger.debug("Extracted from bio header", name=profile.name, race=profile.race)

        # Fallback to role intro
        elif match := self.PATTERNS["role_intro"].search(full_text):
            profile.name = match.group("name").strip()
            descriptor = match.group("descriptor")
            # Try to parse race from descriptor
            profile.race = self._extract_race_from_descriptor(descriptor)
            logger.debug("Extracted from role intro", name=profile.name)

        # Extract location
        if match := self.PATTERNS["location"].search(full_text):
            profile.location = match.group("location").strip()

        # Extract factions
        for match in self.PATTERNS["faction"].finditer(full_text):
            faction = match.group("faction").strip()
            if faction and faction not in profile.factions:
                profile.factions.append(faction)

        # Extract profession: walk every occupation-like phrase and stop at
        # the first one that names a known profession, so an early phrase
        # such as "is a Nord" does not hide a later "works as a blacksmith".
        for match in self.PATTERNS["occupation"].finditer(full_text):
            candidate = match.group("profession").strip().lower()
            if known := self._match_known_profession(candidate):
                profile.profession = known
                break

        # Compute knowledge classes
        profile.compute_knowledge_classes()

        logger.info(
            "Extracted NPC profile",
            name=profile.name,
            race=profile.race,
            profession=profile.profession,
            factions=profile.factions,
            location=profile.location,
            knowledge_classes=profile.knowledge_classes,
            education_level=profile.education_level.value,
        )

        return profile

    def _extract_race_from_descriptor(self, descriptor: str) -> str:
        """Try to extract race from a descriptor string.

        The match is case-insensitive and must begin at a word boundary.

        Returns:
            The race with spaces removed (``"Dark Elf"`` -> ``"DarkElf"``),
            or ``"Unknown"`` when no race name is present.
        """
        for race, pattern in self._RACE_PATTERNS:
            if pattern.search(descriptor):
                # Normalize to single-word form
                return race.replace(" ", "")
        return "Unknown"

    def extract_conversation_context(self, messages: list[dict]) -> str:
        """Extract the current conversation topic for RAG query.

        Looks at the last ``CONTEXT_WINDOW`` messages and keeps the
        ``CONTEXT_MAX_MESSAGES`` most recent user/assistant messages whose
        text is non-empty and shorter than ``CONTEXT_MAX_CHARS`` characters.

        Returns:
            The kept messages joined by single spaces in chronological order,
            or ``""`` when nothing qualifies.
        """
        newest_first: list[str] = []
        for msg in reversed(messages[-self.CONTEXT_WINDOW:]):
            if msg.get("role") not in ("user", "assistant"):
                continue
            text = self._content_text(msg.get("content"))
            # Skip very long content (likely system prompts)
            if text and len(text) < self.CONTEXT_MAX_CHARS:
                newest_first.append(text)
                if len(newest_first) == self.CONTEXT_MAX_MESSAGES:
                    break

        # Restore chronological order for the query string
        return " ".join(reversed(newest_first))
|