"""Oghma Lore Retriever - Queries ChromaDB for relevant Tamrielic lore.""" from __future__ import annotations import time from functools import lru_cache from typing import TYPE_CHECKING import chromadb import structlog from chromadb.config import Settings from .models import EducationLevel, LoreEntry, NPCProfile if TYPE_CHECKING: from chromadb import Collection logger = structlog.get_logger() class OghmaRetriever: """Retrieves relevant lore from Oghma ChromaDB collections.""" def __init__( self, host: str = "iris-dev.eachpath.local", port: int = 35000, collection_lore: str = "oghma_lore", collection_basic: str = "oghma_basic", max_results: int = 5, min_score: float = 0.55, ): self.host = host self.port = port self.collection_lore_name = collection_lore self.collection_basic_name = collection_basic self.max_results = max_results self.min_score = min_score self._client: chromadb.HttpClient | None = None self._collection_lore: Collection | None = None self._collection_basic: Collection | None = None def _get_client(self) -> chromadb.HttpClient: """Get or create ChromaDB client.""" if self._client is None: self._client = chromadb.HttpClient( host=self.host, port=self.port, settings=Settings(anonymized_telemetry=False), ) logger.info("Connected to ChromaDB", host=self.host, port=self.port) return self._client def _get_collection(self, education_level: EducationLevel) -> Collection: """Get the appropriate collection based on education level.""" client = self._get_client() if education_level == EducationLevel.SCHOLAR: if self._collection_lore is None: self._collection_lore = client.get_collection(self.collection_lore_name) return self._collection_lore else: if self._collection_basic is None: self._collection_basic = client.get_collection(self.collection_basic_name) return self._collection_basic def retrieve( self, query: str, npc_profile: NPCProfile, ) -> tuple[list[LoreEntry], float]: """ Retrieve relevant lore entries for an NPC. Args: query: Conversation context to search for npc_profile: NPC profile for knowledge filtering Returns: Tuple of (lore entries, query time in ms) """ if not query.strip(): return [], 0.0 start_time = time.perf_counter() try: collection = self._get_collection(npc_profile.education_level) # Build metadata filter for knowledge classes # NOTE: Currently disabled because CHIM's Oghma data doesn't have # knowledge_class populated consistently. Enable when data is enriched. where_filter = None # TODO: Re-enable when knowledge_class data is available # if npc_profile.knowledge_classes: # if len(npc_profile.knowledge_classes) == 1: # where_filter = {"knowledge_classes": {"$contains": npc_profile.knowledge_classes[0]}} # else: # where_filter = { # "$or": [ # {"knowledge_classes": {"$contains": kc}} # for kc in npc_profile.knowledge_classes # ] # } # Query ChromaDB results = collection.query( query_texts=[query], n_results=self.max_results, where=where_filter, include=["documents", "metadatas", "distances"], ) # Parse results entries = [] if results and results["documents"] and results["documents"][0]: for i, doc in enumerate(results["documents"][0]): metadata = results["metadatas"][0][i] if results["metadatas"] else {} distance = results["distances"][0][i] if results["distances"] else 1.0 # Convert distance to similarity score (ChromaDB uses L2 distance) # Lower distance = higher similarity score = 1.0 / (1.0 + distance) if score >= self.min_score: entries.append( LoreEntry( topic=metadata.get("topic", "Unknown"), content=doc, category=metadata.get("category", "Unknown"), score=score, knowledge_classes=metadata.get("knowledge_classes", "").split(","), ) ) query_time = (time.perf_counter() - start_time) * 1000 logger.info( "Retrieved lore entries", query_preview=query[:100], npc_name=npc_profile.name, education=npc_profile.education_level.value, entries_found=len(entries), query_time_ms=round(query_time, 2), ) return entries, query_time except Exception as e: logger.error("Failed to retrieve lore", error=str(e)) query_time = (time.perf_counter() - start_time) * 1000 return [], query_time def health_check(self) -> bool: """Check if ChromaDB is reachable.""" try: client = self._get_client() client.heartbeat() return True except Exception as e: logger.error("ChromaDB health check failed", error=str(e)) return False # Cached retriever instance @lru_cache(maxsize=1) def get_retriever( host: str = "iris-dev.eachpath.local", port: int = 35000, ) -> OghmaRetriever: """Get cached retriever instance.""" return OghmaRetriever(host=host, port=port)