reorg. distillation of oghma knowledge packs out of iris-dev

This commit is contained in:
2026-04-16 04:20:18 +02:00
parent e5e426e65c
commit ccb66d5a7d
33 changed files with 56197 additions and 6 deletions

View File

@@ -41,6 +41,7 @@ ingest = [
[project.scripts]
oghma-proxy = "oghma_proxy.main:main"
oghma-ingest = "oghma_proxy.ingest:main"
oghma-export-packs = "oghma_proxy.export_packs:main"
[tool.ruff]
line-length = 100

View File

@@ -0,0 +1,263 @@
"""Oghma Pack Exporter — materializes ChromaDB collections into SkyrimNet .sknpack files.
One pack per source category. Lore + basic entries merge into the same pack and are
distinguished by the `importance` field (lore=0.75, basic=0.40) so the model naturally
prefers the deeper entry when token budget allows. Visual descriptions export to a
separate single pack.
Produces packs matching SkyrimNet beta format_version=1.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
import chromadb
import structlog
from chromadb.config import Settings
# Module-level structlog logger shared by every function in this exporter.
logger = structlog.get_logger()
# Pack envelope metadata stamped into every exported .sknpack file;
# format_version matches the SkyrimNet beta schema (see module docstring).
PACK_FORMAT_VERSION = 1
PACK_VERSION = "1.0.0"
PACK_AUTHOR = "nimmerverse/oghma-proxy"
# importance grading per collection: lore outranks basic so the model prefers
# the deeper entry when the token budget allows (see module docstring)
IMPORTANCE_LORE = 0.75
IMPORTANCE_BASIC = 0.40
IMPORTANCE_VISUAL = 0.50
# map ChromaDB category → SkyrimNet entry type; categories not listed here
# fall through to the prefix rules in category_to_type()
TYPE_BY_CATEGORY = {
    "spells": "SKILL",
}
def category_to_type(category: str) -> str:
    """Resolve the SkyrimNet entry type for a ChromaDB category."""
    explicit = TYPE_BY_CATEGORY.get(category)
    if explicit is not None:
        return explicit
    # every locations_* category is a LOCATION; everything else is generic
    return "LOCATION" if category.startswith("locations_") else "KNOWLEDGE"
def category_to_location(category: str) -> str:
    """Map 'locations_whiterun' → 'Whiterun'; non-location categories → ''."""
    prefix = "locations_"
    if not category.startswith(prefix):
        return ""
    hold_key = category[len(prefix):]
    # 'locations_other' is a catch-all bucket with no single hold name
    return "" if hold_key == "other" else hold_key.replace("_", " ").title()
def category_to_pack_name(category: str) -> str:
    """Human-readable pack name, e.g. figures_gods → 'Oghma - Figures & Gods'."""
    special_names = {
        "figures_gods": "Figures & Gods",
        "armor_weapons": "Armor & Weapons",
        "groups_lore": "Groups & Books",
    }
    override = special_names.get(category)
    if override is not None:
        return f"Oghma - {override}"
    if category.startswith("locations_"):
        hold_key = category[len("locations_"):]
        # 'other' title-cases to 'Other', so one formula covers both cases
        return f"Oghma - Locations - {hold_key.replace('_', ' ').title()}"
    return f"Oghma - {category.replace('_', ' ').title()}"
def pack_filename(pack_name: str) -> str:
    """Turn a pack name into a filesystem-safe .sknpack filename."""
    # keep only word characters, hyphens and whitespace, then collapse
    # whitespace runs into single underscores
    cleaned = re.sub(r"[^\w\-\s]", "", pack_name).strip()
    underscored = re.sub(r"\s+", "_", cleaned)
    return underscored + ".sknpack"
def build_entry(
    content: str,
    topic: str,
    category: str,
    importance: float,
    tags: list[str],
) -> dict:
    """Assemble one SkyrimNet knowledge-pack entry dict.

    Entry type and location are derived from the ChromaDB category; the
    remaining fields are fixed defaults matching the SkyrimNet schema.
    """
    entry_type = category_to_type(category)
    location = category_to_location(category)
    return {
        "always_inject": False,
        "condition_expr": "",
        "content": content,
        "display_name": topic,
        "emotion": "",
        "importance": importance,
        "location": location,
        "tags": tags,
        "type": entry_type,
    }
def build_pack(pack_name: str, description: str, entries: list[dict]) -> dict:
    """Wrap *entries* in the SkyrimNet .sknpack top-level envelope."""
    exported_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    payload = {
        "author": PACK_AUTHOR,
        "description": description,
        "entries": entries,
        "entry_count": len(entries),
        "exported_at": exported_at,
        "format_version": PACK_FORMAT_VERSION,
        "name": pack_name,
        "npc_groups": [],
        "version": PACK_VERSION,
    }
    return {"skyrimnet_knowledge_pack": payload}
def fetch_all(collection) -> tuple[list[str], list[dict]]:
    """Pull every (document, metadata) pair from a ChromaDB collection."""
    payload = collection.get(include=["documents", "metadatas"])
    return payload["documents"], payload["metadatas"]
def export_oghma_packs(
    chromadb_host: str,
    chromadb_port: int,
    output_dir: Path,
    dry_run: bool = False,
) -> dict:
    """Export the Oghma ChromaDB collections as SkyrimNet .sknpack files.

    Lore and basic entries merge into one pack per category (distinguished
    by importance); visual descriptions go into a single separate pack.

    Args:
        chromadb_host: Hostname of the ChromaDB HTTP server.
        chromadb_port: Port of the ChromaDB HTTP server.
        output_dir: Directory the .sknpack files are written into
            (created if missing).
        dry_run: When True, build packs and collect stats but write nothing.

    Returns:
        Stats dict with keys "packs", "entries", "files" (files lists the
        would-be paths even in a dry run).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    client = chromadb.HttpClient(
        host=chromadb_host,
        port=chromadb_port,
        settings=Settings(anonymized_telemetry=False),
    )
    stats = {"packs": 0, "entries": 0, "files": []}
    _export_category_packs(client, output_dir, dry_run, stats)
    _export_visual_pack(client, output_dir, dry_run, stats)
    return stats


def _export_category_packs(client, output_dir: Path, dry_run: bool, stats: dict) -> None:
    """Merge oghma_lore + oghma_basic entries by category and write one pack each."""
    by_category: dict[str, list[dict]] = defaultdict(list)
    for coll_name, importance in (
        ("oghma_lore", IMPORTANCE_LORE),
        ("oghma_basic", IMPORTANCE_BASIC),
    ):
        collection = client.get_collection(coll_name)
        docs, metas = fetch_all(collection)
        logger.info("Fetched collection", collection=coll_name, count=len(docs))
        for doc, meta in zip(docs, metas):
            category = meta.get("category", "uncategorized")
            # tags are stored as one comma-separated string; empty → []
            raw_tags = (meta.get("tags") or "").strip()
            tag_list = [t.strip() for t in raw_tags.split(",") if t.strip()]
            by_category[category].append(
                build_entry(
                    content=doc,
                    topic=meta.get("topic", ""),
                    category=category,
                    importance=importance,
                    tags=tag_list,
                )
            )
    for category, entries in sorted(by_category.items()):
        pack_name = category_to_pack_name(category)
        description = (
            f"Tamrielic lore from CHIM's Oghma Infinium — {category.replace('_', ' ')}. "
            f"Merges educated and common-knowledge entries graded by importance."
        )
        pack = build_pack(pack_name, description, entries)
        fname = pack_filename(pack_name)
        path = output_dir / fname
        if not dry_run:
            # explicit UTF-8: ensure_ascii=False emits non-ASCII characters,
            # which would fail/garble under a non-UTF-8 locale default encoding
            path.write_text(json.dumps(pack, indent=2, ensure_ascii=False), encoding="utf-8")
        stats["packs"] += 1
        stats["entries"] += len(entries)
        stats["files"].append(str(path))
        logger.info("Wrote pack", file=fname, entries=len(entries), category=category)


def _export_visual_pack(client, output_dir: Path, dry_run: bool, stats: dict) -> None:
    """Write the single Omnisight visual-descriptions pack."""
    visual = client.get_collection("oghma_visual")
    vdocs, vmetas = fetch_all(visual)
    logger.info("Fetched collection", collection="oghma_visual", count=len(vdocs))
    # Visual rows use a different metadata schema ("name" instead of
    # "topic"/"category"), so entries are built inline rather than via
    # build_entry().
    visual_entries = [
        {
            "always_inject": False,
            "condition_expr": "",
            "content": doc,
            "display_name": meta.get("name", ""),
            "emotion": "",
            "importance": IMPORTANCE_VISUAL,
            "location": "",
            "tags": [],
            "type": "KNOWLEDGE",
        }
        for doc, meta in zip(vdocs, vmetas)
    ]
    pack_name = "Oghma - Visual Descriptions"
    visual_pack = build_pack(
        pack_name,
        "Visual descriptions for NPC perception (Omnisight) from CHIM's Oghma Infinium.",
        visual_entries,
    )
    # derive the filename the same way as the category packs instead of
    # hard-coding it, so the two code paths cannot drift apart
    vpath = output_dir / pack_filename(pack_name)
    if not dry_run:
        # explicit UTF-8 for the same reason as the category packs
        vpath.write_text(json.dumps(visual_pack, indent=2, ensure_ascii=False), encoding="utf-8")
    stats["packs"] += 1
    stats["entries"] += len(visual_entries)
    stats["files"].append(str(vpath))
    logger.info("Wrote pack", file=vpath.name, entries=len(visual_entries), category="visual")
def main() -> None:
    """CLI entry point: parse arguments, configure logging, run the export."""
    parser = argparse.ArgumentParser(
        description="Export ChromaDB Oghma collections into SkyrimNet .sknpack files"
    )
    parser.add_argument("--host", default="iris-dev.eachpath.local")
    parser.add_argument("--port", type=int, default=35000)
    parser.add_argument(
        "--output-dir",
        default="/home/dafit/nimmerverse/nimmersky/sknpack",
    )
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # console-friendly structured logging with ISO timestamps
    log_processors = [
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer(),
    ]
    structlog.configure(processors=log_processors)

    try:
        stats = export_oghma_packs(
            chromadb_host=args.host,
            chromadb_port=args.port,
            output_dir=Path(args.output_dir),
            dry_run=args.dry_run,
        )
        logger.info(
            "Export complete",
            packs=stats["packs"],
            entries=stats["entries"],
            output_dir=args.output_dir,
        )
    except Exception as exc:
        # top-level boundary: log and exit non-zero for shell/systemd callers
        logger.error("Export failed", error=str(exc))
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -113,7 +113,31 @@ Note: Reference this knowledge naturally when relevant to the conversation. Do n
position=self.position,
)
else:
logger.warning("Could not find injection point", position=self.position)
seen_headers: list[str] = []
system_msg_count = 0
system_content_chars = 0
for msg in modified_messages:
if msg.get("role") == "system":
system_msg_count += 1
content = msg.get("content", "")
system_content_chars += len(content)
for line in content.splitlines():
stripped = line.strip()
if stripped.startswith("## "):
seen_headers.append(stripped)
if len(seen_headers) >= 20:
break
if len(seen_headers) >= 20:
break
logger.warning(
"Could not find injection point",
position=self.position,
npc_name=npc_profile.name,
system_messages=system_msg_count,
system_content_chars=system_content_chars,
seen_headers=seen_headers,
)
result = InjectionResult(
npc_profile=npc_profile,

View File

@@ -246,10 +246,15 @@ async def chat_completions(request: Request):
async def stream_upstream(url: str, headers: dict, body: dict):
"""Stream response from upstream."""
async with http_client.stream("POST", url, json=body, headers=headers) as response:
async for chunk in response.aiter_bytes():
yield chunk
"""Stream response from upstream with error handling."""
try:
async with http_client.stream("POST", url, json=body, headers=headers) as response:
async for chunk in response.aiter_bytes():
yield chunk
except httpx.ReadError as e:
logger.error("Upstream stream dropped", error=str(e))
except httpx.HTTPError as e:
logger.error("Upstream stream error", error=str(e))
@app.post("/v1/completions")

View File

@@ -4,4 +4,4 @@ cd /home/dafit/nimmerverse/nimmersky/oghma-proxy
Endpoints:
- http://localhost:8100/health - Health check
- http://localhost:8100/debug/rag - See recent RAG operations
- http://localhost:8100/v1/chat/completions - The proxy endpoint (point SkyrimNet here)
- http://127.0.0.1:8100/v1/chat/completions - The proxy endpoint (point SkyrimNet here)