reorg. distillation of oghma knowledge packs out of iris-dev
This commit is contained in:
@@ -41,6 +41,7 @@ ingest = [
|
||||
[project.scripts]
|
||||
oghma-proxy = "oghma_proxy.main:main"
|
||||
oghma-ingest = "oghma_proxy.ingest:main"
|
||||
oghma-export-packs = "oghma_proxy.export_packs:main"
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
|
||||
263
oghma-proxy/src/oghma_proxy/export_packs.py
Normal file
263
oghma-proxy/src/oghma_proxy/export_packs.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""Oghma Pack Exporter — materializes ChromaDB collections into SkyrimNet .sknpack files.
|
||||
|
||||
One pack per source category. Lore + basic entries merge into the same pack and are
|
||||
distinguished by the `importance` field (lore=0.75, basic=0.40) so the model naturally
|
||||
prefers the deeper entry when token budget allows. Visual descriptions export to a
|
||||
separate single pack.
|
||||
|
||||
Produces packs matching SkyrimNet beta format_version=1.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import chromadb
|
||||
import structlog
|
||||
from chromadb.config import Settings
|
||||
|
||||
# Shared structlog logger for this module.
logger = structlog.get_logger()

# SkyrimNet beta pack schema version this exporter emits (see module docstring).
PACK_FORMAT_VERSION = 1
# Version string stamped into every exported pack.
PACK_VERSION = "1.0.0"
# Author attribution written into each pack's metadata.
PACK_AUTHOR = "nimmerverse/oghma-proxy"

# importance grading per collection
IMPORTANCE_LORE = 0.75  # deeper "educated" lore entries
IMPORTANCE_BASIC = 0.40  # common-knowledge entries
IMPORTANCE_VISUAL = 0.50  # visual descriptions (Omnisight perception pack)

# map ChromaDB category → SkyrimNet entry type
TYPE_BY_CATEGORY = {
    "spells": "SKILL",
}
|
||||
|
||||
def category_to_type(category: str) -> str:
    """Translate a ChromaDB category name into a SkyrimNet entry type.

    Explicit overrides in TYPE_BY_CATEGORY take precedence; any
    ``locations_*`` category maps to LOCATION; everything else is
    generic KNOWLEDGE.
    """
    default = "LOCATION" if category.startswith("locations_") else "KNOWLEDGE"
    return TYPE_BY_CATEGORY.get(category, default)
|
||||
|
||||
def category_to_location(category: str) -> str:
    """Return the hold name encoded in a ``locations_*`` category.

    'locations_whiterun' -> 'Whiterun'. Non-location categories and the
    catch-all 'locations_other' yield an empty string.
    """
    prefix = "locations_"
    if category.startswith(prefix):
        hold_key = category[len(prefix):]
        if hold_key != "other":
            return hold_key.replace("_", " ").title()
    return ""
|
||||
|
||||
def category_to_pack_name(category: str) -> str:
    """Build the human-readable pack name for a category.

    e.g. figures_gods -> 'Oghma - Figures & Gods';
    locations_the_reach -> 'Oghma - Locations - The Reach'.
    """
    # Hand-curated display names that a naive title() would get wrong.
    special_names = {
        "figures_gods": "Figures & Gods",
        "armor_weapons": "Armor & Weapons",
        "groups_lore": "Groups & Books",
    }
    label = special_names.get(category)
    if label is not None:
        return f"Oghma - {label}"

    prefix = "locations_"
    if category.startswith(prefix):
        hold_key = category[len(prefix):]
        suffix = "Other" if hold_key == "other" else hold_key.replace("_", " ").title()
        return f"Oghma - Locations - {suffix}"

    return f"Oghma - {category.replace('_', ' ').title()}"
|
||||
|
||||
def pack_filename(pack_name: str) -> str:
    """Derive a filesystem-safe ``.sknpack`` filename from a pack name.

    Drops characters outside word/hyphen/whitespace, then collapses each
    whitespace run into a single underscore (SkyrimNet export style).
    """
    cleaned = re.sub(r"[^\w\-\s]", "", pack_name).strip()
    return re.sub(r"\s+", "_", cleaned) + ".sknpack"
|
||||
|
||||
def build_entry(
    content: str,
    topic: str,
    category: str,
    importance: float,
    tags: list[str],
) -> dict:
    """Assemble one knowledge-pack entry dict in the SkyrimNet schema.

    Location and type are derived from the category; entries are never
    always-injected and carry no condition expression or emotion.
    """
    # Keys are inserted in alphabetical order to match the exported JSON layout.
    entry: dict = {}
    entry["always_inject"] = False
    entry["condition_expr"] = ""
    entry["content"] = content
    entry["display_name"] = topic
    entry["emotion"] = ""
    entry["importance"] = importance
    entry["location"] = category_to_location(category)
    entry["tags"] = tags
    entry["type"] = category_to_type(category)
    return entry
|
||||
|
||||
def build_pack(pack_name: str, description: str, entries: list[dict]) -> dict:
    """Wrap entries in the top-level ``.sknpack`` envelope.

    Stamps the module's author/version constants and a UTC export
    timestamp in ISO-8601 'Z' form.
    """
    exported_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    payload = {
        "author": PACK_AUTHOR,
        "description": description,
        "entries": entries,
        "entry_count": len(entries),
        "exported_at": exported_at,
        "format_version": PACK_FORMAT_VERSION,
        "name": pack_name,
        "npc_groups": [],
        "version": PACK_VERSION,
    }
    return {"skyrimnet_knowledge_pack": payload}
||||
|
||||
|
||||
def fetch_all(collection) -> tuple[list[str], list[dict]]:
    """Pull every document and its metadata out of a ChromaDB collection."""
    snapshot = collection.get(include=["documents", "metadatas"])
    return snapshot["documents"], snapshot["metadatas"]
||||
|
||||
|
||||
def export_oghma_packs(
    chromadb_host: str,
    chromadb_port: int,
    output_dir: Path,
    dry_run: bool = False,
) -> dict:
    """Export all Oghma ChromaDB collections into SkyrimNet .sknpack files.

    Lore and basic entries merge into one pack per category, distinguished
    by their `importance` grade; visual descriptions go into a separate
    single pack.

    Args:
        chromadb_host: Hostname of the ChromaDB HTTP server.
        chromadb_port: Port of the ChromaDB HTTP server.
        output_dir: Directory for the .sknpack files (created if missing).
        dry_run: When True, compute stats but write nothing to disk.

    Returns:
        Stats dict: {"packs": int, "entries": int, "files": list[str]}.
        In dry-run mode "files" lists the paths that WOULD be written.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    client = chromadb.HttpClient(
        host=chromadb_host,
        port=chromadb_port,
        settings=Settings(anonymized_telemetry=False),
    )

    # category → list[entry] — lore + basic merge by category
    by_category: dict[str, list[dict]] = defaultdict(list)

    for coll_name, importance in (
        ("oghma_lore", IMPORTANCE_LORE),
        ("oghma_basic", IMPORTANCE_BASIC),
    ):
        collection = client.get_collection(coll_name)
        docs, metas = fetch_all(collection)
        logger.info("Fetched collection", collection=coll_name, count=len(docs))

        for doc, meta in zip(docs, metas):
            category = meta.get("category", "uncategorized")
            topic = meta.get("topic", "")
            # Tags are stored as one comma-separated string; split into a clean list.
            raw_tags = (meta.get("tags") or "").strip()
            tag_list = [t.strip() for t in raw_tags.split(",") if t.strip()] if raw_tags else []

            by_category[category].append(
                build_entry(
                    content=doc,
                    topic=topic,
                    category=category,
                    importance=importance,
                    tags=tag_list,
                )
            )

    stats = {"packs": 0, "entries": 0, "files": []}

    for category, entries in sorted(by_category.items()):
        pack_name = category_to_pack_name(category)
        # Fix: second line had a pointless f-string prefix (no placeholders).
        description = (
            f"Tamrielic lore from CHIM's Oghma Infinium — {category.replace('_', ' ')}. "
            "Merges educated and common-knowledge entries graded by importance."
        )
        pack = build_pack(pack_name, description, entries)
        fname = pack_filename(pack_name)
        path = output_dir / fname

        if not dry_run:
            path.write_text(json.dumps(pack, indent=2, ensure_ascii=False))
        stats["packs"] += 1
        stats["entries"] += len(entries)
        stats["files"].append(str(path))
        # Fix: previously logged "Wrote pack" even under --dry-run, which was misleading.
        if dry_run:
            logger.info("Dry run: would write pack", file=fname, entries=len(entries), category=category)
        else:
            logger.info("Wrote pack", file=fname, entries=len(entries), category=category)

    # Visual descriptions — separate single pack, different source schema
    visual = client.get_collection("oghma_visual")
    vdocs, vmetas = fetch_all(visual)
    logger.info("Fetched collection", collection="oghma_visual", count=len(vdocs))

    visual_entries = []
    for doc, meta in zip(vdocs, vmetas):
        # Visual metadata uses "name" (not "topic") and carries no category/tags.
        visual_entries.append(
            {
                "always_inject": False,
                "condition_expr": "",
                "content": doc,
                "display_name": meta.get("name", ""),
                "emotion": "",
                "importance": IMPORTANCE_VISUAL,
                "location": "",
                "tags": [],
                "type": "KNOWLEDGE",
            }
        )

    visual_pack = build_pack(
        "Oghma - Visual Descriptions",
        "Visual descriptions for NPC perception (Omnisight) from CHIM's Oghma Infinium.",
        visual_entries,
    )
    vpath = output_dir / "Oghma_-_Visual_Descriptions.sknpack"
    if not dry_run:
        vpath.write_text(json.dumps(visual_pack, indent=2, ensure_ascii=False))
    stats["packs"] += 1
    stats["entries"] += len(visual_entries)
    stats["files"].append(str(vpath))
    # Same dry-run distinction as the per-category packs above.
    if dry_run:
        logger.info("Dry run: would write pack", file=vpath.name, entries=len(visual_entries), category="visual")
    else:
        logger.info("Wrote pack", file=vpath.name, entries=len(visual_entries), category="visual")

    return stats
||||
|
||||
|
||||
def _parse_args() -> argparse.Namespace:
    # CLI definition kept in its own helper so main() reads as a pipeline.
    parser = argparse.ArgumentParser(
        description="Export ChromaDB Oghma collections into SkyrimNet .sknpack files"
    )
    parser.add_argument("--host", default="iris-dev.eachpath.local")
    parser.add_argument("--port", type=int, default=35000)
    parser.add_argument(
        "--output-dir",
        default="/home/dafit/nimmerverse/nimmersky/sknpack",
    )
    parser.add_argument("--dry-run", action="store_true")
    return parser.parse_args()


def _configure_logging() -> None:
    # Console-oriented structlog setup: level + ISO timestamp + pretty renderer.
    structlog.configure(
        processors=[
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.dev.ConsoleRenderer(),
        ],
    )


def main() -> None:
    """CLI entry point: parse args, configure logging, run the export.

    Exits with status 1 if the export raises any exception.
    """
    args = _parse_args()
    _configure_logging()

    try:
        export_stats = export_oghma_packs(
            chromadb_host=args.host,
            chromadb_port=args.port,
            output_dir=Path(args.output_dir),
            dry_run=args.dry_run,
        )
        logger.info(
            "Export complete",
            packs=export_stats["packs"],
            entries=export_stats["entries"],
            output_dir=args.output_dir,
        )
    except Exception as exc:
        logger.error("Export failed", error=str(exc))
        sys.exit(1)
|
||||
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -113,7 +113,31 @@ Note: Reference this knowledge naturally when relevant to the conversation. Do n
|
||||
position=self.position,
|
||||
)
|
||||
else:
|
||||
logger.warning("Could not find injection point", position=self.position)
|
||||
seen_headers: list[str] = []
|
||||
system_msg_count = 0
|
||||
system_content_chars = 0
|
||||
for msg in modified_messages:
|
||||
if msg.get("role") == "system":
|
||||
system_msg_count += 1
|
||||
content = msg.get("content", "")
|
||||
system_content_chars += len(content)
|
||||
for line in content.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("## "):
|
||||
seen_headers.append(stripped)
|
||||
if len(seen_headers) >= 20:
|
||||
break
|
||||
if len(seen_headers) >= 20:
|
||||
break
|
||||
|
||||
logger.warning(
|
||||
"Could not find injection point",
|
||||
position=self.position,
|
||||
npc_name=npc_profile.name,
|
||||
system_messages=system_msg_count,
|
||||
system_content_chars=system_content_chars,
|
||||
seen_headers=seen_headers,
|
||||
)
|
||||
|
||||
result = InjectionResult(
|
||||
npc_profile=npc_profile,
|
||||
|
||||
@@ -246,10 +246,15 @@ async def chat_completions(request: Request):
|
||||
|
||||
|
||||
async def stream_upstream(url: str, headers: dict, body: dict):
|
||||
"""Stream response from upstream."""
|
||||
async with http_client.stream("POST", url, json=body, headers=headers) as response:
|
||||
async for chunk in response.aiter_bytes():
|
||||
yield chunk
|
||||
"""Stream response from upstream with error handling."""
|
||||
try:
|
||||
async with http_client.stream("POST", url, json=body, headers=headers) as response:
|
||||
async for chunk in response.aiter_bytes():
|
||||
yield chunk
|
||||
except httpx.ReadError as e:
|
||||
logger.error("Upstream stream dropped", error=str(e))
|
||||
except httpx.HTTPError as e:
|
||||
logger.error("Upstream stream error", error=str(e))
|
||||
|
||||
|
||||
@app.post("/v1/completions")
|
||||
|
||||
@@ -4,4 +4,4 @@ cd /home/dafit/nimmerverse/nimmersky/oghma-proxy
|
||||
Endpoints:
|
||||
- http://localhost:8100/health - Health check
|
||||
- http://localhost:8100/debug/rag - See recent RAG operations
|
||||
- http://localhost:8100/v1/chat/completions - The proxy endpoint (point SkyrimNet here)
|
||||
- http://127.0.0.1:8100/v1/chat/completions - The proxy endpoint (point SkyrimNet here)
|
||||
|
||||
Reference in New Issue
Block a user