RAG proxy that intercepts SkyrimNet LLM requests and enriches them with relevant Tamrielic lore from CHIM's Oghma Infinium database.

Features:
- FastAPI proxy compatible with OpenAI API
- ChromaDB semantic search for lore retrieval
- NPC profile extraction from SkyrimNet prompts
- Google Sheets ingestion for CHIM's Oghma data
- Kubernetes deployment manifests
- Debug endpoint for RAG operation monitoring

Collections ingested to iris-dev ChromaDB:
- oghma_lore: 1951 entries (scholar knowledge)
- oghma_basic: 1949 entries (commoner knowledge)
- oghma_visual: 1151 entries (Omnisight perception)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
109 lines
2.7 KiB
YAML
109 lines
2.7 KiB
YAML
# Deployment running a single replica of the oghma-proxy container, which
# listens on port 8100 and reads its settings from a mounted config.yaml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: oghma-proxy
  namespace: nimmersky
  labels:
    app.kubernetes.io/name: oghma-proxy
    app.kubernetes.io/part-of: nimmerverse
    app.kubernetes.io/component: inference-proxy
spec:
  replicas: 1
  selector:
    # Only the name label is used for selection, so extra pod labels below
    # can change without touching the selector.
    matchLabels:
      app.kubernetes.io/name: oghma-proxy
  template:
    metadata:
      labels:
        app.kubernetes.io/name: oghma-proxy
        app.kubernetes.io/part-of: nimmerverse
        # Mirrors the Deployment-level label so pods can be filtered by component.
        app.kubernetes.io/component: inference-proxy
      annotations:
        # Scrape hints for Prometheus setups that honor prometheus.io/*
        # annotations; values are quoted because annotations must be strings.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8100"
        prometheus.io/path: "/metrics"
    spec:
      serviceAccountName: oghma-proxy
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
        # Apply the container runtime's default seccomp filter.
        seccompProfile:
          type: RuntimeDefault

      containers:
        - name: oghma-proxy
          # NOTE(review): `latest` is a mutable tag; combined with
          # imagePullPolicy Always every pod restart may pull a different
          # build. Consider pinning a version tag or digest.
          image: registry.eachpath.local/nimmerverse/oghma-proxy:latest
          imagePullPolicy: Always

          # The process runs as non-root on an unprivileged port, so it needs
          # neither privilege escalation nor any Linux capabilities.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL

          ports:
            - name: http
              containerPort: 8100
              protocol: TCP

          # Both values are read from the oghma-proxy-secrets Secret.
          env:
            - name: OPENROUTER_API_KEY
              valueFrom:
                secretKeyRef:
                  name: oghma-proxy-secrets
                  key: OPENROUTER_API_KEY
            - name: UPSTREAM_URL
              valueFrom:
                secretKeyRef:
                  name: oghma-proxy-secrets
                  key: UPSTREAM_URL

          volumeMounts:
            # Mounts only the config.yaml key as a single file. subPath mounts
            # do not receive ConfigMap updates; restart the pod after editing
            # the ConfigMap.
            - name: config
              mountPath: /app/config.yaml
              subPath: config.yaml
              readOnly: true

          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"

          # Liveness and readiness both poll /health on the named http port;
          # readiness starts sooner and polls more often.
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3

          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3

      volumes:
        - name: config
          configMap:
            name: oghma-proxy-config

      # Prefer scheduling near inference workloads
      # (soft preference: same node as pods labelled component=inference).
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/component: inference
                topologyKey: kubernetes.io/hostname
|
|
|
|
---
|
|
# ServiceAccount named by the oghma-proxy Deployment's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: oghma-proxy
  namespace: nimmersky
  labels:
    app.kubernetes.io/name: oghma-proxy
    # Same part-of grouping label the Deployment carries.
    app.kubernetes.io/part-of: nimmerverse
|