Initial release: Multi-provider AI chat with RAG

FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend.
Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history,
chat modes, save-to-wiki, markdown rendering, SSE streaming.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
admin
2026-06-03 10:25:29 +00:00
Unverified
commit ae621ecbb5
10 changed files with 3004 additions and 0 deletions

503
wiki-vector-chat.py Normal file
View File

@@ -0,0 +1,503 @@
#!/usr/bin/env python3
"""Wiki VectorDB Chat — Multi-Provider AI Chat with RAG (KB + VectorDB).
Serves at port 8770, proxied via nginx at /zportal/wiki/api/chat
Uses wiki-api (:8097) for KB search and vector-db (:8099) for vector search.
"""
import asyncio
import json
import os
import re
import time
import urllib.request
from pathlib import Path
PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json")
CUSTOM_PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json")
WIKI_API = "http://127.0.0.1:8097"
VECTOR_DB = "http://127.0.0.1:8099"
# Shared API token for wiki-api and vector-db
_API_TOKEN = ""
try:
_API_TOKEN = Path("/opt/blog/.wiki-api-token").read_text().strip()
except Exception:
pass
PRESETS = [
{
"id": "zai-coding",
"name": "Z.ai Coding Plan",
"base_url": "https://api.z.ai/api/coding/paas/v4",
"model": "glm-4-plus",
"format": "openai",
"icon": "\u26a1",
"description": "Official Z.ai coding plan API",
},
{
"id": "openadapter",
"name": "OpenAdapter",
"base_url": "https://api.openadapter.com/v1",
"model": "gpt-4o-mini",
"format": "openai",
"icon": "\u1f512",
"description": "OpenAdapter unified API",
},
{
"id": "openrouter",
"name": "OpenRouter",
"base_url": "https://openrouter.ai/api/v1",
"model": "anthropic/claude-sonnet-4",
"format": "openrouter",
"icon": "\u1f6e3",
"description": "Model router across providers",
},
{
"id": "crofai",
"name": "Crof.AI",
"base_url": "https://api.crof.ai/v1",
"model": "crof-4-plus",
"format": "openai",
"icon": "\u1f42a",
"description": "Crof AI models",
},
{
"id": "opencode-zen",
"name": "Opencode Zen",
"base_url": "https://api.zen.opencode.com/v1",
"model": "glm-4-plus",
"format": "openai",
"icon": "\u1f9e0",
"description": "Opencode Zen hosted models",
},
]
def load_custom_providers():
try:
if CUSTOM_PROVIDERS_FILE.exists():
return json.loads(CUSTOM_PROVIDERS_FILE.read_text())
except Exception:
pass
return []
def save_custom_providers(providers):
CUSTOM_PROVIDERS_FILE.write_text(json.dumps(providers, indent=2))
def get_all_providers():
"""Return presets + custom providers."""
custom = load_custom_providers()
seen = {p["id"] for p in PRESETS}
result = list(PRESETS)
for p in custom:
if p.get("id") not in seen:
seen.add(p["id"])
result.append(p)
return result
def detect_provider_format(base_url: str) -> str:
from urllib.parse import urlparse
host = urlparse(base_url).hostname.lower()
if "ollama" in host or host in ("localhost", "127.0.0.1"):
return "ollama"
if "anthropic" in host:
return "anthropic"
if "openrouter" in host:
return "openrouter"
if "groq" in host:
return "groq"
return "openai"
async def search_kb(query: str, limit: int = 3) -> str:
"""Search wiki-kb.json via wiki-api."""
try:
url = f"{WIKI_API}/search?q={urllib.parse.quote(query)}&limit={limit}&token={_API_TOKEN}"
req = urllib.request.Request(url)
with urllib.request.urlopen(req, timeout=5) as resp:
data = json.loads(resp.read())
results = data.get("results", [])
if not results:
return ""
lines = []
for r in results[:limit]:
q_text = r.get("q", "")
a_text = r.get("a", "")
topic = r.get("topic", "")
score = r.get("_score", 0)
lines.append(f"[{topic}] Q: {q_text}\nA: {a_text}")
return "\n\n".join(lines)
except Exception as e:
return f"(KB search error: {e})"
async def search_vector(query: str, top_k: int = 5) -> str:
"""Search vector-db for related Discord/Reddit messages."""
try:
data = json.dumps({"query": query, "top_k": top_k}).encode()
req = urllib.request.Request(
f"{VECTOR_DB}/vector/search",
data=data,
headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN},
)
with urllib.request.urlopen(req, timeout=8) as resp:
result = json.loads(resp.read())
hits = result.get("results", [])
if not hits:
return ""
lines = []
for h in hits[:top_k]:
text = h.get("text", "")[:300]
score = h.get("score", 0)
source = h.get("source", "unknown")
meta = h.get("metadata", {})
author = meta.get("author", "")
channel = meta.get("channel", "")
preview = text.replace("\n", " ")[:200]
lines.append(f"[{source}] @{author} in #{channel}: {preview} (score: {score:.2f})")
return "\n\n".join(lines)
except Exception as e:
return f"(Vector search error: {e})"
async def build_rag_context(user_message: str, rag_wiki: bool = True, rag_vector: bool = True) -> str:
"""Build RAG context from KB + VectorDB searches."""
kb_results = ""
vec_results = ""
tasks = []
if rag_wiki:
tasks.append(search_kb(user_message, 3))
if rag_vector:
tasks.append(search_vector(user_message, 5))
if tasks:
results = await asyncio.gather(*tasks)
idx = 0
if rag_wiki:
kb_results = results[idx]; idx += 1
if rag_vector:
vec_results = results[idx]
parts = [
"You are Z.ai Wiki Assistant. Use ALL the knowledge sources below to answer the user's question.",
"Draw from both the Wiki KB and Community Messages. Synthesize information even from partial matches.",
"If the context mentions anything relevant, include it in your answer. Be specific — quote authors, channels, and details when available.",
"Only say you don't have information if the sources are truly empty or completely unrelated.",
"",
]
if rag_wiki:
parts += ["=== Wiki Knowledge Base ===", kb_results or "(no KB results found)", ""]
if rag_vector:
parts += ["=== Related Community Messages (Discord/Reddit) ===", vec_results or "(no community messages found)"]
if not rag_wiki and not rag_vector:
parts.append("(RAG sources disabled for this session)")
return "\n".join(parts)
# ── LLM Provider Calls ──
def format_messages_openai(system: str, messages: list, model: str) -> dict:
"""Format for OpenAI-compatible /chat/completions endpoint."""
return {"model": model, "messages": [{"role": "system", "content": system}] + messages,
"temperature": 0.7, "max_tokens": 2048, "stream": True}
def format_messages_anthropic(system: str, messages: list, model: str) -> dict:
"""Convert OpenAI-format messages to Anthropic format."""
anthropic_msgs = []
for m in messages:
role = "user" if m["role"] == "user" else "assistant"
anthropic_msgs.append({"role": role, "content": m["content"]})
return {"model": model, "system": system, "messages": anthropic_msgs,
"max_tokens": 2048, "stream": True}
def format_messages_ollama(system: str, messages: list, model: str) -> dict:
"""Format for Ollama /api/chat endpoint."""
ollama_msgs = []
for m in messages:
role = "user" if m["role"] == "user" else "assistant"
ollama_msgs.append({"role": role, "content": m["content"]})
return {"model": model, "messages": ollama_msgs, "stream": True}
async def call_llm_stream(provider: dict, system: str, messages: list):
"""Call LLM provider and yield SSE delta chunks."""
base_url = provider["base_url"].rstrip("/")
fmt = provider.get("format", detect_provider_format(base_url))
api_key = provider.get("api_key", "")
model = provider.get("model", "gpt-4o-mini")
if fmt == "anthropic":
payload = format_messages_anthropic(system, messages, model)
url = f"{base_url}/v1/messages"
headers = {"x-api-key": api_key, "Content-Type": "application/json",
"anthropic-version": "2023-06-01"}
elif fmt == "ollama":
payload = format_messages_ollama(system, messages, model)
url = f"{base_url}/api/chat"
headers = {"Content-Type": "application/json"}
else:
# openai / openrouter / groq / default
payload = format_messages_openai(system, messages, model)
url = f"{base_url}/chat/completions"
headers = {}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
if fmt == "openrouter":
headers["HTTP-OpenRouter-AI-Model"] = model
headers["Content-Type"] = "application/json"
data = json.dumps(payload).encode()
req = urllib.request.Request(url, data=data, headers=headers)
try:
with urllib.request.urlopen(req, timeout=60) as resp:
reader = resp
buf = b""
while True:
chunk = reader.read(4096)
if not chunk:
break
buf += chunk
while b"\n" in buf:
line, buf = buf.split(b"\n", 1)
line = line.decode("utf-8", errors="replace").strip()
if not line:
continue
if line.startswith("data: "):
data_str = line[5:].strip()
if data_str == "[DONE]":
yield {"type": "done"}
return
try:
chunk_data = json.loads(data_str)
except json.JSONDecodeError:
yield {"delta": data_str, "type": "raw"}
continue
# Anthropic SSE format
if fmt == "anthropic":
evt_type = chunk_data.get("type", "")
if evt_type == "content_block_delta":
text = chunk_data.get("delta", {}).get("text", "")
if text:
yield {"delta": text, "type": "delta"}
elif evt_type == "message_stop":
yield {"type": "done"}
return
elif evt_type == "error":
err_msg = chunk_data.get("error", {}).get("message", str(chunk_data))
yield {"type": "error", "delta": err_msg}
return
continue
# OpenAI-compatible SSE format
deltas = chunk_data.get("choices", [{}])[0].get("delta", {})
content = deltas.get("content", "")
if content:
yield {"delta": content, "type": "delta"}
tool_calls = deltas.get("tool_calls")
if tool_calls:
names = [tc.get("function", {}).get("name", "?") for tc in tool_calls]
yield {"delta": f"\n[Using tools: {', '.join(names)}]", "type": "tool"}
finish = chunk_data.get("finish_reason")
if finish:
yield {"type": "done"}
elif line.startswith("event:"):
pass # SSE event name, skip
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")[:500]
yield {"type": "error", "delta": f"HTTP {e.code}: {body}"}
except Exception as e:
yield {"type": "error", "delta": str(e)}
# ── FastAPI App ──
try:
from fastapi import FastAPI
from fastapi.responses import StreamingResponse, JSONResponse, Response
from pydantic import BaseModel
except ImportError:
print("Installing fastapi...")
os.system("pip install fastapi uvicorn httpx -q")
from fastapi import FastAPI
from fastapi.responses import StreamingResponse, JSONResponse, Response
from pydantic import BaseModel
app = FastAPI(title="Wiki VectorDB Chat")
class ChatMessage(BaseModel):
message: str
provider_id: str = "zai-coding"
history: list = []
rag_wiki: bool = True
rag_vector: bool = True
mode: str = "chat"
class ProviderSave(BaseModel):
id: str
name: str
base_url: str
model: str
api_key: str = ""
format: str = "openai"
icon: str = "\u2b99"
description: str = ""
@app.get("/providers/presets")
async def get_presets():
return PRESETS
@app.get("/providers")
async def list_providers():
return get_all_providers()
@app.post("/providers/save")
async def save_provider(p: ProviderSave):
custom = load_custom_providers()
p_dict = p.model_dump()
# Update or append
found = False
for i, existing in enumerate(custom):
if existing.get("id") == p.id:
custom[i] = p_dict
found = True
break
if not found:
custom.append(p_dict)
save_custom_providers(custom)
return {"ok": True, "provider": p_dict}
@app.delete("/providers/{provider_id}")
async def delete_provider(provider_id: str):
custom = load_custom_providers()
custom = [p for p in custom if p.get("id") != provider_id]
save_custom_providers(custom)
return {"ok": True}
@app.post("/chat/message")
async def chat_message(msg: ChatMessage):
async def generate():
providers = get_all_providers()
provider = next((p for p in providers if p.get("id") == msg.provider_id), None)
if not provider:
yield f"data: {json.dumps({'type':'error','delta':'Provider not found'})}\n\n"
return
# Build conversation history
messages = []
for h in msg.history[-10:]:
messages.append(h)
messages.append({"role": "user", "content": msg.message})
# Build RAG context with per-session toggles
rag_context = await build_rag_context(msg.message, msg.rag_wiki, msg.rag_vector)
# Mode-specific system prompt additions
mode_hints = {
"chat": "",
"code": "\n\nMODE: Coding. The user is working on code. Provide precise, well-structured code examples with explanations. Use markdown code blocks. Be concise and technical.",
"brain": "\n\nMODE: Brainstorm. The user wants creative exploration. Think freely, offer multiple perspectives, suggest unconventional approaches. Be enthusiastic and expansive.",
}
system_prompt = rag_context + mode_hints.get(msg.mode, "")
async for chunk in call_llm_stream(provider, system_prompt, messages):
data = json.dumps(chunk, ensure_ascii=False)
yield f"data: {data}\n\n"
yield f"data: {json.dumps({'type':'done'})}\n\n"
return StreamingResponse(generate(), media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
@app.post("/chat/tunnel")
async def chat_tunnel(msg: ChatMessage):
"""Server-side token chat — uses ZAI_API_TOKEN env var if available."""
async def generate():
providers = get_all_providers()
provider = next((p for p in providers if p.get("id") == msg.provider_id), None)
if not provider:
yield f"data: {json.dumps({'type':'error','delta':'Provider not found'})}\n\n"
return
# Use server-side token if available (for tunnel mode)
token = os.environ.get("ZAI_API_TOKEN", "")
if token and not provider.get("api_key"):
provider = dict(provider)
provider["api_key"] = token
messages = []
for h in msg.history[-10:]:
messages.append(h)
messages.append({"role": "user", "content": msg.message})
rag_context = await build_rag_context(msg.message)
async for chunk in call_llm_stream(provider, rag_context, messages):
data = json.dumps(chunk, ensure_ascii=False)
yield f"data: {data}\n\n"
yield f"data: {json.dumps({'type':'done'})}\n\n"
return StreamingResponse(generate(), media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
@app.get("/health")
async def health():
return {"status": "ok", "providers": len(get_all_providers())}
class WikiSave(BaseModel):
question: str
answer: str
topic: str = "chat-saved"
@app.post("/chat/save-to-wiki")
async def save_to_wiki(item: WikiSave):
"""Save a Q&A pair directly to wiki-kb.json."""
try:
kb_path = Path("/opt/blog/wiki-kb.json")
kb = json.loads(kb_path.read_text())
entry = {
"q": item.question,
"a": item.answer,
"topic": item.topic,
"author": "chat-assistant",
"source": "chat-saved",
"timestamp": time.strftime("%Y-%m-%d %H:%M"),
}
kb.append(entry)
kb_path.write_text(json.dumps(kb, ensure_ascii=False, indent=2))
return {"ok": True, "total": len(kb)}
except Exception as e:
return {"ok": False, "error": str(e)}
def main():
import uvicorn
port = 8770
for i, arg in enumerate(__import__("sys").argv):
if arg == "--port" and i + 1 < len(__import__("sys").argv):
port = int(__import__("sys").argv[i + 1])
print(f"Wiki VectorDB Chat starting on port {port}")
uvicorn.run(app, host="127.0.0.1", port=port, log_level="warning")
if __name__ == "__main__":
main()