Initial release: Multi-provider AI chat with RAG
FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend. Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history, chat modes, save-to-wiki, markdown rendering, SSE streaming. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
503
wiki-vector-chat.py
Normal file
503
wiki-vector-chat.py
Normal file
@@ -0,0 +1,503 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Wiki VectorDB Chat — Multi-Provider AI Chat with RAG (KB + VectorDB).
|
||||
|
||||
Serves at port 8770, proxied via nginx at /zportal/wiki/api/chat
|
||||
Uses wiki-api (:8097) for KB search and vector-db (:8099) for vector search.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
# On-disk JSON store for providers. Both names point at the same file; the
# load/save helpers in this file use CUSTOM_PROVIDERS_FILE.
PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json")
CUSTOM_PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json")

# Local backend services queried when building RAG context.
WIKI_API = "http://127.0.0.1:8097"
VECTOR_DB = "http://127.0.0.1:8099"

# Shared API token for wiki-api and vector-db. Loading is best-effort: any
# failure to read the token file leaves the token empty.
try:
    _API_TOKEN = Path("/opt/blog/.wiki-api-token").read_text().strip()
except Exception:
    _API_TOKEN = ""
|
||||
|
||||
# Built-in provider presets. Each entry carries the fields the rest of this
# file reads from a provider dict: id, base_url, model and format, plus
# display metadata (name, icon, description).
#
# Icons outside the Basic Multilingual Plane must use the 8-digit \UXXXXXXXX
# escape: "\u" consumes exactly four hex digits, so "\u1f512" would be the
# character U+1F51 followed by a literal "2".
PRESETS = [
    {
        "id": "zai-coding",
        "name": "Z.ai Coding Plan",
        "base_url": "https://api.z.ai/api/coding/paas/v4",
        "model": "glm-4-plus",
        "format": "openai",
        "icon": "\u26a1",
        "description": "Official Z.ai coding plan API",
    },
    {
        "id": "openadapter",
        "name": "OpenAdapter",
        "base_url": "https://api.openadapter.com/v1",
        "model": "gpt-4o-mini",
        "format": "openai",
        "icon": "\U0001f512",
        "description": "OpenAdapter unified API",
    },
    {
        "id": "openrouter",
        "name": "OpenRouter",
        "base_url": "https://openrouter.ai/api/v1",
        "model": "anthropic/claude-sonnet-4",
        "format": "openrouter",
        "icon": "\U0001f6e3",
        "description": "Model router across providers",
    },
    {
        "id": "crofai",
        "name": "Crof.AI",
        "base_url": "https://api.crof.ai/v1",
        "model": "crof-4-plus",
        "format": "openai",
        "icon": "\U0001f42a",
        "description": "Crof AI models",
    },
    {
        "id": "opencode-zen",
        "name": "Opencode Zen",
        "base_url": "https://api.zen.opencode.com/v1",
        "model": "glm-4-plus",
        "format": "openai",
        "icon": "\U0001f9e0",
        "description": "Opencode Zen hosted models",
    },
]
|
||||
|
||||
|
||||
def load_custom_providers():
    """Load user-defined providers from CUSTOM_PROVIDERS_FILE.

    Loading is best-effort and never raises for a missing or damaged file.

    Returns:
        A list of provider dicts. The list is empty when the file is missing,
        unreadable, not valid JSON, or does not hold a JSON array. Array
        entries that are not JSON objects are dropped so callers can safely
        use ``.get()`` on every element.
    """
    try:
        data = json.loads(CUSTOM_PROVIDERS_FILE.read_text())
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # invalid JSON (JSONDecodeError) and undecodable bytes.
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
|
||||
|
||||
|
||||
def save_custom_providers(providers):
    """Overwrite CUSTOM_PROVIDERS_FILE with *providers* as indented JSON."""
    serialized = json.dumps(providers, indent=2)
    CUSTOM_PROVIDERS_FILE.write_text(serialized)
|
||||
|
||||
|
||||
def get_all_providers():
    """Return the built-in presets followed by the custom providers.

    Presets come first and win on id collisions: a custom provider whose id
    matches a preset (or an earlier custom entry) is left out. Custom entries
    that are not dicts, or that have no string ``id``, are skipped instead of
    raising, so one malformed record cannot break provider listing.

    Returns:
        A new list of provider dicts (the dicts themselves are not copied).
    """
    merged = list(PRESETS)
    seen = {preset["id"] for preset in PRESETS}
    for entry in load_custom_providers():
        if not isinstance(entry, dict):
            continue
        provider_id = entry.get("id")
        if not isinstance(provider_id, str) or provider_id in seen:
            continue
        seen.add(provider_id)
        merged.append(entry)
    return merged
|
||||
|
||||
|
||||
def detect_provider_format(base_url: str) -> str:
    """Guess the wire format of a provider from the host name of its URL.

    Args:
        base_url: Provider base URL, e.g. ``"https://openrouter.ai/api/v1"``.

    Returns:
        One of ``"ollama"``, ``"anthropic"``, ``"openrouter"``, ``"groq"`` or
        ``"openai"``. ``"openai"`` is the fallback, including for URLs that
        have no parseable host (empty string, missing scheme, malformed).
    """
    from urllib.parse import urlparse

    try:
        # hostname is None when the URL has no network location.
        host = (urlparse(base_url).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. an unclosed IPv6 bracket).
        host = ""

    if "ollama" in host or host in ("localhost", "127.0.0.1"):
        return "ollama"
    for marker in ("anthropic", "openrouter", "groq"):
        if marker in host:
            return marker
    return "openai"
|
||||
|
||||
|
||||
async def search_kb(query: str, limit: int = 3) -> str:
    """Search wiki-kb.json via wiki-api.

    The blocking HTTP request runs in the event loop's default executor so
    other coroutines (for example a concurrent vector search) keep running
    while it waits.

    Args:
        query: Free-text search query.
        limit: Maximum number of KB entries to include.

    Returns:
        Matching entries formatted as ``[topic] Q: ...`` / ``A: ...`` blocks
        separated by blank lines, ``""`` when nothing matched, or a
        ``"(KB search error: ...)"`` string on failure. Never raises.
    """
    from urllib.parse import quote, urlencode

    try:
        # Percent-encode every parameter, including the token, so reserved
        # characters cannot corrupt the query string.
        params = urlencode(
            {"q": query, "limit": limit, "token": _API_TOKEN},
            quote_via=quote,
        )
        url = f"{WIKI_API}/search?{params}"

        def _fetch():
            with urllib.request.urlopen(urllib.request.Request(url), timeout=5) as resp:
                return json.loads(resp.read())

        data = await asyncio.get_running_loop().run_in_executor(None, _fetch)
        results = data.get("results", [])
        if not results:
            return ""
        lines = [
            f"[{r.get('topic', '')}] Q: {r.get('q', '')}\nA: {r.get('a', '')}"
            for r in results[:limit]
        ]
        return "\n\n".join(lines)
    except Exception as e:
        return f"(KB search error: {e})"
|
||||
|
||||
|
||||
async def search_vector(query: str, top_k: int = 5) -> str:
    """Search vector-db for related Discord/Reddit messages.

    The blocking HTTP request runs in the event loop's default executor so
    the event loop is not stalled while waiting for vector-db.

    Args:
        query: Free-text search query.
        top_k: Maximum number of hits to request and include.

    Returns:
        One line per hit in the form
        ``[source] @author in #channel: preview (score: 0.00)``, separated by
        blank lines; ``""`` when nothing matched; or a
        ``"(Vector search error: ...)"`` string on failure. Never raises.
    """
    try:
        body = json.dumps({"query": query, "top_k": top_k}).encode()
        req = urllib.request.Request(
            f"{VECTOR_DB}/vector/search",
            data=body,
            headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN},
        )

        def _fetch():
            with urllib.request.urlopen(req, timeout=8) as resp:
                return json.loads(resp.read())

        result = await asyncio.get_running_loop().run_in_executor(None, _fetch)
        hits = result.get("results", [])
        if not hits:
            return ""

        lines = []
        for h in hits[:top_k]:
            # Tolerate null/missing fields so one odd hit does not discard
            # the whole result set.
            meta = h.get("metadata") or {}
            text = h.get("text") or ""
            try:
                score = float(h.get("score") or 0)
            except (TypeError, ValueError):
                score = 0.0
            source = h.get("source", "unknown")
            author = meta.get("author", "")
            channel = meta.get("channel", "")
            # Single-line preview capped at 200 characters.
            preview = str(text)[:200].replace("\n", " ")
            lines.append(f"[{source}] @{author} in #{channel}: {preview} (score: {score:.2f})")
        return "\n\n".join(lines)
    except Exception as e:
        return f"(Vector search error: {e})"
|
||||
|
||||
|
||||
async def build_rag_context(user_message: str, rag_wiki: bool = True, rag_vector: bool = True) -> str:
    """Assemble the system prompt from the enabled RAG sources.

    Args:
        user_message: Text used as the query for both searches.
        rag_wiki: Include a Wiki KB section (searched with a limit of 3).
        rag_vector: Include a community-messages section (top 5 hits).

    Returns:
        The assistant instructions followed by one section per enabled
        source, or a "sources disabled" note when both are off.
    """
    pending = []
    if rag_wiki:
        pending.append(search_kb(user_message, 3))
    if rag_vector:
        pending.append(search_vector(user_message, 5))

    # Results come back in the order the searches were queued above.
    outcomes = list(await asyncio.gather(*pending)) if pending else []
    kb_results = outcomes.pop(0) if rag_wiki else ""
    vec_results = outcomes.pop(0) if rag_vector else ""

    sections = [
        "You are Z.ai Wiki Assistant. Use ALL the knowledge sources below to answer the user's question.",
        "Draw from both the Wiki KB and Community Messages. Synthesize information even from partial matches.",
        "If the context mentions anything relevant, include it in your answer. Be specific — quote authors, channels, and details when available.",
        "Only say you don't have information if the sources are truly empty or completely unrelated.",
        "",
    ]
    if rag_wiki:
        sections.extend(["=== Wiki Knowledge Base ===", kb_results or "(no KB results found)", ""])
    if rag_vector:
        sections.extend([
            "=== Related Community Messages (Discord/Reddit) ===",
            vec_results or "(no community messages found)",
        ])
    if not (rag_wiki or rag_vector):
        sections.append("(RAG sources disabled for this session)")
    return "\n".join(sections)
|
||||
|
||||
|
||||
# ── LLM Provider Calls ──
|
||||
|
||||
def format_messages_openai(system: str, messages: list, model: str) -> dict:
    """Build a streaming request body for an OpenAI-compatible /chat/completions endpoint.

    The system prompt is placed in front of *messages* as a ``system`` message.
    """
    system_entry = {"role": "system", "content": system}
    conversation = [system_entry] + messages
    return {
        "model": model,
        "messages": conversation,
        "temperature": 0.7,
        "max_tokens": 2048,
        "stream": True,
    }
|
||||
|
||||
|
||||
def format_messages_anthropic(system: str, messages: list, model: str) -> dict:
    """Build a streaming Anthropic request body from OpenAI-style messages.

    The system prompt goes in the top-level ``system`` field. Every message
    whose role is not ``"user"`` is sent with the ``"assistant"`` role.
    """
    converted = [
        {
            "role": "user" if entry["role"] == "user" else "assistant",
            "content": entry["content"],
        }
        for entry in messages
    ]
    return {
        "model": model,
        "system": system,
        "messages": converted,
        "max_tokens": 2048,
        "stream": True,
    }
|
||||
|
||||
|
||||
def format_messages_ollama(system: str, messages: list, model: str) -> dict:
    """Build a streaming request body for the Ollama /api/chat endpoint.

    Args:
        system: System prompt (the RAG context). Sent as a leading
            ``system`` message; omitted when empty.
        messages: OpenAI-style ``{"role", "content"}`` dicts. Any role other
            than ``"user"`` is sent as ``"assistant"``.
        model: Ollama model name.

    Returns:
        A dict with ``model``, ``messages`` and ``stream`` keys.
    """
    # Without this the system prompt would never reach the model.
    ollama_msgs = [{"role": "system", "content": system}] if system else []
    ollama_msgs.extend(
        {
            "role": "user" if m["role"] == "user" else "assistant",
            "content": m["content"],
        }
        for m in messages
    )
    return {"model": model, "messages": ollama_msgs, "stream": True}
|
||||
|
||||
|
||||
def _build_llm_request(provider: dict, system: str, messages: list):
    """Return ``(fmt, urllib Request)`` for the provider's streaming chat endpoint."""
    base_url = provider["base_url"].rstrip("/")
    # Only fall back to host-based detection when no format is configured.
    fmt = provider.get("format") or detect_provider_format(base_url)
    api_key = provider.get("api_key", "")
    model = provider.get("model", "gpt-4o-mini")

    if fmt == "anthropic":
        payload = format_messages_anthropic(system, messages, model)
        url = f"{base_url}/v1/messages"
        headers = {
            "x-api-key": api_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
        }
    elif fmt == "ollama":
        payload = format_messages_ollama(system, messages, model)
        url = f"{base_url}/api/chat"
        headers = {"Content-Type": "application/json"}
    else:
        # openai / openrouter / groq / default
        payload = format_messages_openai(system, messages, model)
        url = f"{base_url}/chat/completions"
        headers = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        if fmt == "openrouter":
            headers["HTTP-OpenRouter-AI-Model"] = model
        headers["Content-Type"] = "application/json"

    data = json.dumps(payload).encode()
    return fmt, urllib.request.Request(url, data=data, headers=headers)


def _llm_events_from_line(fmt: str, line: str) -> list:
    """Translate one non-empty, stripped stream line into zero or more event dicts."""
    if line.startswith("data:"):
        data_str = line[5:].strip()
        if data_str == "[DONE]":
            return [{"type": "done"}]
        try:
            chunk = json.loads(data_str)
        except json.JSONDecodeError:
            return [{"delta": data_str, "type": "raw"}]
    elif fmt == "ollama":
        # Ollama's /api/chat streams one bare JSON object per line, with no
        # SSE "data:" prefix.
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            return []
    else:
        # "event:" names, SSE comments and anything else carry no payload.
        return []

    if not isinstance(chunk, dict):
        return []

    if fmt == "ollama" and "choices" not in chunk:
        if chunk.get("error"):
            return [{"type": "error", "delta": str(chunk["error"])}]
        events = []
        message = chunk.get("message")
        text = message.get("content", "") if isinstance(message, dict) else ""
        if text:
            events.append({"delta": text, "type": "delta"})
        if chunk.get("done"):
            events.append({"type": "done"})
        return events

    if fmt == "anthropic":
        evt_type = chunk.get("type", "")
        if evt_type == "content_block_delta":
            text = (chunk.get("delta") or {}).get("text", "")
            return [{"delta": text, "type": "delta"}] if text else []
        if evt_type == "message_stop":
            return [{"type": "done"}]
        if evt_type == "error":
            err_msg = (chunk.get("error") or {}).get("message", str(chunk))
            return [{"type": "error", "delta": err_msg}]
        return []

    # OpenAI-compatible chunk. "choices" may be missing or an empty list
    # (e.g. a usage-only chunk), so never index it blindly.
    choices = chunk.get("choices")
    if not choices and chunk.get("error"):
        # NOTE(review): assumes an "error" object with no choices signals a
        # mid-stream failure; confirm against the providers in use.
        err = chunk["error"]
        err_msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
        return [{"type": "error", "delta": err_msg}]
    first = choices[0] if isinstance(choices, list) and choices else None
    choice = first if isinstance(first, dict) else {}
    delta = choice.get("delta") or {}

    events = []
    content = delta.get("content") or ""
    if content:
        events.append({"delta": content, "type": "delta"})
    # Only chunks that carry a function name are announced; chunks holding
    # just argument fragments would otherwise show up as "?" each time.
    names = [
        (tc.get("function") or {}).get("name")
        for tc in (delta.get("tool_calls") or [])
        if isinstance(tc, dict)
    ]
    names = [name for name in names if name]
    if names:
        events.append({"delta": f"\n[Using tools: {', '.join(names)}]", "type": "tool"})
    # finish_reason lives on the choice; the top level is kept as a fallback.
    if choice.get("finish_reason") or chunk.get("finish_reason"):
        events.append({"type": "done"})
    return events


async def call_llm_stream(provider: dict, system: str, messages: list):
    """Call an LLM provider and yield streaming event dicts.

    Args:
        provider: Provider dict with ``base_url`` and optional ``format``,
            ``api_key`` and ``model`` keys.
        system: System prompt.
        messages: OpenAI-style ``{"role", "content"}`` conversation.

    Yields:
        Dicts with a ``type`` of ``"delta"``, ``"tool"`` or ``"raw"`` (each
        with a ``delta`` text), ``"error"`` (``delta`` holds the message) or
        ``"done"``. The generator stops after the first ``done`` or
        ``error`` event. Failures are reported as ``error`` events rather
        than raised.

    The connection is opened and read in the default executor so the event
    loop stays responsive. Lines are read one at a time instead of in fixed
    4096-byte reads, so short deltas are forwarded as soon as their line is
    complete, and a final line without a trailing newline is still handled.
    """
    loop = asyncio.get_running_loop()
    resp = None
    try:
        fmt, req = _build_llm_request(provider, system, messages)
        resp = await loop.run_in_executor(
            None, lambda: urllib.request.urlopen(req, timeout=60)
        )
        while True:
            raw = await loop.run_in_executor(None, resp.readline)
            if not raw:
                break
            line = raw.decode("utf-8", errors="replace").strip()
            if not line:
                continue
            for event in _llm_events_from_line(fmt, line):
                yield event
                if event.get("type") in ("done", "error"):
                    return
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")[:500]
        yield {"type": "error", "delta": f"HTTP {e.code}: {body}"}
    except Exception as e:
        yield {"type": "error", "delta": str(e)}
    finally:
        if resp is not None:
            resp.close()
|
||||
|
||||
|
||||
# ── FastAPI App ──
|
||||
|
||||
try:
    from fastapi import FastAPI
    from fastapi.responses import StreamingResponse, JSONResponse, Response
    from pydantic import BaseModel
except ImportError:
    # Best-effort self-install. Running pip through the current interpreter
    # installs into the environment this process actually imports from; a
    # bare "pip" on PATH may belong to a different Python.
    import importlib
    import subprocess
    import sys

    print("Installing fastapi...")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "fastapi", "uvicorn", "httpx", "-q"],
        check=False,  # a failed install surfaces as ImportError just below
    )
    # Make packages installed after interpreter start-up importable.
    importlib.invalidate_caches()
    from fastapi import FastAPI
    from fastapi.responses import StreamingResponse, JSONResponse, Response
    from pydantic import BaseModel

app = FastAPI(title="Wiki VectorDB Chat")
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
    # Request body accepted by POST /chat/message and POST /chat/tunnel.

    # Latest user message; also used as the RAG search query.
    message: str
    # Id of the provider (preset or custom) that should answer.
    provider_id: str = "zai-coding"
    # Earlier conversation turns; the chat endpoints forward the last 10.
    history: list = []
    # Per-request switches for the two RAG sources.
    rag_wiki: bool = True
    rag_vector: bool = True
    # Selects an extra system-prompt hint in /chat/message
    # ("chat", "code" or "brain"; other values add nothing).
    mode: str = "chat"
|
||||
|
||||
|
||||
class ProviderSave(BaseModel):
    # Request body accepted by POST /providers/save; stored as-is in the
    # custom providers file.

    # Unique provider id; an existing custom entry with this id is replaced.
    id: str
    # Display name.
    name: str
    # API base URL (a trailing slash is stripped before requests are built).
    base_url: str
    # Model name sent to the provider.
    model: str
    # API key; empty means no key is sent.
    api_key: str = ""
    # Wire format: "anthropic" and "ollama" are handled specially, every
    # other value uses the OpenAI-compatible path.
    format: str = "openai"
    # Display icon.
    icon: str = "\u2b99"
    # Free-text description.
    description: str = ""
|
||||
|
||||
|
||||
@app.get("/providers/presets")
async def get_presets():
    """Return the built-in provider presets."""
    presets = PRESETS
    return presets
|
||||
|
||||
|
||||
@app.get("/providers")
async def list_providers():
    """Return every available provider: presets first, then custom ones."""
    # NOTE(review): custom entries are returned exactly as stored, which
    # includes their api_key field — confirm this endpoint is not reachable
    # by untrusted clients.
    providers = get_all_providers()
    return providers
|
||||
|
||||
|
||||
@app.post("/providers/save")
async def save_provider(p: ProviderSave):
    """Create or replace a custom provider, matched by id, and persist the list."""
    record = p.model_dump()
    stored = load_custom_providers()

    # Position of an existing entry with the same id, if there is one.
    slot = next(
        (idx for idx, item in enumerate(stored) if item.get("id") == p.id),
        None,
    )
    if slot is None:
        stored.append(record)
    else:
        stored[slot] = record

    save_custom_providers(stored)
    return {"ok": True, "provider": record}
|
||||
|
||||
|
||||
@app.delete("/providers/{provider_id}")
async def delete_provider(provider_id: str):
    """Remove every custom provider with the given id and persist the rest.

    Responds with ``{"ok": True}`` whether or not anything was removed.
    """
    remaining = []
    for entry in load_custom_providers():
        if entry.get("id") != provider_id:
            remaining.append(entry)
    save_custom_providers(remaining)
    return {"ok": True}
|
||||
|
||||
|
||||
@app.post("/chat/message")
async def chat_message(msg: ChatMessage):
    """Stream a chat completion for *msg* as server-sent events.

    Each event is a ``data: <json>`` line followed by a blank line. The
    stream always ends with a ``{"type": "done"}`` event once the provider
    stream is exhausted; an unknown provider id produces a single error
    event instead.
    """

    async def generate():
        provider = None
        for candidate in get_all_providers():
            if candidate.get("id") == msg.provider_id:
                provider = candidate
                break
        if not provider:
            not_found = json.dumps({"type": "error", "delta": "Provider not found"})
            yield "data: " + not_found + "\n\n"
            return

        # Conversation: the last 10 history entries plus the new user turn.
        messages = [*msg.history[-10:], {"role": "user", "content": msg.message}]

        # RAG context honours the per-request source toggles.
        rag_context = await build_rag_context(msg.message, msg.rag_wiki, msg.rag_vector)

        # Optional mode-specific suffix for the system prompt.
        hints_by_mode = {
            "chat": "",
            "code": "\n\nMODE: Coding. The user is working on code. Provide precise, well-structured code examples with explanations. Use markdown code blocks. Be concise and technical.",
            "brain": "\n\nMODE: Brainstorm. The user wants creative exploration. Think freely, offer multiple perspectives, suggest unconventional approaches. Be enthusiastic and expansive.",
        }
        system_prompt = rag_context + hints_by_mode.get(msg.mode, "")

        async for chunk in call_llm_stream(provider, system_prompt, messages):
            payload = json.dumps(chunk, ensure_ascii=False)
            yield "data: " + payload + "\n\n"
        yield "data: " + json.dumps({"type": "done"}) + "\n\n"

    # X-Accel-Buffering: no asks the nginx proxy not to buffer the stream.
    sse_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(generate(), media_type="text/event-stream", headers=sse_headers)
|
||||
|
||||
|
||||
@app.post("/chat/tunnel")
async def chat_tunnel(msg: ChatMessage):
    """Server-side token chat — uses ZAI_API_TOKEN env var if available.

    Streams the completion as server-sent events (``data: <json>`` lines),
    ending with a ``{"type": "done"}`` event. When the selected provider has
    no ``api_key`` and ``ZAI_API_TOKEN`` is set, the token is used for this
    request only; the stored provider is not modified.

    The request's ``rag_wiki`` / ``rag_vector`` toggles are honoured, the
    same way /chat/message honours them (both default to enabled).
    """

    async def generate():
        providers = get_all_providers()
        provider = next((p for p in providers if p.get("id") == msg.provider_id), None)
        if not provider:
            yield f"data: {json.dumps({'type': 'error', 'delta': 'Provider not found'})}\n\n"
            return

        # Use the server-side token if available (tunnel mode).
        # NOTE(review): the token is attached to whichever provider was
        # selected, including custom providers with an arbitrary base_url —
        # confirm it should not be limited to the Z.ai provider.
        token = os.environ.get("ZAI_API_TOKEN", "")
        if token and not provider.get("api_key"):
            provider = {**provider, "api_key": token}

        # Conversation: the last 10 history entries plus the new user turn.
        messages = [*msg.history[-10:], {"role": "user", "content": msg.message}]

        rag_context = await build_rag_context(msg.message, msg.rag_wiki, msg.rag_vector)

        async for chunk in call_llm_stream(provider, rag_context, messages):
            data = json.dumps(chunk, ensure_ascii=False)
            yield f"data: {data}\n\n"
        yield f"data: {json.dumps({'type': 'done'})}\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        # X-Accel-Buffering: no asks the nginx proxy not to buffer the stream.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Liveness probe that also reports how many providers are available."""
    provider_count = len(get_all_providers())
    return {"status": "ok", "providers": provider_count}
|
||||
|
||||
|
||||
class WikiSave(BaseModel):
    # Request body accepted by POST /chat/save-to-wiki.

    # Stored as the entry's "q" field.
    question: str
    # Stored as the entry's "a" field.
    answer: str
    # Stored as the entry's "topic" field.
    topic: str = "chat-saved"
|
||||
|
||||
|
||||
@app.post("/chat/save-to-wiki")
async def save_to_wiki(item: WikiSave):
    """Save a Q&A pair directly to wiki-kb.json.

    The KB is read and written as UTF-8 (entries are serialized with
    ``ensure_ascii=False``). The new content goes to a sibling temporary
    file that is then renamed over the original, so a concurrent reader
    never sees a half-written file.

    Returns:
        ``{"ok": True, "total": <entry count>}`` on success, or
        ``{"ok": False, "error": <message>}`` on any failure. Never raises.
    """
    try:
        kb_path = Path("/opt/blog/wiki-kb.json")
        kb = json.loads(kb_path.read_text(encoding="utf-8"))
        if not isinstance(kb, list):
            raise ValueError("wiki-kb.json does not contain a JSON array")

        kb.append({
            "q": item.question,
            "a": item.answer,
            "topic": item.topic,
            "author": "chat-assistant",
            "source": "chat-saved",
            "timestamp": time.strftime("%Y-%m-%d %H:%M"),
        })

        tmp_path = kb_path.with_name(kb_path.name + ".tmp")
        tmp_path.write_text(
            json.dumps(kb, ensure_ascii=False, indent=2), encoding="utf-8"
        )
        # Keep the original permission bits on the replacement file.
        os.chmod(tmp_path, kb_path.stat().st_mode & 0o7777)
        os.replace(tmp_path, kb_path)
        return {"ok": True, "total": len(kb)}
    except Exception as e:
        return {"ok": False, "error": str(e)}
|
||||
|
||||
|
||||
def main():
    """Start the uvicorn server on 127.0.0.1.

    The port defaults to 8770 and can be overridden with ``--port N`` on the
    command line; when the flag is given more than once the last one wins.
    """
    import uvicorn
    import sys

    argv = sys.argv
    port = 8770
    for idx, arg in enumerate(argv):
        has_value = idx + 1 < len(argv)
        if arg == "--port" and has_value:
            port = int(argv[idx + 1])

    print(f"Wiki VectorDB Chat starting on port {port}")
    uvicorn.run(app, host="127.0.0.1", port=port, log_level="warning")
|
||||
|
||||
|
||||
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user