From ae621ecbb54d0848e24b20e055bb0610bfa492ee Mon Sep 17 00:00:00 2001 From: admin Date: Wed, 3 Jun 2026 10:25:29 +0000 Subject: [PATCH] Initial release: Multi-provider AI chat with RAG FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend. Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history, chat modes, save-to-wiki, markdown rendering, SSE streaming. Co-Authored-By: Claude Opus 4.7 --- .gitignore | 18 + CHANGELOG.md | 48 ++ README.md | 75 +++ inject_wiki_chat.py | 347 ++++++++++ vector-db-service.py | 225 +++++++ wiki-api.py | 171 +++++ wiki-chat-proxy.py | 54 ++ wiki-chat-server.py | 90 +++ wiki-vector-chat.py | 503 +++++++++++++++ zportal-chat.html | 1473 ++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 3004 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 README.md create mode 100644 inject_wiki_chat.py create mode 100644 vector-db-service.py create mode 100755 wiki-api.py create mode 100755 wiki-chat-proxy.py create mode 100755 wiki-chat-server.py create mode 100644 wiki-vector-chat.py create mode 100644 zportal-chat.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..080e057 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Secrets +.wiki-api-token +.htpasswd-wiki +wiki-chat-providers.json + +# Data files +wiki-kb.json +vector-db/ +data/ + +# Backups +*.bak +*.bak-zcode + +# Old wiki HTML (not part of this project) +zai-ambassador-team-wiki-old.html +zai-ambassador-team-wiki.html.bak +zai-ambassadors-wiki.html diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..063da73 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,48 @@ +# Changelog + +## v2.0.0 (2026-06-03) — Odysseus UI + Multi-Provider + RAG + +### Added +- Complete Odysseus-style UI with Tokyo Night color palette +- Multi-provider LLM support (OpenAI, Anthropic, Ollama, OpenRouter, Groq, custom) +- RAG pipeline: dual-source context from Wiki KB (:8097) + VectorDB 
(:8099) +- Per-session RAG toggles (Wiki KB / VectorDB enable/disable) +- Chat modes: Chat, Code, Brainstorm with mode-specific system prompts +- Session history with localStorage persistence (up to 50 sessions) +- Save AI answers as new Q&A entries to Wiki KB +- Message action buttons: Copy, Redo, Save-to-Wiki on AI responses +- Markdown rendering in AI replies (bold, italic, code blocks, lists) +- Provider management: CRUD for custom providers, preset forking with API keys +- SSE streaming with format detection (OpenAI vs Anthropic) +- Model picker dropdown in input bar +- Manage Providers modal with select/edit/delete +- Quick action chips on welcome screen +- Mobile responsive layout with sidebar drawer + +### Backend (wiki-vector-chat.py) +- FastAPI service on port 8770 +- Shared API token auth from `/opt/blog/.wiki-api-token` +- `build_rag_context()` with per-source toggle support +- `call_llm_stream()` with format auto-detection +- Anthropic SSE parsing (content_block_delta, message_stop) +- `POST /chat/save-to-wiki` writes directly to wiki-kb.json +- `ChatMessage` model extended with rag_wiki, rag_vector, mode fields + +### Fixed +- "Save failed: fetch().json is not a function" — double await +- RAG chevron toggle checked CSS class instead of state +- addMessage wrong arguments (empty objects vs string roles) +- SSE done not breaking outer loop — streamDone flag +- Welcome screen never hidden — .hidden CSS class + toggle +- Preset providers not editable — forking with custom- prefix +- RAG not working — auth tokens for wiki-api and vector-db +- Anthropic message format — {role, content} not {type, text} +- Anthropic SSE parsing missing — added content_block_delta handler +- LLM ignoring RAG context — improved system prompt instructions +- Markdown showing raw tags in AI replies — renderMd() conversion + +## v1.0.0 (2026-05-28) — Initial + +- Basic chat UI embedded in wiki +- Single-provider Z.ai GLM-4 proxy (wiki-chat-server.js on :8098) +- Wiki KB search 
integration diff --git a/README.md b/README.md new file mode 100644 index 0000000..f54f2ae --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# Zportal Wiki VectorDB Chat + +Multi-provider AI chat with RAG (Wiki KB + VectorDB) for the Z.ai portal. + +## Architecture + +| Component | Port | File | Purpose | +|-----------|------|------|---------| +| **wiki-vector-chat** | 8770 | `wiki-vector-chat.py` | FastAPI chat backend — multi-provider LLM, RAG pipeline, SSE streaming | +| **wiki-api** | 8097 | `wiki-api.py` | KB keyword search over `wiki-kb.json` (1,301 Q&A entries) | +| **vector-db-service** | 8099 | `vector-db-service.py` | Embedding-based vector search (sentence-transformers, cosine similarity) on Discord/Reddit messages | +| **Frontend** | static | `zportal-chat.html` | Odysseus-style chat UI at `/zportal/chat` | +| **wiki-chat-server** | 8098 | `wiki-chat-server.py` | Legacy Z.ai GLM-4 proxy (being replaced) | +| **wiki-chat-proxy** | — | `wiki-chat-proxy.py` | Legacy chat proxy helper | + +## Features + +- **Multi-provider LLM** — OpenAI, Anthropic, Ollama, OpenRouter, Groq, custom endpoints +- **RAG pipeline** — Dual-source context from Wiki KB + VectorDB with per-session toggles +- **Chat modes** — Chat, Code, Brainstorm with mode-specific system prompts +- **Session history** — localStorage persistence, switch between sessions, auto-save +- **Save to Wiki** — Save AI answers as new Q&A entries in the Wiki KB +- **Message actions** — Copy, Redo, Save-to-Wiki buttons on AI responses +- **Markdown rendering** — Bold, italic, code blocks, lists in AI replies +- **SSE streaming** — Server-Sent Events for real-time token streaming +- **Provider management** — CRUD for custom providers, preset forking with API keys +- **Odysseus UI** — Tokyo Night palette, sidebar, chat bubbles, model picker + +## Provider Presets + +| ID | Name | Format | +|----|------|--------| +| zai-coding | Z.ai Coding Plan | OpenAI | +| openadapter | OpenAdapter | OpenAI | +| openrouter | OpenRouter | OpenRouter | +| crofai | Crof.AI | 
OpenAI | +| opencode-zen | Opencode Zen | OpenAI | + +## Nginx Config + +```nginx +location = /zportal/chat { + default_type text/html; + alias /opt/zportal/chat.html; +} +location ^~ /zportal/wiki/api/chat/ { + proxy_pass http://127.0.0.1:8770/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 120s; +} +``` + +## Systemd + +```bash +sudo systemctl restart wiki-vector-chat +sudo systemctl status wiki-vector-chat +``` + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/chat/message` | Main chat (SSE stream) | +| POST | `/chat/tunnel` | Server-side token chat | +| POST | `/chat/save-to-wiki` | Save Q&A to wiki KB | +| GET | `/providers` | List all providers | +| GET | `/providers/presets` | Built-in presets | +| POST | `/providers/save` | Save/edit custom provider | +| DELETE | `/providers/{id}` | Remove custom provider | +| GET | `/health` | Health check | diff --git a/inject_wiki_chat.py b/inject_wiki_chat.py new file mode 100644 index 0000000..e3a8cab --- /dev/null +++ b/inject_wiki_chat.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +"""Inject VectorDB Chat panel into wiki HTML files. + +Usage: python3 inject_wiki_chat.py [--file /path/to/wiki.html] +If no --file, modifies both ambassador and support wikis in-place. 
+""" + +import os +import re +import sys + +WIKI_FILES = [ + "/opt/blog/zai-ambassador-team-wiki.html", + "/opt/blog/zai-support-wiki.html", +] + +CHAT_CSS = """ +/* ── VectorDB Chat Panel ── */ +.vdb-fab{position:fixed;bottom:24px;right:24px;width:56px;height:56px;border-radius:28px; +background:linear-gradient(135deg,#4a9eff,#a78bfa);border:none;color:#fff;font-size:22px; +cursor:pointer;box-shadow:0 4px 20px rgba(74,158,255,.35);z-index:999;transition:all .2s;display:flex;align-items:center;justify-content:center} +.vdb-fab:hover{transform:scale(1.08);box-shadow:0 6px 28px rgba(74,158,255,.45)} +.vdb-chat-container{position:fixed;bottom:0;right:0;width:420px;height:560px;background:#131620; +border:1px solid #252a3b;border-radius:16px 16px 0 0;z-index:998;display:flex; +flex-direction:column;box-shadow:-4px 0 30px rgba(0,0,0,.4);font-family:'Inter',system-ui,sans-serif; +transition:opacity .25s, transform .25s cubic-bezier(.175,.885,.32,1.275)} +.vdb-chat-container.hidden{opacity:0;pointer-events:none;transform:translateY(20px)} +.vdb-chat-header{display:flex;align-items:center;gap:10px;padding:14px 16px; +background:#0c0e14;border-bottom:1px solid #252a3b;flex-shrink:0} +.vdb-chat-title{font-size:13px;font-weight:600;color:#e8eaed;flex:1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} +.vdb-provider-select{font-size:11px;padding:4px 8px;border:1px solid #333;border-radius:6px; +background:#1a1d26;color:#ccc;cursor:pointer;outline:none;color:#e8eaed;min-width:120px} +.vdb-provider-select:focus{border-color:#4a9eff} +.vdb-toggle{background:none;border:none;color:#888;font-size:18px;cursor:pointer;padding:4px 8px;line-height:1} +.vdb-toggle:hover{color:#fff} +.vdb-messages{flex:1;overflow-y:auto;padding:12px 16px;display:flex;flex-direction:column;gap:8px; +scrollbar-width:4px;scrollbar-thumb:#333} +.vdb-msg{max-width:85%;padding:10px 14px;border-radius:12px;font-size:13px;line-height:1.5; +color:#b0b5bc;word-wrap:break-word} 
+.vdb-msg-user{align-self:flex-end;background:#1e3a5f;border:1px solid #2a4070;margin-left:auto} +.vdb-msg-assistant{align-self:flex-start;background:#1a1f2e;border:1px solid #252a3b} +.vdb-msg-meta{font-size:10px;color:#666;margin-top:4px;display:flex;gap:8px;align-items:center} +.vdb-msg-provider{background:rgba(74,158,255,.1);color:#4a9eff;padding:1px 6px;border-radius:3px;font-weight:600} +.vdb-msg-error{background:rgba(248,113,113,.1);color:#f87171;border-color:rgba(248,113,113,.3)} +.vdb-typing{font-size:11px;color:#7c8497;font-style:italic;padding:8px 16px 0;display:none} +.vdb-typing.active{display:block} +.vdb-input-row{display:flex;gap:8px;padding:12px 16px;border-top:1px solid #252a3b; +background:#0c0e14;flex-shrink:0} +.vdb-input{flex:1;padding:10px 12px;border:1px solid #333;border-radius:8px; +background:#1a1d26;color:#e8eaed;font-size:13px;font-family:inherit;resize:none; +outline:none;min-height:20px;max-height:80px;line-height:1.4} +.vdb-input:focus{border-color:#4a9eff} +.vdb-send{padding:10px 18px;border:1px solid #333;border-radius:8px;background:rgba(74,158,255,.1); +color:#4a9eff;font-weight:600;font-size:12px;cursor:pointer;white-space:nowrap; +transition:all .15s} +.vdb-send:hover{background:rgba(74,158,255,.2);color:#fff} +.vdb-send:disabled{opacity:.4;cursor:not-allowed} +.vdb-settings{border-top:1px solid #252a3b;padding:12px 16px;display:none} +.vdb-settings-row{display:flex;gap:8px;margin-bottom:8px;align-items:center} +.vdb-settings-row label{font-size:11px;color:#888;width:70px;flex-shrink:0} +.vdb-settings-row input,.vdb-settings-row select{flex:1;padding:6px 8px;border:1px solid #333; +border-radius:4px;background:#1a1d26;color:#e8eaed;font-size:11px} +.vdb-settings-btn{padding:4px 12px;border-radius:4px;font-size:10px;cursor:pointer; +border:1px solid #333;background:#1a1d26;color:#aaa;transition:all .15s} +.vdb-settings-btn:hover{color:#fff;border-color:#444} 
+.vdb-settings-btn.danger{color:#f87171;border-color:rgba(248,113,113,.3)} +@media(max-width:600px){ +.vdb-chat-container{width:100vw;height:100vh;border-radius:0;right:0;bottom:0} +} +""" + +CHAT_HTML = """ + +
+
+
+ + +
+ + + +""" + +CHAT_JS = """ +(function(){ +const CHAT_API = (function(){ + const p = location.pathname; + return p.endsWith('/') ? p : p + '/'; +})(); +const $ = id => document.getElementById(id); +let _providers = []; +let _activeProvider = null; +let _history = []; +let _isStreaming = false; + +function esc(s){ if(!s) return ''; const d=document.createElement('div'); d.textContent=s; return d.innerHTML; } + +function providerIcon(p){ return p.icon || '\u2B99'; } +function providerLabel(p){ return p.icon + ' ' + p.name; } + +function renderProviders(){ + const sel = $('vdb-provider-select'); + sel.innerHTML = ''; + _providers.forEach(p => { + const opt = document.createElement('option'); + opt.value = p.id; + opt.textContent = providerLabel(p); + if(_activeProvider && p.id === _activeProvider.id) opt.selected = true; + sel.appendChild(opt); + }); +} + +function addMsg(content, isUser, meta){ + const msgs = $('vdb-chat-messages'); + const div = document.createElement('div'); + div.className = 'vdb-msg vdb-msg-' + (isUser ? 'user' : 'assistant'); + let html = ''; + if(meta && meta.provider) html += '
' + providerIcon(meta.provider) + ' ' + meta.provider.name + '
'; + if(meta && meta.error) { div.className += ' vdb-msg-error'; html = content; } + else { html += esc(content).replace(/\\n/g, '
'); } + div.innerHTML = html; + msgs.appendChild(div); + msgs.scrollTop = msgs.scrollHeight; + return div; +} + +function setStreaming(on){ + _isStreaming = on; + $('vdb-typing').className = 'vdb-typing' + (on ? ' active' : ''); + $('vdb-send').disabled = on; +} + +async function sendMessage(){ + const input = $('vdb-input'); + const text = (input.value || '').trim(); + if(!text || !_activeProvider || _isStreaming) return; + _history.push({role:'user', content:text}); + addMsg(text, true, null); + input.value = ''; + input.style.height = 'auto'; + setStreaming(true); + + try{ + const resp = await fetch(CHAT_API + 'chat/message', { + method:'POST', + headers:{'Content-Type':'application/json'}, + body:JSON.stringify({message:text, provider_id:_activeProvider.id, history:_history}) + }); + const reader = resp.body.getReader(); + const decoder = new TextDecoder(); + let buf = ''; + let fullText = ''; + let assistantDiv = null; + + while(true){ + const {done, value} = await reader.read(); + if(value) buf += decoder.decode(value, {stream:true}); + if(done) break; + const lines = buf.split('\\n'); + buf = lines.pop(); // keep incomplete line + for(const line of lines){ + if(!line.startsWith('data: ') || !line.slice(5)) continue; + try{ + const chunk = JSON.parse(line.slice(5)); + if(chunk.type === 'done'){ setStreaming(false); break; } + if(chunk.type === 'error'){ + if(!assistantDiv) assistantDiv = addMsg(chunk.delta, false, {error:true}); + else assistantDiv.textContent += chunk.delta; + continue; + } + if(chunk.type === 'delta'){ + if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider}); + fullText += chunk.delta; + assistantDiv.innerHTML = esc(fullText.replace(/\\n/g, '
')); + assistantDiv.scrollIntoView({block:'nearest', behavior:'smooth'}); + } else if(chunk.type === 'tool' || chunk.type === 'raw'){ + if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider}); + assistantDiv.innerHTML += esc(chunk.delta).replace(/\\n/g, '
'); + } + }catch(e){ /* skip malformed */ } + } + } + // Process remaining buffer + if(buf){ + for(const line of buf.split('\\n')){ + if(!line.startsWith('data: ')) continue; + try{ + const chunk = JSON.parse(line.slice(5)); + if(chunk.type === 'delta' && chunk.delta){ + if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider}); + fullText += chunk.delta; + assistantDiv.innerHTML = esc(fullText.replace(/\\n/g, '
')); + } + }catch(e){} + } + } + _history.push({role:'assistant', content:fullText || '(no response)'}); + } catch(e){ + addMsg('Error: ' + e.message, false, {error:true}); + } + setStreaming(false); +} + +// Provider selection +$('vdb-provider-select').addEventListener('change', function(){ + const pid = this.value; + _activeProvider = _providers.find(p => p.id === pid) || null; +}); + +// Send button +$('vdb-send').addEventListener('click', sendMessage); +$('vdb-input').addEventListener('keydown', function(e){ if(e.key === 'Enter' && !e.shiftKey){ e.preventDefault(); sendMessage(); }}); + +// Toggle chat panel +let _chatOpen = false; +$('vdb-chat-btn').addEventListener('click', function(){ + _chatOpen = !_chatOpen; + const chat = $('vdb-chat'); + chat.classList.toggle('hidden', !_chatOpen); + this.textContent = _chatOpen ? '\u2715' : '\u1F4AC'; + this.title = _chatOpen ? 'Close Chat' : 'Open Wiki Chat'; +}); + +$('vdb-toggle').addEventListener('click', function(){ + _chatOpen = false; + $('vdb-chat').classList.add('hidden'); + this.textContent = '\u25B2'; + $('vdb-chat-btn').textContent = '\u1F4AC'; + $('vdb-chat-btn').title = 'Open Wiki Chat'; +}); + +// Settings +$('vdb-cust-save').addEventListener('click', async function(){ + const name = $('vdb-cust-name').value.trim(); + const url = $('vdb-cust-url').value.trim(); + const model = $('vdb-cust-model').value.trim(); + const key = $('vdb-cust-key').value.trim(); + if(!name || !url || !model){ alert('Name, URL, and Model are required'); return; } + const provider = {id:'custom-'+Date.now(), name:name, base_url:url, model:model, + api_key:key, format:'openai', icon:'\u2699', description:'Custom'}; + // Save via API + try{ + await fetch(CHAT_API + 'providers/save', { + method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(provider) + }); + _providers = await (await fetch(CHAT_API + 'providers')).json(); + renderProviders(); + _activeProvider = provider; + $('vdb-settings').style.display = 
'none'; + alert('Provider saved!'); + } catch(e){ alert('Save failed: ' + e.message); } +}); + +$('vdb-cust-cancel').addEventListener('click', function(){ + $('vdb-settings').style.display = 'none'; +}); + +// Auto-open settings if no providers loaded +function checkProviders(){ + if(!_providers.length){ + $('vdb-settings').style.display = ''; + } +} + +// Init +async function init(){ + try{ + const [presetsResp, customResp] = await Promise.all([ + fetch(CHAT_API + 'providers/presets'), + fetch(CHAT_API + 'providers') + ]); + _presets = await presetsResp.json(); + _custom = await customResp.json(); + _providers = [..._presets, ..._custom]; + renderProviders(); + // Auto-select first available provider + if(_providers.length > 0 && !_activeProvider){ + _activeProvider = _providers[0]; + renderProviders(); + } + checkProviders(); + } catch(e){ + console.error('Chat init error:', e); + $('vdb-chat-messages').innerHTML = '
Failed to load chat service.
'; + } +} + +init(); +})(); +""" + + +def inject_chat(html_content: str) -> str: + """Inject chat CSS, HTML, and JS into wiki HTML.""" + # Inject CSS before + if "" in html_content: + html_content = html_content.replace("", CHAT_CSS + "", 1) + + # Inject HTML before + if "" in html_content: + html_content = html_content.replace("", CHAT_HTML + "\n", 1) + + return html_content + + +def main(): + files = WIKI_FILES + # Check for --file argument + if "--file" in sys.argv: + idx = sys.argv.index("--file") + 1 + if idx < len(sys.argv): + files = [sys.argv[idx]] + + for fpath in files: + if not os.path.exists(fpath): + print(f"SKIP: {fpath} not found") + continue + with open(fpath, "r") as f: + content = f.read() + # Check if already injected + if "vdb-chat-container" in content: + print(f"SKIP: {fpath} already has chat injected") + continue + new_content = inject_chat(content) + with open(fpath, "w") as f: + f.write(new_content) + print(f"OK: {fpath} ({len(new_content)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/vector-db-service.py b/vector-db-service.py new file mode 100644 index 0000000..20eac79 --- /dev/null +++ b/vector-db-service.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +"""Z.ai Wiki Vector DB Service + +Stores Discord messages as vector embeddings using sentence-transformers. +Provides a search API for the wiki chat system to query as an additional data source. 
+ +Data sources: +- Server 1346756824233148527 (Z.ai Community) +- Server 1410352583364841555 (Z.ai Mod Server) +- Channel 1476364011091136544 (Z.ai Mod Channel) + +Endpoints: +- POST /vector/search - Search with a query, return top-K matches +- POST /vector/index - Add messages to the index +- GET /vector/stats - Get index statistics +- POST /vector/rebuild - Rebuild from stored messages +""" + +import json +import os +import glob +import numpy as np +from flask import Flask, request, jsonify, make_response +from sentence_transformers import SentenceTransformer + +app = Flask(__name__) + +DATA_DIR = os.environ.get('VECTOR_DB_DIR', '/opt/blog/vector-db') +MESSAGES_FILE = os.path.join(DATA_DIR, 'messages.json') +EMBEDDINGS_FILE = os.path.join(DATA_DIR, 'embeddings.npy') +META_FILE = os.path.join(DATA_DIR, 'meta.json') + +os.makedirs(DATA_DIR, exist_ok=True) + +model = None +embeddings = None +meta = [] # parallel array: [{id, content, source, author, channel, server, timestamp, link}, ...] 
+ + +def get_model(): + global model + if model is None: + print('[VectorDB] Loading sentence-transformer model...') + model = SentenceTransformer('all-MiniLM-L6-v2') + print('[VectorDB] Model loaded') + return model + + +def load_index(): + global embeddings, meta + if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE): + embeddings = np.load(EMBEDDINGS_FILE) + meta = json.load(open(META_FILE)) + print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={embeddings.shape[1]}') + else: + embeddings = np.empty((0, 384), dtype=np.float32) + meta = [] + print('[VectorDB] No existing index, starting fresh') + + +def save_index(): + np.save(EMBEDDINGS_FILE, embeddings) + json.dump(meta, open(META_FILE, 'w')) + print(f'[VectorDB] Saved index: {len(meta)} entries') + + +def load_messages(): + """Load raw messages from JSON files""" + msgs = [] + for f in glob.glob(os.path.join(DATA_DIR, 'messages_*.json')): + msgs.extend(json.load(open(f))) + return msgs + + +@app.after_request +def add_cors(response): + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + return response + + +@app.route('/vector/stats', methods=['GET', 'OPTIONS']) +def stats(): + if request.method == 'OPTIONS': + return make_response('', 200) + sources = {} + for m in meta: + s = m.get('source', 'unknown') + sources[s] = sources.get(s, 0) + 1 + return jsonify({ + 'total': len(meta), + 'dimension': int(embeddings.shape[1]) if embeddings.shape[0] > 0 else 0, + 'sources': sources, + 'is_indexed': len(meta) > 0, + }) + + +@app.route('/vector/search', methods=['POST', 'OPTIONS']) +def search(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + query = body.get('query', '') + top_k = min(body.get('top_k', 10), 50) + + if not query or embeddings.shape[0] == 0: + return 
jsonify({'results': [], 'query': query}) + + mdl = get_model() + q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32) + + # Cosine similarity (embeddings are already normalized) + scores = (embeddings @ q_emb.T).flatten() + + top_idx = np.argsort(scores)[::-1][:top_k] + + results = [] + for i in top_idx: + if scores[i] < 0.1: # threshold + break + results.append({ + 'content': meta[i]['content'], + 'source': meta[i].get('source', ''), + 'author': meta[i].get('author', ''), + 'channel': meta[i].get('channel', ''), + 'server': meta[i].get('server', ''), + 'timestamp': meta[i].get('timestamp', ''), + 'link': meta[i].get('link', ''), + 'score': float(scores[i]), + }) + + return jsonify({'results': results, 'query': query, 'total': len(meta)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@app.route('/vector/index', methods=['POST', 'OPTIONS']) +def index_messages(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + messages = body.get('messages', []) + source = body.get('source', 'unknown') + + if not messages: + return jsonify({'error': 'messages required'}), 400 + + global embeddings, meta + + mdl = get_model() + texts = [m.get('content', '')[:2000] for m in messages] + new_emb = mdl.encode(texts, normalize_embeddings=True, show_progress_bar=False, + batch_size=64).astype(np.float32) + + start_idx = len(meta) + for m in messages: + meta.append({ + 'id': m.get('id', ''), + 'content': m.get('content', ''), + 'source': source, + 'author': m.get('author', ''), + 'channel': m.get('channel', ''), + 'server': m.get('server', ''), + 'timestamp': m.get('timestamp', ''), + 'link': m.get('link', ''), + }) + + if embeddings.shape[0] == 0: + embeddings = new_emb + else: + embeddings = np.vstack([embeddings, new_emb]) + + save_index() + return jsonify({'indexed': len(messages), 'total': len(meta)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + 
+@app.route('/vector/rebuild', methods=['POST', 'OPTIONS']) +def rebuild(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + messages = load_messages() + if not messages: + return jsonify({'error': 'No messages found. Run scraper first.'}), 404 + + global embeddings, meta + meta = [] + mdl = get_model() + + # Filter out empty/short messages + valid = [m for m in messages if len(m.get('content', '').strip()) > 10] + print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...') + + texts = [m.get('content', '')[:2000] for m in valid] + embeddings = mdl.encode(texts, normalize_embeddings=True, + show_progress_bar=True, batch_size=128).astype(np.float32) + + for m in valid: + meta.append({ + 'id': m.get('id', ''), + 'content': m.get('content', ''), + 'source': m.get('source', ''), + 'author': m.get('author', ''), + 'channel': m.get('channel', ''), + 'server': m.get('server', ''), + 'timestamp': m.get('timestamp', ''), + 'link': m.get('link', ''), + }) + + save_index() + return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +if __name__ == '__main__': + load_index() + port = int(os.environ.get('VECTOR_DB_PORT', 8099)) + print(f'[VectorDB] Service running on port {port}') + app.run(host='0.0.0.0', port=port) diff --git a/wiki-api.py b/wiki-api.py new file mode 100755 index 0000000..6fbe90c --- /dev/null +++ b/wiki-api.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +"""Z.ai Wiki KB Search API - token-protected""" +import json, os, sys, urllib.parse, hashlib +from http.server import HTTPServer, BaseHTTPRequestHandler + +KB_PATH = "/opt/blog/wiki-kb.json" +TOKEN_PATH = "/opt/blog/.wiki-api-token" +PORT = 8097 +LOG_PATH = "/opt/blog/data/search-logs.json" +MAX_LOG_ENTRIES = 5000 + +def load_logs(): + if os.path.exists(LOG_PATH): + try: + with open(LOG_PATH) as f: + return json.load(f) + except: + pass + return [] + +def 
save_logs(logs): + os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True) + # Keep only last MAX_LOG_ENTRIES + with open(LOG_PATH, 'w') as f: + json.dump(logs[-MAX_LOG_ENTRIES:], f) + +def log_search(source, query, results, meta=None): + logs = load_logs() + entry = { + "ts": __import__('time').strftime("%Y-%m-%dT%H:%M:%SZ"), + "source": source, + "query": query, + "result_count": len(results) if isinstance(results, list) else 0, + "top_results": [ + {"q": r.get("q", r.get("content", ""))[:100], "score": r.get("score", 0), "source": r.get("source", "")} + for r in (results[:3] if isinstance(results, list) else []) + ], + } + if meta: + entry["meta"] = meta + logs.append(entry) + save_logs(logs) + return entry + + + +# Load or generate token +def load_token(): + if os.path.exists(TOKEN_PATH): + with open(TOKEN_PATH) as f: + return f.read().strip() + return None + +API_TOKEN = load_token() + +with open(KB_PATH) as f: + KB = json.load(f) + +def search_kb(query, topic=None, limit=20): + query_lower = query.lower() + query_words = set(query_lower.split()) + results = [] + for entry in KB: + score = 0 + q_text = entry.get("q", "").lower() + a_text = entry.get("a", "").lower() + q_words = set(q_text.split()) + a_words = set(a_text.split()) + score += len(query_words & q_words) * 3 + score += len(query_words & a_words) * 1 + if query_lower in q_text: score += 10 + if query_lower in a_text: score += 5 + if topic and entry.get("topic", "").lower() != topic.lower(): score -= 50 + if score > 0: + results.append({**entry, "score": score}) + results.sort(key=lambda x: -x["score"]) + return results[:limit] + +def check_auth(params, headers): + if not API_TOKEN: + return False + # Check query param ?token=... + token = params.get("token", [""])[0] + if token == API_TOKEN: + return True + # Check header Authorization: Bearer ... 
+ auth = headers.get("Authorization", "") + if auth.startswith("Bearer "): + if auth[7:] == API_TOKEN: + return True + # Check header X-API-Key + api_key = headers.get("X-Api-Key", "") + if api_key == API_TOKEN: + return True + return False + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + parsed = urllib.parse.urlparse(self.path) + params = urllib.parse.parse_qs(parsed.query) + + if not check_auth(params, self.headers): + self.send_response(401) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(json.dumps({"error": "Unauthorized. Provide ?token=YOUR_TOKEN or Authorization: Bearer YOUR_TOKEN"}).encode()) + return + + if parsed.path == "/search": + query = params.get("q", [""])[0] + topic = params.get("topic", [None])[0] + limit = int(params.get("limit", [20])[0]) + if not query: + body = json.dumps({"error": "Missing ?q= parameter"}).encode() + else: + results = search_kb(query, topic, limit) + log_search("kb", query, results, {"topic": topic, "limit": limit, "ip": self.headers.get("X-Real-IP", self.client_address[0])}) + body = json.dumps({"query": query, "count": len(results), "results": results}, ensure_ascii=False).encode() + elif parsed.path == "/kb": + body = json.dumps(KB, ensure_ascii=False).encode() + elif parsed.path == "/logs": + logs = load_logs() + body = json.dumps({"total": len(logs), "logs": logs[::-1]}, ensure_ascii=False).encode() + else: + self.send_response(404) + self.end_headers() + return + + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Content-Length", len(body)) + self.end_headers() + self.wfile.write(body) + + def do_DELETE(self): + parsed = urllib.parse.urlparse(self.path) + params = urllib.parse.parse_qs(parsed.query) + if not check_auth(params, self.headers): + self.send_response(401) + 
def handler(event):
    """Proxy a chat request to the z.ai chat-completions endpoint.

    Args:
        event: Mapping with a ``method`` key (HTTP verb) and a ``body`` key
            holding JSON text shaped like ``{"messages": [...]}``.

    Returns:
        A dict with ``status`` and ``body`` keys (plus ``headers`` on
        success): 405 for non-POST requests, 400 for a malformed body or
        missing messages, the upstream status code when the API rejects the
        request, and 500 for any other failure.
    """
    try:
        if event.get('method') != 'POST':
            return {'status': 405, 'body': 'Method not allowed'}

        # A missing or None body is treated as an empty JSON object so the
        # caller gets the "messages required" 400 instead of a 500.
        try:
            body = json.loads(event.get('body') or '{}')
        except (TypeError, ValueError):
            return {'status': 400, 'body': json.dumps({'error': 'invalid JSON body'})}

        messages = body.get('messages') if isinstance(body, dict) else None
        if not messages:
            return {'status': 400, 'body': json.dumps({'error': 'messages required'})}

        api_url = 'https://api.z.ai/api/coding/paas/v4/chat/completions'

        payload = json.dumps({
            'model': 'glm-4-plus',
            'messages': messages,
            'temperature': 0.7,
            'max_tokens': 2000,
        }).encode()

        headers = {'Content-Type': 'application/json'}
        if ZAI_TOKEN:
            headers['Authorization'] = 'Bearer ' + ZAI_TOKEN

        req = urllib.request.Request(api_url, data=payload, headers=headers, method='POST')

        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                data = json.loads(resp.read().decode())
        except urllib.error.HTTPError as e:
            try:
                err_json = json.loads(e.read().decode() if e.fp else '')
                err_msg = err_json.get('error', {}).get('message', err_json.get('message', str(e)))
            except (ValueError, AttributeError, TypeError):
                # Upstream error body was not the expected JSON object.
                err_msg = str(e)
            return {'status': e.code, 'body': json.dumps({'error': err_msg})}

        # `choices` may be present but empty; fall back to an empty reply
        # rather than raising IndexError.
        choices = data.get('choices') or [{}]
        content = choices[0].get('message', {}).get('content', '')
        return {
            'status': 200,
            'headers': {'Content-Type': 'application/json'},
            'body': json.dumps({'response': content}),
        }
    except Exception as e:
        # Last-resort boundary: always answer with a JSON error payload.
        return {'status': 500, 'body': json.dumps({'error': str(e)})}
@app.route('/chat/wiki', methods=['POST', 'OPTIONS'])
def chat_wiki():
    """Proxy a chat request to z.ai using a caller- or server-supplied token.

    Expects a JSON object with a ``messages`` list and an optional ``token``;
    when ``token`` is absent the server-side ``ZAI_TOKEN`` is used.

    Returns:
        ``{"response": text}`` on success, otherwise ``{"error": message}``
        with 400 (bad body / no messages), 401 (no token), the upstream
        status code (API rejected the call) or 500 (anything else).
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)

    # silent=True yields None for malformed JSON instead of raising, so a
    # client mistake is reported as 400 rather than falling into the 500 path.
    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        return jsonify({'error': 'Invalid JSON body'}), 400

    messages = body.get('messages', [])
    if not messages:
        return jsonify({'error': 'messages required'}), 400

    token = body.get('token') or ZAI_TOKEN
    if not token:
        return jsonify({'error': 'No token provided'}), 401

    try:
        content = call_zai(messages, token)
    except urllib.error.HTTPError as e:
        err_body = e.read().decode() if e.fp else ''
        try:
            err_json = json.loads(err_body)
            err_msg = err_json.get('error', {}).get('message', err_json.get('message', str(e)))
        except Exception:
            # Upstream error body was not the expected JSON object.
            err_msg = str(e)
        return jsonify({'error': err_msg}), e.code
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    return jsonify({'response': content})
if __name__ == '__main__':
    # Port and bind address both come from the environment. The default
    # address is unchanged ('0.0.0.0', all interfaces).
    # NOTE(review): /chat/wiki-tunnel spends the server token without any
    # auth check, so consider WIKI_CHAT_HOST=127.0.0.1 behind a reverse proxy.
    port = int(os.environ.get('WIKI_CHAT_PORT', 8098))
    host = os.environ.get('WIKI_CHAT_HOST', '0.0.0.0')
    print(f'[WikiChat] LLM proxy running on port {port}')
    app.run(host=host, port=port)
# Built-in provider presets. Each entry carries the provider id, display
# name, API base URL, default model, wire format, a one-character icon and a
# short description.
#
# Icons outside the Basic Multilingual Plane need the 8-digit ``\U`` escape:
# ``"\u1f512"`` is parsed as U+1F51 followed by the literal digit "2", not as
# the single code point U+1F512.
PRESETS = [
    {
        "id": "zai-coding",
        "name": "Z.ai Coding Plan",
        "base_url": "https://api.z.ai/api/coding/paas/v4",
        "model": "glm-4-plus",
        "format": "openai",
        "icon": "\u26a1",
        "description": "Official Z.ai coding plan API",
    },
    {
        "id": "openadapter",
        "name": "OpenAdapter",
        "base_url": "https://api.openadapter.com/v1",
        "model": "gpt-4o-mini",
        "format": "openai",
        "icon": "\U0001f512",
        "description": "OpenAdapter unified API",
    },
    {
        "id": "openrouter",
        "name": "OpenRouter",
        "base_url": "https://openrouter.ai/api/v1",
        "model": "anthropic/claude-sonnet-4",
        "format": "openrouter",
        "icon": "\U0001f6e3",
        "description": "Model router across providers",
    },
    {
        "id": "crofai",
        "name": "Crof.AI",
        "base_url": "https://api.crof.ai/v1",
        "model": "crof-4-plus",
        "format": "openai",
        "icon": "\U0001f42a",
        "description": "Crof AI models",
    },
    {
        "id": "opencode-zen",
        "name": "Opencode Zen",
        "base_url": "https://api.zen.opencode.com/v1",
        "model": "glm-4-plus",
        "format": "openai",
        "icon": "\U0001f9e0",
        "description": "Opencode Zen hosted models",
    },
]
def detect_provider_format(base_url: str) -> str:
    """Guess a provider's wire format from the host name of its base URL.

    Args:
        base_url: Provider base URL, with or without a scheme
            (``"https://api.groq.com/openai/v1"`` or ``"localhost:11434"``).

    Returns:
        ``"ollama"``, ``"anthropic"``, ``"openrouter"`` or ``"groq"`` when the
        host matches, otherwise ``"openai"``.
    """
    from urllib.parse import urlparse

    host = urlparse(base_url).hostname
    if host is None and "://" not in base_url:
        # Scheme-less input has no netloc, so ``hostname`` is None; re-parse
        # it as a network-path reference to recover the host.
        host = urlparse(f"//{base_url}").hostname
    host = (host or "").lower()

    if "ollama" in host or host in ("localhost", "127.0.0.1"):
        return "ollama"
    for marker in ("anthropic", "openrouter", "groq"):
        if marker in host:
            return marker
    return "openai"
async def build_rag_context(user_message: str, rag_wiki: bool = True, rag_vector: bool = True) -> str:
    """Assemble the system prompt from the enabled retrieval sources.

    Runs ``search_kb`` (limit 3) and/or ``search_vector`` (top_k 5) for
    ``user_message`` depending on the two flags, then returns the fixed
    instruction preamble followed by one section per enabled source, or a
    single "disabled" line when both flags are off.
    """
    lookups = []
    if rag_wiki:
        lookups.append(search_kb(user_message, 3))
    if rag_vector:
        lookups.append(search_vector(user_message, 5))

    # gather() preserves submission order: KB first (if enabled), then vector.
    answers = iter(await asyncio.gather(*lookups)) if lookups else iter(())
    kb_text = next(answers) if rag_wiki else ""
    vector_text = next(answers) if rag_vector else ""

    sections = [
        "You are Z.ai Wiki Assistant. Use ALL the knowledge sources below to answer the user's question.",
        "Draw from both the Wiki KB and Community Messages. Synthesize information even from partial matches.",
        "If the context mentions anything relevant, include it in your answer. Be specific — quote authors, channels, and details when available.",
        "Only say you don't have information if the sources are truly empty or completely unrelated.",
        "",
    ]
    if rag_wiki:
        sections.extend(["=== Wiki Knowledge Base ===", kb_text or "(no KB results found)", ""])
    if rag_vector:
        sections.extend([
            "=== Related Community Messages (Discord/Reddit) ===",
            vector_text or "(no community messages found)",
        ])
    if not (rag_wiki or rag_vector):
        sections.append("(RAG sources disabled for this session)")
    return "\n".join(sections)
async def call_llm_stream(provider: dict, system: str, messages: list):
    """Call an LLM provider and yield its streamed reply as event dicts.

    Args:
        provider: Provider record with ``base_url`` and optional ``format``,
            ``api_key`` and ``model`` keys.
        system: System prompt (RAG context plus mode hints).
        messages: Conversation as ``{"role", "content"}`` dicts.

    Yields:
        ``{"type": "delta", "delta": text}`` for generated text,
        ``{"type": "tool", "delta": text}`` when the model names a tool,
        ``{"type": "raw", "delta": text}`` for non-JSON data lines,
        ``{"type": "error", "delta": message}`` on failure, and
        ``{"type": "done"}`` when the provider signals completion.
    """
    base_url = provider["base_url"].rstrip("/")
    # Auto-detect only when no format is stored; a dict.get() default would
    # be evaluated eagerly on every call.
    fmt = provider.get("format") or detect_provider_format(base_url)
    api_key = provider.get("api_key", "")
    model = provider.get("model", "gpt-4o-mini")

    if fmt == "anthropic":
        payload = format_messages_anthropic(system, messages, model)
        url = f"{base_url}/v1/messages"
        headers = {"x-api-key": api_key, "Content-Type": "application/json",
                   "anthropic-version": "2023-06-01"}
    elif fmt == "ollama":
        payload = format_messages_ollama(system, messages, model)
        # Make sure the system prompt reaches the model: prepend it when the
        # formatted payload does not already carry a system message.
        if system and not any(m.get("role") == "system" for m in payload["messages"]):
            payload["messages"] = [{"role": "system", "content": system}] + payload["messages"]
        url = f"{base_url}/api/chat"
        headers = {"Content-Type": "application/json"}
    else:
        # openai / openrouter / groq / default
        payload = format_messages_openai(system, messages, model)
        url = f"{base_url}/chat/completions"
        headers = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        if fmt == "openrouter":
            headers["HTTP-OpenRouter-AI-Model"] = model
        headers["Content-Type"] = "application/json"

    data = json.dumps(payload).encode()
    req = urllib.request.Request(url, data=data, headers=headers)
    loop = asyncio.get_running_loop()

    resp = None
    try:
        # urllib is blocking: run the connect and every read in the default
        # executor so the event loop keeps serving other requests.
        resp = await loop.run_in_executor(
            None, lambda: urllib.request.urlopen(req, timeout=60))
        while True:
            # Line-wise reads hand over each event as soon as it arrives,
            # instead of waiting for a fixed-size buffer to fill.
            raw = await loop.run_in_executor(None, resp.readline)
            if not raw:
                break
            line = raw.decode("utf-8", errors="replace").strip()
            if not line or line.startswith("event:"):
                continue  # blank separator or SSE event name
            if line.startswith("data:"):
                data_str = line[5:].strip()
            elif fmt == "ollama":
                # Ollama streams bare JSON objects, one per line (no SSE framing).
                data_str = line
            else:
                continue

            if data_str == "[DONE]":
                yield {"type": "done"}
                return
            try:
                chunk_data = json.loads(data_str)
            except json.JSONDecodeError:
                yield {"delta": data_str, "type": "raw"}
                continue
            if not isinstance(chunk_data, dict):
                continue

            # Anthropic SSE format
            if fmt == "anthropic":
                evt_type = chunk_data.get("type", "")
                if evt_type == "content_block_delta":
                    text = chunk_data.get("delta", {}).get("text", "")
                    if text:
                        yield {"delta": text, "type": "delta"}
                elif evt_type == "message_stop":
                    yield {"type": "done"}
                    return
                elif evt_type == "error":
                    err_msg = chunk_data.get("error", {}).get("message", str(chunk_data))
                    yield {"type": "error", "delta": err_msg}
                    return
                continue

            # Ollama NDJSON format
            if fmt == "ollama":
                if chunk_data.get("error"):
                    yield {"type": "error", "delta": str(chunk_data["error"])}
                    return
                text = (chunk_data.get("message") or {}).get("content", "")
                if text:
                    yield {"delta": text, "type": "delta"}
                if chunk_data.get("done"):
                    yield {"type": "done"}
                    return
                continue

            # OpenAI-compatible SSE format. `choices` may be an empty list.
            choice = (chunk_data.get("choices") or [{}])[0]
            deltas = choice.get("delta") or {}
            content = deltas.get("content", "")
            if content:
                yield {"delta": content, "type": "delta"}
            # Announce a tool only on the chunk that names it; chunks that
            # carry just argument fragments have no name and are skipped.
            names = [
                (tc.get("function") or {}).get("name")
                for tc in deltas.get("tool_calls") or []
            ]
            names = [name for name in names if name]
            if names:
                yield {"delta": f"\n[Using tools: {', '.join(names)}]", "type": "tool"}
            # finish_reason lives on the choice; the top level is checked too
            # for providers that put it there.
            if choice.get("finish_reason") or chunk_data.get("finish_reason"):
                yield {"type": "done"}
                return
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")[:500]
        yield {"type": "error", "delta": f"HTTP {e.code}: {body}"}
    except Exception as e:
        yield {"type": "error", "delta": str(e)}
    finally:
        if resp is not None:
            resp.close()
@app.post("/providers/save")
async def save_provider(p: ProviderSave):
    """Insert or replace a custom provider, matched by its ``id``.

    Loads the stored custom providers, overwrites the first entry whose id
    equals ``p.id`` (or appends when none matches), writes the list back and
    returns ``{"ok": True, "provider": <saved record>}``.
    """
    record = p.model_dump()
    stored = load_custom_providers()

    slot = next(
        (position for position, entry in enumerate(stored) if entry.get("id") == p.id),
        None,
    )
    if slot is None:
        stored.append(record)
    else:
        stored[slot] = record

    save_custom_providers(stored)
    return {"ok": True, "provider": record}
@app.post("/chat/tunnel")
async def chat_tunnel(msg: ChatMessage):
    """Server-side token chat — uses ZAI_API_TOKEN env var if available.

    Streams the reply as server-sent events. The last ten history entries
    plus the new message are sent to the selected provider, with a RAG
    system prompt built according to the request's ``rag_wiki`` and
    ``rag_vector`` flags.
    """
    async def generate():
        from urllib.parse import urlparse

        providers = get_all_providers()
        provider = next((p for p in providers if p.get("id") == msg.provider_id), None)
        if not provider:
            yield f"data: {json.dumps({'type':'error','delta':'Provider not found'})}\n\n"
            return

        # Lend the server-side token only to Z.ai hosts. Custom providers
        # carry caller-supplied base URLs, and attaching the token to any
        # key-less provider would send the secret to that host.
        token = os.environ.get("ZAI_API_TOKEN", "")
        if token and not provider.get("api_key"):
            host = urlparse(provider.get("base_url", "")).hostname or ""
            if host == "z.ai" or host.endswith(".z.ai"):
                provider = dict(provider)  # copy: never mutate the shared preset
                provider["api_key"] = token

        messages = list(msg.history[-10:])
        messages.append({"role": "user", "content": msg.message})

        # Honour the per-session RAG toggles; both default to True.
        rag_context = await build_rag_context(msg.message, msg.rag_wiki, msg.rag_vector)

        finished = False
        async for chunk in call_llm_stream(provider, rag_context, messages):
            if chunk.get("type") == "done":
                finished = True
            data = json.dumps(chunk, ensure_ascii=False)
            yield f"data: {data}\n\n"
        # Send the terminator only if the provider stream did not already.
        if not finished:
            yield f"data: {json.dumps({'type':'done'})}\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream",
                             headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
@app.post("/chat/save-to-wiki")
async def save_to_wiki(item: WikiSave):
    """Save a Q&A pair directly to wiki-kb.json.

    Appends an entry built from ``item`` (question, answer, topic) to the
    JSON array stored in ``/opt/blog/wiki-kb.json``.

    Returns:
        ``{"ok": True, "total": <entry count>}`` on success, or
        ``{"ok": False, "error": <message>}`` when the file cannot be read,
        parsed or written.
    """
    try:
        kb_path = Path("/opt/blog/wiki-kb.json")
        # Read and write as UTF-8 explicitly: the file is dumped with
        # ensure_ascii=False, so a non-UTF-8 locale default could fail on
        # non-ASCII text or garble it.
        kb = json.loads(kb_path.read_text(encoding="utf-8"))
        if not isinstance(kb, list):
            raise ValueError("wiki-kb.json must contain a JSON array")
        entry = {
            "q": item.question,
            "a": item.answer,
            "topic": item.topic,
            "author": "chat-assistant",
            "source": "chat-saved",
            "timestamp": time.strftime("%Y-%m-%d %H:%M"),
        }
        kb.append(entry)
        kb_path.write_text(json.dumps(kb, ensure_ascii=False, indent=2), encoding="utf-8")
        return {"ok": True, "total": len(kb)}
    except Exception as e:
        # Endpoint boundary: report the failure in the JSON body.
        return {"ok": False, "error": str(e)}
+ + + + + + + + +
+ +
+ +
+ Z.ai Wiki Assistant + +
+
+ + +
+
+ + Z.ai Chat +
+
Ask about Z.ai wiki knowledge, community discussions, API docs — powered by VectorDB RAG with multi-provider AI.
+
+ + + + + + +
+
+ + +
+ + + + + +
+
+ 🔍 Knowledge Sources Used + +
+
+
+ + +
+
+ +
+ +
+
+
+
+
+ +
+
+
+ + + +
+ +
+
+
Press Enter to send · Shift+Enter for newline · RAG auto-injects Wiki KB + VectorDB context
+
+ +
+
+ + + + + + + + +
+ +