commit ae621ecbb54d0848e24b20e055bb0610bfa492ee
Author: admin
'); }
+ div.innerHTML = html;
+ msgs.appendChild(div);
+ msgs.scrollTop = msgs.scrollHeight;
+ return div;
+}
+
+function setStreaming(on){
+ _isStreaming = on;
+ $('vdb-typing').className = 'vdb-typing' + (on ? ' active' : '');
+ $('vdb-send').disabled = on;
+}
+
+async function sendMessage(){
+ const input = $('vdb-input');
+ const text = (input.value || '').trim();
+ if(!text || !_activeProvider || _isStreaming) return;
+ _history.push({role:'user', content:text});
+ addMsg(text, true, null);
+ input.value = '';
+ input.style.height = 'auto';
+ setStreaming(true);
+
+ try{
+ const resp = await fetch(CHAT_API + 'chat/message', {
+ method:'POST',
+ headers:{'Content-Type':'application/json'},
+ body:JSON.stringify({message:text, provider_id:_activeProvider.id, history:_history})
+ });
+ const reader = resp.body.getReader();
+ const decoder = new TextDecoder();
+ let buf = '';
+ let fullText = '';
+ let assistantDiv = null;
+
+ while(true){
+ const {done, value} = await reader.read();
+ if(value) buf += decoder.decode(value, {stream:true});
+ if(done) break;
+ const lines = buf.split('\\n');
+ buf = lines.pop(); // keep incomplete line
+ for(const line of lines){
+ if(!line.startsWith('data: ') || !line.slice(5)) continue;
+ try{
+ const chunk = JSON.parse(line.slice(5));
+ if(chunk.type === 'done'){ setStreaming(false); break; }
+ if(chunk.type === 'error'){
+ if(!assistantDiv) assistantDiv = addMsg(chunk.delta, false, {error:true});
+ else assistantDiv.textContent += chunk.delta;
+ continue;
+ }
+ if(chunk.type === 'delta'){
+ if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider});
+ fullText += chunk.delta;
+ assistantDiv.innerHTML = esc(fullText.replace(/\\n/g, '
'));
+ assistantDiv.scrollIntoView({block:'nearest', behavior:'smooth'});
+ } else if(chunk.type === 'tool' || chunk.type === 'raw'){
+ if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider});
+ assistantDiv.innerHTML += esc(chunk.delta).replace(/\\n/g, '
');
+ }
+ }catch(e){ /* skip malformed */ }
+ }
+ }
+ // Process remaining buffer
+ if(buf){
+ for(const line of buf.split('\\n')){
+ if(!line.startsWith('data: ')) continue;
+ try{
+ const chunk = JSON.parse(line.slice(5));
+ if(chunk.type === 'delta' && chunk.delta){
+ if(!assistantDiv) assistantDiv = addMsg('', false, {provider:_activeProvider});
+ fullText += chunk.delta;
+ assistantDiv.innerHTML = esc(fullText.replace(/\\n/g, '
'));
+ }
+ }catch(e){}
+ }
+ }
+ _history.push({role:'assistant', content:fullText || '(no response)'});
+ } catch(e){
+ addMsg('Error: ' + e.message, false, {error:true});
+ }
+ setStreaming(false);
+}
+
+// Provider selection
+$('vdb-provider-select').addEventListener('change', function(){
+ const pid = this.value;
+ _activeProvider = _providers.find(p => p.id === pid) || null;
+});
+
+// Send button
+$('vdb-send').addEventListener('click', sendMessage);
+$('vdb-input').addEventListener('keydown', function(e){ if(e.key === 'Enter' && !e.shiftKey){ e.preventDefault(); sendMessage(); }});
+
+// Toggle chat panel
+let _chatOpen = false;
+$('vdb-chat-btn').addEventListener('click', function(){
+ _chatOpen = !_chatOpen;
+ const chat = $('vdb-chat');
+ chat.classList.toggle('hidden', !_chatOpen);
+ this.textContent = _chatOpen ? '\u2715' : '\u1F4AC';
+ this.title = _chatOpen ? 'Close Chat' : 'Open Wiki Chat';
+});
+
+$('vdb-toggle').addEventListener('click', function(){
+ _chatOpen = false;
+ $('vdb-chat').classList.add('hidden');
+ this.textContent = '\u25B2';
+ $('vdb-chat-btn').textContent = '\u1F4AC';
+ $('vdb-chat-btn').title = 'Open Wiki Chat';
+});
+
+// Settings
+$('vdb-cust-save').addEventListener('click', async function(){
+ const name = $('vdb-cust-name').value.trim();
+ const url = $('vdb-cust-url').value.trim();
+ const model = $('vdb-cust-model').value.trim();
+ const key = $('vdb-cust-key').value.trim();
+ if(!name || !url || !model){ alert('Name, URL, and Model are required'); return; }
+ const provider = {id:'custom-'+Date.now(), name:name, base_url:url, model:model,
+ api_key:key, format:'openai', icon:'\u2699', description:'Custom'};
+ // Save via API
+ try{
+ await fetch(CHAT_API + 'providers/save', {
+ method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(provider)
+ });
+ _providers = await (await fetch(CHAT_API + 'providers')).json();
+ renderProviders();
+ _activeProvider = provider;
+ $('vdb-settings').style.display = 'none';
+ alert('Provider saved!');
+ } catch(e){ alert('Save failed: ' + e.message); }
+});
+
+$('vdb-cust-cancel').addEventListener('click', function(){
+ $('vdb-settings').style.display = 'none';
+});
+
+// Auto-open settings if no providers loaded
+function checkProviders(){
+ if(!_providers.length){
+ $('vdb-settings').style.display = '';
+ }
+}
+
+// Init
+async function init(){
+ try{
+ const [presetsResp, customResp] = await Promise.all([
+ fetch(CHAT_API + 'providers/presets'),
+ fetch(CHAT_API + 'providers')
+ ]);
+ _presets = await presetsResp.json();
+ _custom = await customResp.json();
+ _providers = [..._presets, ..._custom];
+ renderProviders();
+ // Auto-select first available provider
+ if(_providers.length > 0 && !_activeProvider){
+ _activeProvider = _providers[0];
+ renderProviders();
+ }
+ checkProviders();
+ } catch(e){
+ console.error('Chat init error:', e);
+ $('vdb-chat-messages').innerHTML = '
+ if "" in html_content: + html_content = html_content.replace("", CHAT_HTML + "\n", 1) + + return html_content + + +def main(): + files = WIKI_FILES + # Check for --file argument + if "--file" in sys.argv: + idx = sys.argv.index("--file") + 1 + if idx < len(sys.argv): + files = [sys.argv[idx]] + + for fpath in files: + if not os.path.exists(fpath): + print(f"SKIP: {fpath} not found") + continue + with open(fpath, "r") as f: + content = f.read() + # Check if already injected + if "vdb-chat-container" in content: + print(f"SKIP: {fpath} already has chat injected") + continue + new_content = inject_chat(content) + with open(fpath, "w") as f: + f.write(new_content) + print(f"OK: {fpath} ({len(new_content)} bytes)") + + +if __name__ == "__main__": + main() diff --git a/vector-db-service.py b/vector-db-service.py new file mode 100644 index 0000000..20eac79 --- /dev/null +++ b/vector-db-service.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +"""Z.ai Wiki Vector DB Service + +Stores Discord messages as vector embeddings using sentence-transformers. +Provides a search API for the wiki chat system to query as an additional data source. + +Data sources: +- Server 1346756824233148527 (Z.ai Community) +- Server 1410352583364841555 (Z.ai Mod Server) +- Channel 1476364011091136544 (Z.ai Mod Channel) + +Endpoints: +- POST /vector/search - Search with a query, return top-K matches +- POST /vector/index - Add messages to the index +- GET /vector/stats - Get index statistics +- POST /vector/rebuild - Rebuild from stored messages +""" + +import json +import os +import glob +import numpy as np +from flask import Flask, request, jsonify, make_response +from sentence_transformers import SentenceTransformer + +app = Flask(__name__) + +DATA_DIR = os.environ.get('VECTOR_DB_DIR', '/opt/blog/vector-db') +MESSAGES_FILE = os.path.join(DATA_DIR, 'messages.json') +EMBEDDINGS_FILE = os.path.join(DATA_DIR, 'embeddings.npy') +META_FILE = os.path.join(DATA_DIR, 'meta.json') + +os.makedirs(DATA_DIR, exist_ok=True) + +model = None +embeddings = None +meta = [] # parallel array: [{id, content, source, author, channel, server, timestamp, link}, ...] + + +def get_model(): + global model + if model is None: + print('[VectorDB] Loading sentence-transformer model...') + model = SentenceTransformer('all-MiniLM-L6-v2') + print('[VectorDB] Model loaded') + return model + + +def load_index(): + global embeddings, meta + if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE): + embeddings = np.load(EMBEDDINGS_FILE) + meta = json.load(open(META_FILE)) + print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={embeddings.shape[1]}') + else: + embeddings = np.empty((0, 384), dtype=np.float32) + meta = [] + print('[VectorDB] No existing index, starting fresh') + + +def save_index(): + np.save(EMBEDDINGS_FILE, embeddings) + json.dump(meta, open(META_FILE, 'w')) + print(f'[VectorDB] Saved index: {len(meta)} entries') + + +def load_messages(): + """Load raw messages from JSON files""" + msgs = [] + for f in glob.glob(os.path.join(DATA_DIR, 'messages_*.json')): + msgs.extend(json.load(open(f))) + return msgs + + +@app.after_request +def add_cors(response): + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + return response + + +@app.route('/vector/stats', methods=['GET', 'OPTIONS']) +def stats(): + if request.method == 'OPTIONS': + return make_response('', 200) + sources = {} + for m in meta: + s = m.get('source', 'unknown') + sources[s] = sources.get(s, 0) + 1 + return jsonify({ + 'total': len(meta), + 'dimension': int(embeddings.shape[1]) if embeddings.shape[0] > 0 else 0, + 'sources': sources, + 'is_indexed': len(meta) > 0, + }) + + +@app.route('/vector/search', methods=['POST', 'OPTIONS']) +def search(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + query = body.get('query', '') + top_k = min(body.get('top_k', 10), 50) + + if not query or embeddings.shape[0] == 0: + return jsonify({'results': [], 'query': query}) + + mdl = get_model() + q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32) + + # Cosine similarity (embeddings are already normalized) + scores = (embeddings @ q_emb.T).flatten() + + top_idx = np.argsort(scores)[::-1][:top_k] + + results = [] + for i in top_idx: + if scores[i] < 0.1: # threshold + break + results.append({ + 'content': meta[i]['content'], + 'source': meta[i].get('source', ''), + 'author': meta[i].get('author', ''), + 'channel': meta[i].get('channel', ''), + 'server': meta[i].get('server', ''), + 'timestamp': meta[i].get('timestamp', ''), + 'link': meta[i].get('link', ''), + 'score': float(scores[i]), + }) + + return jsonify({'results': results, 'query': query, 'total': len(meta)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@app.route('/vector/index', methods=['POST', 'OPTIONS']) +def index_messages(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + messages = body.get('messages', []) + source = body.get('source', 'unknown') + + if not messages: + return jsonify({'error': 'messages required'}), 400 + + global embeddings, meta + + mdl = get_model() + texts = [m.get('content', '')[:2000] for m in messages] + new_emb = mdl.encode(texts, normalize_embeddings=True, show_progress_bar=False, + batch_size=64).astype(np.float32) + + start_idx = len(meta) + for m in messages: + meta.append({ + 'id': m.get('id', ''), + 'content': m.get('content', ''), + 'source': source, + 'author': m.get('author', ''), + 'channel': m.get('channel', ''), + 'server': m.get('server', ''), + 'timestamp': m.get('timestamp', ''), + 'link': m.get('link', ''), + }) + + if embeddings.shape[0] == 0: + embeddings = new_emb + else: + embeddings = np.vstack([embeddings, new_emb]) + + save_index() + return jsonify({'indexed': len(messages), 'total': len(meta)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +@app.route('/vector/rebuild', methods=['POST', 'OPTIONS']) +def rebuild(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + messages = load_messages() + if not messages: + return jsonify({'error': 'No messages found. Run scraper first.'}), 404 + + global embeddings, meta + meta = [] + mdl = get_model() + + # Filter out empty/short messages + valid = [m for m in messages if len(m.get('content', '').strip()) > 10] + print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...') + + texts = [m.get('content', '')[:2000] for m in valid] + embeddings = mdl.encode(texts, normalize_embeddings=True, + show_progress_bar=True, batch_size=128).astype(np.float32) + + for m in valid: + meta.append({ + 'id': m.get('id', ''), + 'content': m.get('content', ''), + 'source': m.get('source', ''), + 'author': m.get('author', ''), + 'channel': m.get('channel', ''), + 'server': m.get('server', ''), + 'timestamp': m.get('timestamp', ''), + 'link': m.get('link', ''), + }) + + save_index() + return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)}) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + +if __name__ == '__main__': + load_index() + port = int(os.environ.get('VECTOR_DB_PORT', 8099)) + print(f'[VectorDB] Service running on port {port}') + app.run(host='0.0.0.0', port=port) diff --git a/wiki-api.py b/wiki-api.py new file mode 100755 index 0000000..6fbe90c --- /dev/null +++ b/wiki-api.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +"""Z.ai Wiki KB Search API - token-protected""" +import json, os, sys, urllib.parse, hashlib +from http.server import HTTPServer, BaseHTTPRequestHandler + +KB_PATH = "/opt/blog/wiki-kb.json" +TOKEN_PATH = "/opt/blog/.wiki-api-token" +PORT = 8097 +LOG_PATH = "/opt/blog/data/search-logs.json" +MAX_LOG_ENTRIES = 5000 + +def load_logs(): + if os.path.exists(LOG_PATH): + try: + with open(LOG_PATH) as f: + return json.load(f) + except: + pass + return [] + +def save_logs(logs): + os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True) + # Keep only last MAX_LOG_ENTRIES + with open(LOG_PATH, 'w') as f: + json.dump(logs[-MAX_LOG_ENTRIES:], f) + +def log_search(source, query, results, meta=None): + logs = load_logs() + entry = { + "ts": __import__('time').strftime("%Y-%m-%dT%H:%M:%SZ"), + "source": source, + "query": query, + "result_count": len(results) if isinstance(results, list) else 0, + "top_results": [ + {"q": r.get("q", r.get("content", ""))[:100], "score": r.get("score", 0), "source": r.get("source", "")} + for r in (results[:3] if isinstance(results, list) else []) + ], + } + if meta: + entry["meta"] = meta + logs.append(entry) + save_logs(logs) + return entry + + + +# Load or generate token +def load_token(): + if os.path.exists(TOKEN_PATH): + with open(TOKEN_PATH) as f: + return f.read().strip() + return None + +API_TOKEN = load_token() + +with open(KB_PATH) as f: + KB = json.load(f) + +def search_kb(query, topic=None, limit=20): + query_lower = query.lower() + query_words = set(query_lower.split()) + results = [] + for entry in KB: + score = 0 + q_text = entry.get("q", "").lower() + a_text = entry.get("a", "").lower() + q_words = set(q_text.split()) + a_words = set(a_text.split()) + score += len(query_words & q_words) * 3 + score += len(query_words & a_words) * 1 + if query_lower in q_text: score += 10 + if query_lower in a_text: score += 5 + if topic and entry.get("topic", "").lower() != topic.lower(): score -= 50 + if score > 0: + results.append({**entry, "score": score}) + results.sort(key=lambda x: -x["score"]) + return results[:limit] + +def check_auth(params, headers): + if not API_TOKEN: + return False + # Check query param ?token=... + token = params.get("token", [""])[0] + if token == API_TOKEN: + return True + # Check header Authorization: Bearer ... + auth = headers.get("Authorization", "") + if auth.startswith("Bearer "): + if auth[7:] == API_TOKEN: + return True + # Check header X-API-Key + api_key = headers.get("X-Api-Key", "") + if api_key == API_TOKEN: + return True + return False + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + parsed = urllib.parse.urlparse(self.path) + params = urllib.parse.parse_qs(parsed.query) + + if not check_auth(params, self.headers): + self.send_response(401) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(json.dumps({"error": "Unauthorized. Provide ?token=YOUR_TOKEN or Authorization: Bearer YOUR_TOKEN"}).encode()) + return + + if parsed.path == "/search": + query = params.get("q", [""])[0] + topic = params.get("topic", [None])[0] + limit = int(params.get("limit", [20])[0]) + if not query: + body = json.dumps({"error": "Missing ?q= parameter"}).encode() + else: + results = search_kb(query, topic, limit) + log_search("kb", query, results, {"topic": topic, "limit": limit, "ip": self.headers.get("X-Real-IP", self.client_address[0])}) + body = json.dumps({"query": query, "count": len(results), "results": results}, ensure_ascii=False).encode() + elif parsed.path == "/kb": + body = json.dumps(KB, ensure_ascii=False).encode() + elif parsed.path == "/logs": + logs = load_logs() + body = json.dumps({"total": len(logs), "logs": logs[::-1]}, ensure_ascii=False).encode() + else: + self.send_response(404) + self.end_headers() + return + + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Content-Length", len(body)) + self.end_headers() + self.wfile.write(body) + + def do_DELETE(self): + parsed = urllib.parse.urlparse(self.path) + params = urllib.parse.parse_qs(parsed.query) + if not check_auth(params, self.headers): + self.send_response(401) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(json.dumps({"error": "Unauthorized"}).encode()) + return + if parsed.path == "/logs": + save_logs([]) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(json.dumps({"cleared": True}).encode()) + return + self.send_response(404) + self.end_headers() + + def do_OPTIONS(self): + self.send_response(200) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Authorization, X-Api-Key, Content-Type") + self.end_headers() + + def log_message(self, format, *args): + pass + +if __name__ == "__main__": + print(f"Wiki KB API running on port {PORT}") + server = HTTPServer(("127.0.0.1", PORT), Handler) + server.serve_forever() diff --git a/wiki-chat-proxy.py b/wiki-chat-proxy.py new file mode 100755 index 0000000..9b4a2f0 --- /dev/null +++ b/wiki-chat-proxy.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +"""Wiki Chat Proxy - Proxies chat requests to z.ai GLM-4-Plus""" + +import json +import os +import urllib.request + +ZAI_TOKEN = os.environ.get('ZAI_API_TOKEN', '') + +def handler(event): + try: + if event.get('method') != 'POST': + return {'status': 405, 'body': 'Method not allowed'} + + body = json.loads(event.get('body', '{}')) + messages = body.get('messages', []) + + if not messages: + return {'status': 400, 'body': json.dumps({'error': 'messages required'})} + + api_url = 'https://api.z.ai/api/coding/paas/v4/chat/completions' + + payload = json.dumps({ + 'model': 'glm-4-plus', + 'messages': messages, + 'temperature': 0.7, + 'max_tokens': 2000, + }).encode() + + headers = {'Content-Type': 'application/json'} + if ZAI_TOKEN: + headers['Authorization'] = 'Bearer ' + ZAI_TOKEN + + req = urllib.request.Request(api_url, data=payload, headers=headers, method='POST') + + try: + with urllib.request.urlopen(req, timeout=60) as resp: + data = json.loads(resp.read().decode()) + content = data.get('choices', [{}])[0].get('message', {}).get('content', '') + return { + 'status': 200, + 'headers': {'Content-Type': 'application/json'}, + 'body': json.dumps({'response': content}) + } + except urllib.error.HTTPError as e: + err_body = e.read().decode() if e.fp else '' + try: + err_json = json.loads(err_body) + err_msg = err_json.get('error', {}).get('message', err_json.get('message', str(e))) + except: + err_msg = str(e) + return {'status': e.code, 'body': json.dumps({'error': err_msg})} + except Exception as e: + return {'status': 500, 'body': json.dumps({'error': str(e)})} diff --git a/wiki-chat-server.py b/wiki-chat-server.py new file mode 100755 index 0000000..5a3d7c7 --- /dev/null +++ b/wiki-chat-server.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +"""Wiki Chat LLM Proxy Server on port 8098 using Flask""" + +import json +import os +import urllib.request +from flask import Flask, request, jsonify, make_response + +app = Flask(__name__) + +ZAI_TOKEN = os.environ.get('ZAI_API_TOKEN', '') + +@app.after_request +def add_cors(response): + h = 'Access-Control-Allow-Origin' + response.headers[h] = '*' + response.headers['Access-Control-Allow-Methods'] = 'POST, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + return response + +def call_zai(messages, token=None): + api_url = 'https://api.z.ai/api/coding/paas/v4/chat/completions' + payload = json.dumps({ + 'model': 'glm-4-plus', + 'messages': messages, + 'temperature': 0.7, + 'max_tokens': 2000, + }).encode() + headers = {'Content-Type': 'application/json'} + if token: + headers['Authorization'] = 'Bearer ' + token + req = urllib.request.Request(api_url, data=payload, headers=headers, method='POST') + with urllib.request.urlopen(req, timeout=60) as resp: + data = json.loads(resp.read().decode()) + return data.get('choices', [{}])[0].get('message', {}).get('content', '') + +@app.route('/chat/wiki', methods=['POST', 'OPTIONS']) +def chat_wiki(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + messages = body.get('messages', []) + if not messages: + return jsonify({'error': 'messages required'}), 400 + token = body.get('token') or ZAI_TOKEN + if not token: + return jsonify({'error': 'No token provided'}), 401 + content = call_zai(messages, token) + return jsonify({'response': content}) + except urllib.error.HTTPError as e: + err_body = e.read().decode() if e.fp else '' + try: + err_json = json.loads(err_body) + err_msg = err_json.get('error', {}).get('message', err_json.get('message', str(e))) + except Exception: + err_msg = str(e) + return jsonify({'error': err_msg}), e.code + except Exception as e: + return jsonify({'error': str(e)}), 500 + +@app.route('/chat/wiki-tunnel', methods=['POST', 'OPTIONS']) +def chat_wiki_tunnel(): + if request.method == 'OPTIONS': + return make_response('', 200) + try: + body = request.get_json(force=True) + messages = body.get('messages', []) + if not messages: + return jsonify({'error': 'messages required'}), 400 + token = ZAI_TOKEN + if not token: + return jsonify({'error': 'No server token configured. Use Token mode and paste your API key.'}), 503 + content = call_zai(messages, token) + return jsonify({'response': content}) + except urllib.error.HTTPError as e: + err_body = e.read().decode() if e.fp else '' + try: + err_json = json.loads(err_body) + err_msg = err_json.get('error', {}).get('message', err_json.get('message', str(e))) + except Exception: + err_msg = str(e) + return jsonify({'error': err_msg}), e.code + except Exception as e: + return jsonify({'error': str(e)}), 500 + +if __name__ == '__main__': + port = int(os.environ.get('WIKI_CHAT_PORT', 8098)) + print(f'[WikiChat] LLM proxy running on port {port}') + app.run(host='0.0.0.0', port=port) diff --git a/wiki-vector-chat.py b/wiki-vector-chat.py new file mode 100644 index 0000000..c3d0a76 --- /dev/null +++ b/wiki-vector-chat.py @@ -0,0 +1,503 @@ +#!/usr/bin/env python3 +"""Wiki VectorDB Chat — Multi-Provider AI Chat with RAG (KB + VectorDB). + +Serves at port 8770, proxied via nginx at /zportal/wiki/api/chat +Uses wiki-api (:8097) for KB search and vector-db (:8099) for vector search. +""" + +import asyncio +import json +import os +import re +import time +import urllib.request +from pathlib import Path + +PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json") +CUSTOM_PROVIDERS_FILE = Path("/opt/blog/wiki-chat-providers.json") + +WIKI_API = "http://127.0.0.1:8097" +VECTOR_DB = "http://127.0.0.1:8099" + +# Shared API token for wiki-api and vector-db +_API_TOKEN = "" +try: + _API_TOKEN = Path("/opt/blog/.wiki-api-token").read_text().strip() +except Exception: + pass + +PRESETS = [ + { + "id": "zai-coding", + "name": "Z.ai Coding Plan", + "base_url": "https://api.z.ai/api/coding/paas/v4", + "model": "glm-4-plus", + "format": "openai", + "icon": "\u26a1", + "description": "Official Z.ai coding plan API", + }, + { + "id": "openadapter", + "name": "OpenAdapter", + "base_url": "https://api.openadapter.com/v1", + "model": "gpt-4o-mini", + "format": "openai", + "icon": "\u1f512", + "description": "OpenAdapter unified API", + }, + { + "id": "openrouter", + "name": "OpenRouter", + "base_url": "https://openrouter.ai/api/v1", + "model": "anthropic/claude-sonnet-4", + "format": "openrouter", + "icon": "\u1f6e3", + "description": "Model router across providers", + }, + { + "id": "crofai", + "name": "Crof.AI", + "base_url": "https://api.crof.ai/v1", + "model": "crof-4-plus", + "format": "openai", + "icon": "\u1f42a", + "description": "Crof AI models", + }, + { + "id": "opencode-zen", + "name": "Opencode Zen", + "base_url": "https://api.zen.opencode.com/v1", + "model": "glm-4-plus", + "format": "openai", + "icon": "\u1f9e0", + "description": "Opencode Zen hosted models", + }, +] + + +def load_custom_providers(): + try: + if CUSTOM_PROVIDERS_FILE.exists(): + return json.loads(CUSTOM_PROVIDERS_FILE.read_text()) + except Exception: + pass + return [] + + +def save_custom_providers(providers): + CUSTOM_PROVIDERS_FILE.write_text(json.dumps(providers, indent=2)) + + +def get_all_providers(): + """Return presets + custom providers.""" + custom = load_custom_providers() + seen = {p["id"] for p in PRESETS} + result = list(PRESETS) + for p in custom: + if p.get("id") not in seen: + seen.add(p["id"]) + result.append(p) + return result + + +def detect_provider_format(base_url: str) -> str: + from urllib.parse import urlparse + host = urlparse(base_url).hostname.lower() + if "ollama" in host or host in ("localhost", "127.0.0.1"): + return "ollama" + if "anthropic" in host: + return "anthropic" + if "openrouter" in host: + return "openrouter" + if "groq" in host: + return "groq" + return "openai" + + +async def search_kb(query: str, limit: int = 3) -> str: + """Search wiki-kb.json via wiki-api.""" + try: + url = f"{WIKI_API}/search?q={urllib.parse.quote(query)}&limit={limit}&token={_API_TOKEN}" + req = urllib.request.Request(url) + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read()) + results = data.get("results", []) + if not results: + return "" + lines = [] + for r in results[:limit]: + q_text = r.get("q", "") + a_text = r.get("a", "") + topic = r.get("topic", "") + score = r.get("_score", 0) + lines.append(f"[{topic}] Q: {q_text}\nA: {a_text}") + return "\n\n".join(lines) + except Exception as e: + return f"(KB search error: {e})" + + +async def search_vector(query: str, top_k: int = 5) -> str: + """Search vector-db for related Discord/Reddit messages.""" + try: + data = json.dumps({"query": query, "top_k": top_k}).encode() + req = urllib.request.Request( + f"{VECTOR_DB}/vector/search", + data=data, + headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN}, + ) + with urllib.request.urlopen(req, timeout=8) as resp: + result = json.loads(resp.read()) + hits = result.get("results", []) + if not hits: + return "" + lines = [] + for h in hits[:top_k]: + text = h.get("text", "")[:300] + score = h.get("score", 0) + source = h.get("source", "unknown") + meta = h.get("metadata", {}) + author = meta.get("author", "") + channel = meta.get("channel", "") + preview = text.replace("\n", " ")[:200] + lines.append(f"[{source}] @{author} in #{channel}: {preview} (score: {score:.2f})") + return "\n\n".join(lines) + except Exception as e: + return f"(Vector search error: {e})" + + +async def build_rag_context(user_message: str, rag_wiki: bool = True, rag_vector: bool = True) -> str: + """Build RAG context from KB + VectorDB searches.""" + kb_results = "" + vec_results = "" + tasks = [] + if rag_wiki: + tasks.append(search_kb(user_message, 3)) + if rag_vector: + tasks.append(search_vector(user_message, 5)) + + if tasks: + results = await asyncio.gather(*tasks) + idx = 0 + if rag_wiki: + kb_results = results[idx]; idx += 1 + if rag_vector: + vec_results = results[idx] + + parts = [ + "You are Z.ai Wiki Assistant. Use ALL the knowledge sources below to answer the user's question.", + "Draw from both the Wiki KB and Community Messages. Synthesize information even from partial matches.", + "If the context mentions anything relevant, include it in your answer. Be specific — quote authors, channels, and details when available.", + "Only say you don't have information if the sources are truly empty or completely unrelated.", + "", + ] + if rag_wiki: + parts += ["=== Wiki Knowledge Base ===", kb_results or "(no KB results found)", ""] + if rag_vector: + parts += ["=== Related Community Messages (Discord/Reddit) ===", vec_results or "(no community messages found)"] + if not rag_wiki and not rag_vector: + parts.append("(RAG sources disabled for this session)") + return "\n".join(parts) + + +# ── LLM Provider Calls ── + +def format_messages_openai(system: str, messages: list, model: str) -> dict: + """Format for OpenAI-compatible /chat/completions endpoint.""" + return {"model": model, "messages": [{"role": "system", "content": system}] + messages, + "temperature": 0.7, "max_tokens": 2048, "stream": True} + + +def format_messages_anthropic(system: str, messages: list, model: str) -> dict: + """Convert OpenAI-format messages to Anthropic format.""" + anthropic_msgs = [] + for m in messages: + role = "user" if m["role"] == "user" else "assistant" + anthropic_msgs.append({"role": role, "content": m["content"]}) + return {"model": model, "system": system, "messages": anthropic_msgs, + "max_tokens": 2048, "stream": True} + + +def format_messages_ollama(system: str, messages: list, model: str) -> dict: + """Format for Ollama /api/chat endpoint.""" + ollama_msgs = [] + for m in messages: + role = "user" if m["role"] == "user" else "assistant" + ollama_msgs.append({"role": role, "content": m["content"]}) + return {"model": model, "messages": ollama_msgs, "stream": True} + + +async def call_llm_stream(provider: dict, system: str, messages: list): + """Call LLM provider and yield SSE delta chunks.""" + base_url = provider["base_url"].rstrip("/") + fmt = provider.get("format", detect_provider_format(base_url)) + api_key = provider.get("api_key", "") + model = provider.get("model", "gpt-4o-mini") + + if fmt == "anthropic": + payload = format_messages_anthropic(system, messages, model) + url = f"{base_url}/v1/messages" + headers = {"x-api-key": api_key, "Content-Type": "application/json", + "anthropic-version": "2023-06-01"} + elif fmt == "ollama": + payload = format_messages_ollama(system, messages, model) + url = f"{base_url}/api/chat" + headers = {"Content-Type": "application/json"} + else: + # openai / openrouter / groq / default + payload = format_messages_openai(system, messages, model) + url = f"{base_url}/chat/completions" + headers = {} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + if fmt == "openrouter": + headers["HTTP-OpenRouter-AI-Model"] = model + headers["Content-Type"] = "application/json" + + data = json.dumps(payload).encode() + req = urllib.request.Request(url, data=data, headers=headers) + + try: + with urllib.request.urlopen(req, timeout=60) as resp: + reader = resp + buf = b"" + while True: + chunk = reader.read(4096) + if not chunk: + break + buf += chunk + while b"\n" in buf: + line, buf = buf.split(b"\n", 1) + line = line.decode("utf-8", errors="replace").strip() + if not line: + continue + if line.startswith("data: "): + data_str = line[5:].strip() + if data_str == "[DONE]": + yield {"type": "done"} + return + try: + chunk_data = json.loads(data_str) + except json.JSONDecodeError: + yield {"delta": data_str, "type": "raw"} + continue + + # Anthropic SSE format + if fmt == "anthropic": + evt_type = chunk_data.get("type", "") + if evt_type == "content_block_delta": + text = chunk_data.get("delta", {}).get("text", "") + if text: + yield {"delta": text, "type": "delta"} + elif evt_type == "message_stop": + yield {"type": "done"} + return + elif evt_type == "error": + err_msg = chunk_data.get("error", {}).get("message", str(chunk_data)) + yield {"type": "error", "delta": err_msg} + return + continue + + # OpenAI-compatible SSE format + deltas = chunk_data.get("choices", [{}])[0].get("delta", {}) + content = deltas.get("content", "") + if content: + yield {"delta": content, "type": "delta"} + tool_calls = deltas.get("tool_calls") + if tool_calls: + names = [tc.get("function", {}).get("name", "?") for tc in tool_calls] + yield {"delta": f"\n[Using tools: {', '.join(names)}]", "type": "tool"} + finish = chunk_data.get("finish_reason") + if finish: + yield {"type": "done"} + elif line.startswith("event:"): + pass # SSE event name, skip + except urllib.error.HTTPError as e: + body = e.read().decode("utf-8", errors="replace")[:500] + yield {"type": "error", "delta": f"HTTP {e.code}: {body}"} + except Exception as e: + yield {"type": "error", "delta": str(e)} + + +# ── FastAPI App ── + +try: + from fastapi import FastAPI + from fastapi.responses import StreamingResponse, JSONResponse, Response + from pydantic import BaseModel +except ImportError: + print("Installing fastapi...") + os.system("pip install fastapi uvicorn httpx -q") + from fastapi import FastAPI + from fastapi.responses import StreamingResponse, JSONResponse, Response + from pydantic import BaseModel + +app = FastAPI(title="Wiki VectorDB Chat") + + +class ChatMessage(BaseModel): + message: str + provider_id: str = "zai-coding" + history: list = [] + rag_wiki: bool = True + rag_vector: bool = True + mode: str = "chat" + + +class ProviderSave(BaseModel): + id: str + name: str + base_url: str + model: str + api_key: str = "" + format: str = "openai" + icon: str = "\u2b99" + description: str = "" + + +@app.get("/providers/presets") +async def get_presets(): + return PRESETS + + +@app.get("/providers") +async def list_providers(): + return get_all_providers() + + +@app.post("/providers/save") +async def save_provider(p: ProviderSave): + custom = load_custom_providers() + p_dict = p.model_dump() + # Update or append + found = False + for i, existing in enumerate(custom): + if existing.get("id") == p.id: + custom[i] = p_dict + found = True + break + if not found: + custom.append(p_dict) + save_custom_providers(custom) + return {"ok": True, "provider": p_dict} + + +@app.delete("/providers/{provider_id}") +async def delete_provider(provider_id: str): + custom = load_custom_providers() + custom = [p for p in custom if p.get("id") != provider_id] + save_custom_providers(custom) + return {"ok": True} + + +@app.post("/chat/message") +async def chat_message(msg: ChatMessage): + async def generate(): + providers = get_all_providers() + provider = next((p for p in providers if p.get("id") == msg.provider_id), None) + if not provider: + yield f"data: {json.dumps({'type':'error','delta':'Provider not found'})}\n\n" + return + + # Build conversation history + messages = [] + for h in msg.history[-10:]: + messages.append(h) + + messages.append({"role": "user", "content": msg.message}) + + # Build RAG context with per-session toggles + rag_context = await build_rag_context(msg.message, msg.rag_wiki, msg.rag_vector) + + # Mode-specific system prompt additions + mode_hints = { + "chat": "", + "code": "\n\nMODE: Coding. The user is working on code. Provide precise, well-structured code examples with explanations. Use markdown code blocks. Be concise and technical.", + "brain": "\n\nMODE: Brainstorm. The user wants creative exploration. Think freely, offer multiple perspectives, suggest unconventional approaches. Be enthusiastic and expansive.", + } + system_prompt = rag_context + mode_hints.get(msg.mode, "") + + async for chunk in call_llm_stream(provider, system_prompt, messages): + data = json.dumps(chunk, ensure_ascii=False) + yield f"data: {data}\n\n" + yield f"data: {json.dumps({'type':'done'})}\n\n" + + return StreamingResponse(generate(), media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}) + + +@app.post("/chat/tunnel") +async def chat_tunnel(msg: ChatMessage): + """Server-side token chat — uses ZAI_API_TOKEN env var if available.""" + async def generate(): + providers = get_all_providers() + provider = next((p for p in providers if p.get("id") == msg.provider_id), None) + if not provider: + yield f"data: {json.dumps({'type':'error','delta':'Provider not found'})}\n\n" + return + + # Use server-side token if available (for tunnel mode) + token = os.environ.get("ZAI_API_TOKEN", "") + if token and not provider.get("api_key"): + provider = dict(provider) + provider["api_key"] = token + + messages = [] + for h in msg.history[-10:]: + messages.append(h) + messages.append({"role": "user", "content": msg.message}) + + rag_context = await build_rag_context(msg.message) + + async for chunk in call_llm_stream(provider, rag_context, messages): + data = json.dumps(chunk, ensure_ascii=False) + yield f"data: {data}\n\n" + yield f"data: {json.dumps({'type':'done'})}\n\n" + + return StreamingResponse(generate(), media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}) + + +@app.get("/health") +async def health(): + return {"status": "ok", "providers": len(get_all_providers())} + + +class WikiSave(BaseModel): + question: str + answer: str + topic: str = "chat-saved" + + +@app.post("/chat/save-to-wiki") +async def save_to_wiki(item: WikiSave): + """Save a Q&A pair directly to wiki-kb.json.""" + try: + kb_path = Path("/opt/blog/wiki-kb.json") + kb = json.loads(kb_path.read_text()) + entry = { + "q": item.question, + "a": item.answer, + "topic": item.topic, + "author": "chat-assistant", + "source": "chat-saved", + "timestamp": time.strftime("%Y-%m-%d %H:%M"), + } + kb.append(entry) + kb_path.write_text(json.dumps(kb, ensure_ascii=False, indent=2)) + return {"ok": True, "total": len(kb)} + except Exception as e: + return {"ok": False, "error": str(e)} + + +def main(): + import uvicorn + port = 8770 + for i, arg in enumerate(__import__("sys").argv): + if arg == "--port" and i + 1 < len(__import__("sys").argv): + port = int(__import__("sys").argv[i + 1]) + print(f"Wiki VectorDB Chat starting on port {port}") + uvicorn.run(app, host="127.0.0.1", port=port, log_level="warning") + + +if __name__ == "__main__": + main() diff --git a/zportal-chat.html b/zportal-chat.html new file mode 100644 index 0000000..f853de8 --- /dev/null +++ b/zportal-chat.html @@ -0,0 +1,1473 @@ + + +
+ + +
+ + +
+