#!/usr/bin/env python3
"""Z.ai Wiki Vector DB Service

Stores Discord messages as vector embeddings using sentence-transformers.
Provides a search API for the wiki chat system to query as an additional data source.

Data sources:
- Server 1346756824233148527 (Z.ai Community)
- Server 1410352583364841555 (Z.ai Mod Server)
- Channel 1476364011091136544 (Z.ai Mod Channel)

Endpoints:
- POST /vector/search - Search with a query, return top-K matches
- POST /vector/index - Add messages to the index
- GET /vector/stats - Get index statistics
- POST /vector/rebuild - Rebuild from stored messages
"""

import glob
import json
import os

import numpy as np
from flask import Flask, request, jsonify, make_response
from sentence_transformers import SentenceTransformer

app = Flask(__name__)

DATA_DIR = os.environ.get('VECTOR_DB_DIR', '/opt/blog/vector-db')
MESSAGES_FILE = os.path.join(DATA_DIR, 'messages.json')
EMBEDDINGS_FILE = os.path.join(DATA_DIR, 'embeddings.npy')
META_FILE = os.path.join(DATA_DIR, 'meta.json')
os.makedirs(DATA_DIR, exist_ok=True)

MODEL_NAME = 'all-MiniLM-L6-v2'
EMBEDDING_DIM = 384      # width of the empty index created before anything is encoded
MAX_TOP_K = 50           # upper bound on results returned by one search
MIN_SCORE = 0.1          # results scoring below this are dropped
MAX_TEXT_CHARS = 2000    # message text is truncated to this length before encoding
MIN_CONTENT_CHARS = 10   # rebuild skips messages whose stripped text is not longer than this

model = None
embeddings = None
meta = []  # parallel array: [{id, content, source, author, channel, server, timestamp, link}, ...]


def get_model():
    """Return the shared SentenceTransformer, loading it on first use."""
    global model
    if model is None:
        print('[VectorDB] Loading sentence-transformer model...')
        model = SentenceTransformer(MODEL_NAME)
        print('[VectorDB] Model loaded')
    return model


def _empty_index():
    """Return a zero-row float32 embedding matrix of the expected width."""
    return np.empty((0, EMBEDDING_DIM), dtype=np.float32)


def load_index():
    """Load embeddings and metadata from disk into the module globals.

    Starts with an empty index when either file is missing.  If the two
    files disagree on the number of entries, both are truncated in memory
    to their common prefix so that row ``i`` of ``embeddings`` always has
    a matching ``meta[i]``.
    """
    global embeddings, meta
    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE):
        loaded_emb = np.load(EMBEDDINGS_FILE)
        with open(META_FILE, encoding='utf-8') as fh:
            loaded_meta = json.load(fh)
        if loaded_emb.ndim != 2:
            # Not a (rows, dim) matrix; it cannot be indexed per entry.
            loaded_emb = _empty_index()
        usable = min(loaded_emb.shape[0], len(loaded_meta))
        if usable != loaded_emb.shape[0] or usable != len(loaded_meta):
            print(f'[VectorDB] WARNING: index files out of sync '
                  f'({loaded_emb.shape[0]} embeddings, {len(loaded_meta)} meta entries); '
                  f'using first {usable}')
            loaded_emb = loaded_emb[:usable]
            loaded_meta = loaded_meta[:usable]
        embeddings, meta = loaded_emb, loaded_meta
        print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={embeddings.shape[1]}')
    else:
        embeddings = _empty_index()
        meta = []
        print('[VectorDB] No existing index, starting fresh')


def _ensure_index():
    """Load the index if it has not been loaded yet.

    ``load_index()`` is otherwise only called from the ``__main__`` block,
    so this covers the case where the module is imported by another server.
    """
    if embeddings is None:
        load_index()


def save_index():
    """Write the current embeddings and metadata to disk."""
    np.save(EMBEDDINGS_FILE, embeddings)
    # Context manager so the JSON is flushed and the handle closed on return.
    with open(META_FILE, 'w', encoding='utf-8') as fh:
        json.dump(meta, fh)
    print(f'[VectorDB] Saved index: {len(meta)} entries')


def load_messages():
    """Load raw messages from JSON files"""
    msgs = []
    # Sorted so a rebuild sees the files in the same order every time.
    for path in sorted(glob.glob(os.path.join(DATA_DIR, 'messages_*.json'))):
        with open(path, encoding='utf-8') as fh:
            msgs.extend(json.load(fh))
    return msgs


def _content(msg):
    """Return the message's text, or '' when it is missing or not a string."""
    value = msg.get('content')
    return value if isinstance(value, str) else ''


def _meta_entry(msg, source):
    """Build the metadata record stored alongside one embedding row."""
    return {
        'id': msg.get('id', ''),
        'content': _content(msg),
        'source': source,
        'author': msg.get('author', ''),
        'channel': msg.get('channel', ''),
        'server': msg.get('server', ''),
        'timestamp': msg.get('timestamp', ''),
        'link': msg.get('link', ''),
    }


def _json_body():
    """Return the request body as a dict, or None if it is not a JSON object."""
    body = request.get_json(force=True, silent=True)
    return body if isinstance(body, dict) else None


@app.after_request
def add_cors(response):
    """Attach CORS headers to every response."""
    # NOTE(review): any origin is allowed, including for the write endpoints.
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Access-Control-Allow-Methods'] = 'POST, GET, OPTIONS'
    response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization'
    return response


@app.route('/vector/stats', methods=['GET', 'OPTIONS'])
def stats():
    """Return entry count, embedding dimension and per-source counts."""
    if request.method == 'OPTIONS':
        return make_response('', 200)
    _ensure_index()
    sources = {}
    for entry in meta:
        name = entry.get('source', 'unknown')
        sources[name] = sources.get(name, 0) + 1
    return jsonify({
        'total': len(meta),
        'dimension': int(embeddings.shape[1]) if embeddings.shape[0] > 0 else 0,
        'sources': sources,
        'is_indexed': len(meta) > 0,
    })


@app.route('/vector/search', methods=['POST', 'OPTIONS'])
def search():
    """Return the best-scoring indexed messages for a query.

    Request JSON: ``{"query": str, "top_k": int (optional, default 10)}``.
    ``top_k`` is clamped to the range 0..MAX_TOP_K.  Responds 400 for a
    body that is not a JSON object, a non-string query or a non-integer
    ``top_k``; 500 with the error text for anything unexpected.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        body = _json_body()
        if body is None:
            return jsonify({'error': 'JSON object body required'}), 400

        query = body.get('query', '')
        try:
            top_k = int(body.get('top_k', 10))
        except (TypeError, ValueError, OverflowError):
            return jsonify({'error': 'top_k must be an integer'}), 400
        # A negative value would otherwise slice from the end of the ranking.
        top_k = max(0, min(top_k, MAX_TOP_K))

        _ensure_index()
        # Use one snapshot of both arrays for the whole request.
        index, entries = embeddings, meta

        if not query or index.shape[0] == 0:
            return jsonify({'results': [], 'query': query})
        if not isinstance(query, str):
            return jsonify({'error': 'query must be a string'}), 400

        mdl = get_model()
        q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32)

        # Cosine similarity (embeddings are already normalized)
        scores = (index @ q_emb.T).flatten()
        top_idx = np.argsort(scores)[::-1][:top_k]

        results = []
        for i in top_idx:
            if scores[i] < MIN_SCORE:  # threshold; later scores are lower still
                break
            entry = entries[i]
            results.append({
                'content': entry.get('content', ''),
                'source': entry.get('source', ''),
                'author': entry.get('author', ''),
                'channel': entry.get('channel', ''),
                'server': entry.get('server', ''),
                'timestamp': entry.get('timestamp', ''),
                'link': entry.get('link', ''),
                'score': float(scores[i]),
            })

        return jsonify({'results': results, 'query': query, 'total': len(entries)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/vector/index', methods=['POST', 'OPTIONS'])
def index_messages():
    """Encode the posted messages, append them to the index and save it.

    Request JSON: ``{"messages": [ {...}, ... ], "source": str (optional)}``.
    Responds 400 for a body that is not a JSON object or when ``messages``
    is missing, empty or not a list of objects; 500 with the error text
    for anything unexpected.
    """
    global embeddings, meta
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        body = _json_body()
        if body is None:
            return jsonify({'error': 'JSON object body required'}), 400

        messages = body.get('messages', [])
        source = body.get('source', 'unknown')
        if not messages:
            return jsonify({'error': 'messages required'}), 400
        if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
            return jsonify({'error': 'messages must be a list of objects'}), 400

        _ensure_index()
        mdl = get_model()
        texts = [_content(m)[:MAX_TEXT_CHARS] for m in messages]
        new_emb = mdl.encode(texts, normalize_embeddings=True,
                             show_progress_bar=False, batch_size=64).astype(np.float32)
        new_meta = [_meta_entry(m, source) for m in messages]

        if embeddings.shape[0] == 0:
            combined = new_emb
        else:
            combined = np.vstack([embeddings, new_emb])

        # Publish both arrays only after every step that can fail has
        # succeeded, so they never get out of step with each other.
        embeddings, meta = combined, meta + new_meta
        save_index()
        return jsonify({'indexed': len(messages), 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


@app.route('/vector/rebuild', methods=['POST', 'OPTIONS'])
def rebuild():
    """Discard the current index and rebuild it from the stored message files.

    Responds 404 when no stored messages are found and 500 with the error
    text for anything unexpected.  The existing index is left untouched if
    the rebuild fails.
    """
    global embeddings, meta
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        messages = load_messages()
        if not messages:
            return jsonify({'error': 'No messages found. Run scraper first.'}), 404

        mdl = get_model()

        # Filter out empty/short messages
        valid = [m for m in messages
                 if isinstance(m, dict) and len(_content(m).strip()) > MIN_CONTENT_CHARS]
        print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...')

        if valid:
            texts = [_content(m)[:MAX_TEXT_CHARS] for m in valid]
            new_emb = mdl.encode(texts, normalize_embeddings=True,
                                 show_progress_bar=True, batch_size=128).astype(np.float32)
        else:
            # Keep the stored array two-dimensional even with nothing to encode.
            new_emb = _empty_index()
        new_meta = [_meta_entry(m, m.get('source', '')) for m in valid]

        # Swap in the new index only once it is complete.
        embeddings, meta = new_emb, new_meta
        save_index()
        return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    load_index()
    port = int(os.environ.get('VECTOR_DB_PORT', 8099))
    print(f'[VectorDB] Service running on port {port}')
    # NOTE(review): listens on all interfaces and the endpoints have no auth.
    app.run(host='0.0.0.0', port=port)