Files
Zportal-Wiki-VectorDB-Chat/vector-db-service.py
admin ae621ecbb5 Initial release: Multi-provider AI chat with RAG
FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend.
Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history,
chat modes, save-to-wiki, markdown rendering, SSE streaming.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 10:25:29 +00:00

226 lines
7.3 KiB
Python

#!/usr/bin/env python3
"""Z.ai Wiki Vector DB Service
Stores Discord messages as vector embeddings using sentence-transformers.
Provides a search API for the wiki chat system to query as an additional data source.
Data sources:
- Server 1346756824233148527 (Z.ai Community)
- Server 1410352583364841555 (Z.ai Mod Server)
- Channel 1476364011091136544 (Z.ai Mod Channel)
Endpoints:
- POST /vector/search - Search with a query, return top-K matches
- POST /vector/index - Add messages to the index
- GET /vector/stats - Get index statistics
- POST /vector/rebuild - Rebuild the whole index from the messages_*.json files in the data directory
"""
import json
import os
import glob
import numpy as np
from flask import Flask, request, jsonify, make_response
from sentence_transformers import SentenceTransformer
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)

# On-disk layout: everything lives under a single directory that can be
# overridden with the VECTOR_DB_DIR environment variable.
DATA_DIR = os.getenv('VECTOR_DB_DIR', '/opt/blog/vector-db')
MESSAGES_FILE = os.path.join(DATA_DIR, 'messages.json')  # not referenced elsewhere in this file
EMBEDDINGS_FILE = os.path.join(DATA_DIR, 'embeddings.npy')
META_FILE = os.path.join(DATA_DIR, 'meta.json')
os.makedirs(DATA_DIR, exist_ok=True)

# Process-wide state shared by the request handlers.
model = None       # SentenceTransformer instance, created on first get_model() call
embeddings = None  # embedding matrix (one row per `meta` entry); assigned by load_index()
# Parallel array to `embeddings`:
# [{id, content, source, author, channel, server, timestamp, link}, ...]
meta = []
def get_model():
    """Return the shared sentence-transformer model, loading it on first use.

    The instance is stored in the module-level ``model`` variable so that
    later calls reuse it instead of loading the model again.
    """
    global model
    if model is not None:
        return model
    print('[VectorDB] Loading sentence-transformer model...')
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print('[VectorDB] Model loaded')
    return model
def load_index():
    """Load the persisted index into the module-level ``embeddings`` and ``meta``.

    When both ``EMBEDDINGS_FILE`` and ``META_FILE`` exist they are read from
    disk; otherwise an empty index (a ``(0, 384)`` float32 matrix and an empty
    list) is created in memory. Nothing is written to disk here.
    """
    global embeddings, meta
    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE):
        embeddings = np.load(EMBEDDINGS_FILE)
        # Context manager so the handle is closed deterministically.
        with open(META_FILE, encoding='utf-8') as fh:
            meta = json.load(fh)
        if embeddings.ndim != 2 and embeddings.size == 0:
            # A degenerate empty array (e.g. persisted by a rebuild that had
            # nothing to embed) has no second axis; normalise it so the
            # shape[1] access below and in other handlers keeps working.
            embeddings = np.empty((0, 384), dtype=np.float32)
        print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={embeddings.shape[1]}')
        if embeddings.shape[0] != len(meta):
            # Rows and metadata are looked up by the same position, so a
            # length mismatch means search results can be wrong or fail.
            print(f'[VectorDB] WARNING: {embeddings.shape[0]} embedding rows '
                  f'but {len(meta)} meta entries; consider rebuilding the index')
    else:
        embeddings = np.empty((0, 384), dtype=np.float32)
        meta = []
        print('[VectorDB] No existing index, starting fresh')
def save_index():
    """Write the in-memory index to ``EMBEDDINGS_FILE`` and ``META_FILE``.

    The embedding matrix is stored with ``np.save`` and the metadata list as
    JSON. Both files are overwritten.
    """
    np.save(EMBEDDINGS_FILE, embeddings)
    # Close the handle explicitly (via ``with``) so the JSON is flushed to
    # disk before we report success, rather than whenever the file object
    # happens to be garbage-collected.
    with open(META_FILE, 'w', encoding='utf-8') as fh:
        json.dump(meta, fh)
    print(f'[VectorDB] Saved index: {len(meta)} entries')
def load_messages():
    """Load raw messages from the ``messages_*.json`` files in ``DATA_DIR``.

    Returns:
        A single list with the contents of every matching file, concatenated
        in sorted filename order. Files whose top-level JSON value is not a
        list are skipped with a log line.
    """
    msgs = []
    pattern = os.path.join(DATA_DIR, 'messages_*.json')
    # glob returns matches in arbitrary order; sort so that the resulting
    # index order is reproducible between rebuilds.
    for path in sorted(glob.glob(pattern)):
        with open(path, encoding='utf-8') as fh:
            batch = json.load(fh)
        if not isinstance(batch, list):
            # list.extend() on a dict would silently add its keys as "messages".
            print(f'[VectorDB] Skipping {path}: expected a JSON list')
            continue
        msgs.extend(batch)
    return msgs
@app.after_request
def add_cors(response):
    """Set the CORS headers on every outgoing response and return it."""
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'),
        ('Access-Control-Allow-Headers', 'Content-Type, Authorization'),
    )
    for header_name, header_value in cors_headers:
        response.headers[header_name] = header_value
    return response
@app.route('/vector/stats', methods=['GET', 'OPTIONS'])
def stats():
    """Report index statistics as JSON.

    The payload contains the entry count (``total``), the embedding
    ``dimension`` (0 while the index is empty), a per-``source`` entry count
    and an ``is_indexed`` flag. OPTIONS requests get an empty 200 response.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)

    # Tally entries per source; entries without one count as 'unknown'.
    per_source = {}
    for entry in meta:
        label = entry.get('source', 'unknown')
        per_source.setdefault(label, 0)
        per_source[label] += 1

    entry_count = len(meta)
    if embeddings.shape[0] > 0:
        dimension = int(embeddings.shape[1])
    else:
        dimension = 0

    payload = {
        'total': entry_count,
        'dimension': dimension,
        'sources': per_source,
        'is_indexed': entry_count > 0,
    }
    return jsonify(payload)
def _parse_top_k(raw, default=10, limit=50):
    """Coerce a client-supplied ``top_k`` to an int clamped to ``[0, limit]``.

    ``None`` (key absent or JSON null) yields ``default``. Raises TypeError,
    ValueError or OverflowError when ``raw`` cannot be converted to an int.
    """
    if raw is None:
        return default
    # Clamp at 0 as well: a negative value would otherwise turn the
    # ``[:top_k]`` slice into "all but the last N" and bypass the limit.
    return max(0, min(int(raw), limit))


@app.route('/vector/search', methods=['POST', 'OPTIONS'])
def search():
    """Return the indexed messages most similar to a query.

    Request body (JSON object):
        query: text to search for.
        top_k: maximum number of results (default 10, capped at 50).

    Response: ``{'results': [...], 'query': ..., 'total': <index size>}``.
    Each result carries the stored message fields plus a ``score``; results
    are ordered by descending score and cut off below a score of 0.1.
    Returns 400 for a malformed body or ``top_k`` and 500 on unexpected errors.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None for unparsable JSON instead of raising,
        # so client errors can be reported as 400 rather than 500.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400
        query = body.get('query', '')
        if not isinstance(query, str):
            return jsonify({'error': 'query must be a string'}), 400
        try:
            top_k = _parse_top_k(body.get('top_k'))
        except (TypeError, ValueError, OverflowError):
            return jsonify({'error': 'top_k must be an integer'}), 400

        if not query or top_k == 0 or embeddings.shape[0] == 0:
            return jsonify({'results': [], 'query': query, 'total': len(meta)})

        mdl = get_model()
        q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32)
        # Stored rows and the query are both encoded with
        # normalize_embeddings=True, so the dot product is the cosine similarity.
        scores = (embeddings @ q_emb.T).flatten()
        top_idx = np.argsort(scores)[::-1][:top_k]

        min_score = 0.1  # relevance threshold
        results = []
        for i in top_idx:
            if scores[i] < min_score:
                # Indices are in descending score order; nothing later qualifies.
                break
            entry = meta[i]
            results.append({
                'content': entry['content'],
                'source': entry.get('source', ''),
                'author': entry.get('author', ''),
                'channel': entry.get('channel', ''),
                'server': entry.get('server', ''),
                'timestamp': entry.get('timestamp', ''),
                'link': entry.get('link', ''),
                'score': float(scores[i]),
            })
        return jsonify({'results': results, 'query': query, 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/vector/index', methods=['POST', 'OPTIONS'])
def index_messages():
    """Embed a batch of messages, append them to the index and persist it.

    Request body (JSON object):
        messages: non-empty list of objects with ``content`` and optionally
            ``id``, ``author``, ``channel``, ``server``, ``timestamp``, ``link``.
        source: label stored on every message of the batch (default 'unknown').

    Response: ``{'indexed': <batch size>, 'total': <index size>}``.
    Returns 400 for a malformed body and 500 on unexpected errors.
    """
    global embeddings, meta
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None for unparsable JSON instead of raising,
        # so client errors can be reported as 400 rather than 500.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400
        messages = body.get('messages', [])
        source = body.get('source', 'unknown')
        if not messages:
            return jsonify({'error': 'messages required'}), 400
        if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
            return jsonify({'error': 'messages must be a list of objects'}), 400

        # Treat a missing or null content as an empty string.
        contents = [m.get('content') or '' for m in messages]

        mdl = get_model()
        # Only the first 2000 characters of each message are embedded; the
        # full content is still kept in the metadata.
        texts = [c[:2000] for c in contents]
        new_emb = mdl.encode(texts, normalize_embeddings=True, show_progress_bar=False,
                             batch_size=64).astype(np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': content,
                'source': source,
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m, content in zip(messages, contents)
        ]

        # Build the combined matrix before touching any global: if vstack
        # raises (e.g. a dimension mismatch), embeddings and meta stay in sync.
        if embeddings.shape[0] == 0:
            combined = new_emb
        else:
            combined = np.vstack([embeddings, new_emb])
        embeddings = combined
        meta.extend(new_meta)

        save_index()
        return jsonify({'indexed': len(messages), 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/vector/rebuild', methods=['POST', 'OPTIONS'])
def rebuild():
    """Recreate the whole index from the raw message files and persist it.

    Messages come from ``load_messages()``; those whose stripped content is
    10 characters or shorter are dropped. The previous in-memory index is
    replaced only after the new one has been fully built.

    Response: ``{'indexed': ..., 'total_valid': ..., 'total_raw': ...}``.
    Returns 404 when there are no raw messages and 500 on unexpected errors.
    """
    global embeddings, meta
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        messages = load_messages()
        if not messages:
            return jsonify({'error': 'No messages found. Run scraper first.'}), 404

        # Filter out empty/short messages (a null content counts as empty).
        valid = [m for m in messages if len((m.get('content') or '').strip()) > 10]
        print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...')

        if valid:
            mdl = get_model()
            # Only the first 2000 characters of each message are embedded.
            texts = [m['content'][:2000] for m in valid]
            new_embeddings = mdl.encode(texts, normalize_embeddings=True,
                                        show_progress_bar=True, batch_size=128).astype(np.float32)
        else:
            # NOTE(review): encode() on an empty list may not return a 2-D
            # array - confirm. Building the empty matrix explicitly keeps the
            # (rows, 384) shape the other handlers rely on.
            new_embeddings = np.empty((0, 384), dtype=np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': m.get('content', ''),
                'source': m.get('source', ''),
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m in valid
        ]

        # Swap both globals together, and only now: a failure while encoding
        # must not leave an emptied meta list next to the old embeddings.
        embeddings, meta = new_embeddings, new_meta
        save_index()
        return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Restore any persisted index before the server starts handling requests.
    load_index()
    # Listening port comes from VECTOR_DB_PORT, falling back to 8099.
    port = int(os.getenv('VECTOR_DB_PORT', 8099))
    print(f'[VectorDB] Service running on port {port}')
    app.run(host='0.0.0.0', port=port)