FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend. Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history, chat modes, save-to-wiki, markdown rendering, SSE streaming. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
226 lines
7.3 KiB
Python
226 lines
7.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Z.ai Wiki Vector DB Service
|
|
|
|
Stores Discord messages as vector embeddings using sentence-transformers.
|
|
Provides a search API for the wiki chat system to query as an additional data source.
|
|
|
|
Data sources:
|
|
- Server 1346756824233148527 (Z.ai Community)
|
|
- Server 1410352583364841555 (Z.ai Mod Server)
|
|
- Channel 1476364011091136544 (Z.ai Mod Channel)
|
|
|
|
Endpoints:
|
|
- POST /vector/search - Search with a query, return top-K matches
|
|
- POST /vector/index - Add messages to the index
|
|
- GET /vector/stats - Get index statistics
|
|
- POST /vector/rebuild - Rebuild from stored messages
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import glob
|
|
import numpy as np
|
|
from flask import Flask, request, jsonify, make_response
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
# Flask application object that the request hooks and routes register on.
app = Flask(__name__)

# Storage locations. The base directory comes from VECTOR_DB_DIR and falls
# back to /opt/blog/vector-db when the variable is unset.
DATA_DIR = os.environ.get('VECTOR_DB_DIR', '/opt/blog/vector-db')
MESSAGES_FILE = os.path.join(DATA_DIR, 'messages.json')
EMBEDDINGS_FILE = os.path.join(DATA_DIR, 'embeddings.npy')
META_FILE = os.path.join(DATA_DIR, 'meta.json')

# Created at import time so the directory exists before any index file is
# read or written.
os.makedirs(DATA_DIR, exist_ok=True)

# Process-wide state. `model` is filled in lazily by get_model(); `embeddings`
# and `meta` are filled in by load_index() and updated by the index/rebuild
# endpoints.
model = embeddings = None
meta = []  # parallel array: [{id, content, source, author, channel, server, timestamp, link}, ...]
|
|
|
|
|
|
def get_model():
    """Return the shared sentence-transformer, creating it on first use.

    The instance is cached in the module-level ``model`` global; later calls
    return that cached object without constructing a new one.
    """
    global model
    if model is not None:
        return model

    print('[VectorDB] Loading sentence-transformer model...')
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print('[VectorDB] Model loaded')
    return model
|
|
|
|
|
|
def load_index():
    """Populate the module-level ``embeddings`` and ``meta`` from disk.

    If both EMBEDDINGS_FILE and META_FILE exist they are loaded; otherwise an
    empty index is created: a (0, 384) float32 matrix and an empty metadata
    list.
    """
    global embeddings, meta
    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE):
        embeddings = np.load(EMBEDDINGS_FILE)
        # Context manager so the handle is closed deterministically instead of
        # whenever the file object happens to be garbage-collected.
        with open(META_FILE, encoding='utf-8') as fh:
            meta = json.load(fh)
        print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={embeddings.shape[1]}')
        # search() looks up meta[i] for embedding row i, so the two must stay
        # the same length; surface a mismatch instead of failing later.
        if embeddings.shape[0] != len(meta):
            print(f'[VectorDB] WARNING: {embeddings.shape[0]} embedding rows '
                  f'but {len(meta)} metadata entries')
    else:
        embeddings = np.empty((0, 384), dtype=np.float32)
        meta = []
        print('[VectorDB] No existing index, starting fresh')
|
|
|
|
|
def save_index():
    """Write the in-memory index to EMBEDDINGS_FILE and META_FILE.

    Reads the module-level ``embeddings`` and ``meta``; nothing is returned.
    """
    np.save(EMBEDDINGS_FILE, embeddings)
    # Close the handle explicitly so the JSON is flushed to disk before this
    # function reports success.
    with open(META_FILE, 'w', encoding='utf-8') as fh:
        json.dump(meta, fh)
    print(f'[VectorDB] Saved index: {len(meta)} entries')
|
|
|
|
|
|
def load_messages():
    """Load raw messages from every ``messages_*.json`` file in DATA_DIR.

    Each file's parsed JSON is passed to ``list.extend``, so each file is
    expected to hold a JSON array. Files are read in sorted filename order so
    the combined list is the same on every run.

    Returns:
        One list with the contents of all matching files, or an empty list
        when no file matches.
    """
    msgs = []
    # glob.glob() returns paths in arbitrary order; sort for reproducibility.
    for path in sorted(glob.glob(os.path.join(DATA_DIR, 'messages_*.json'))):
        with open(path, encoding='utf-8') as fh:
            msgs.extend(json.load(fh))
    return msgs
|
|
|
|
|
|
@app.after_request
def add_cors(response):
    """Set the CORS headers on an outgoing response and return it.

    Any origin is allowed, together with the POST, GET and OPTIONS methods and
    the Content-Type and Authorization request headers.
    """
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'),
        ('Access-Control-Allow-Headers', 'Content-Type, Authorization'),
    )
    for header_name, header_value in cors_headers:
        response.headers[header_name] = header_value
    return response
|
|
|
|
|
|
@app.route('/vector/stats', methods=['GET', 'OPTIONS'])
def stats():
    """Return index statistics as JSON.

    The payload has ``total`` (number of metadata entries), ``dimension``
    (embedding width, or 0 when there are no vectors), ``sources`` (entry
    count per ``source`` value, with 'unknown' for entries lacking one) and
    ``is_indexed``. OPTIONS requests get an empty 200 response.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)

    sources = {}
    for entry in meta:
        label = entry.get('source', 'unknown')
        sources[label] = sources.get(label, 0) + 1

    # `embeddings` is still None if load_index() has not run (it is only
    # called from the __main__ block), so guard before touching .shape.
    has_vectors = (
        embeddings is not None
        and embeddings.ndim == 2
        and embeddings.shape[0] > 0
    )
    return jsonify({
        'total': len(meta),
        'dimension': int(embeddings.shape[1]) if has_vectors else 0,
        'sources': sources,
        'is_indexed': len(meta) > 0,
    })
|
|
|
|
|
|
@app.route('/vector/search', methods=['POST', 'OPTIONS'])
def search():
    """Return the indexed messages most similar to a query.

    Request body (JSON object):
        query: text to search for.
        top_k: maximum number of results; defaults to 10, capped at 50, and
            negative values are treated as 0.

    Response:
        200 with ``{'results': [...], 'query': ..., 'total': ...}``. Results
        are ordered by descending score and only include entries scoring at
        least 0.1.
        400 when the body is not a JSON object, ``query`` is not a string or
        ``top_k`` is not an integer.
        500 with ``{'error': ...}`` on any other failure.
    OPTIONS requests get an empty 200 response.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None for malformed JSON so it can be reported as
        # a client error instead of falling into the generic 500 handler.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400

        query = body.get('query', '')
        if not isinstance(query, str):
            return jsonify({'error': 'query must be a string'}), 400

        try:
            top_k = int(body.get('top_k', 10))
        except (TypeError, ValueError, OverflowError):
            return jsonify({'error': 'top_k must be an integer'}), 400
        # Clamp to [0, 50]. A negative value would otherwise slice from the
        # end of the ranking and return almost the whole index.
        top_k = max(0, min(top_k, 50))

        # `embeddings` is None until load_index() has run.
        index_empty = embeddings is None or embeddings.shape[0] == 0
        if not query or top_k == 0 or index_empty:
            return jsonify({'results': [], 'query': query, 'total': len(meta)})

        mdl = get_model()
        q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32)

        # Cosine similarity (embeddings are already normalized)
        scores = (embeddings @ q_emb.T).flatten()
        top_idx = np.argsort(scores)[::-1][:top_k]

        results = []
        for i in top_idx:
            if scores[i] < 0.1:  # threshold
                # Scores are in descending order, so nothing later can pass.
                break
            entry = meta[int(i)]
            results.append({
                'content': entry['content'],
                'source': entry.get('source', ''),
                'author': entry.get('author', ''),
                'channel': entry.get('channel', ''),
                'server': entry.get('server', ''),
                'timestamp': entry.get('timestamp', ''),
                'link': entry.get('link', ''),
                'score': float(scores[i]),
            })

        return jsonify({'results': results, 'query': query, 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
@app.route('/vector/index', methods=['POST', 'OPTIONS'])
def index_messages():
    """Embed the posted messages and append them to the index.

    Request body (JSON object):
        messages: non-empty list of message objects; ``id``, ``content``,
            ``author``, ``channel``, ``server``, ``timestamp`` and ``link``
            are read from each, defaulting to ''.
        source: label stored on every new entry; defaults to 'unknown'.

    Only the first 2000 characters of ``content`` are embedded; the full
    content is stored in the metadata.

    Response:
        200 with ``{'indexed': <count added>, 'total': <entries in index>}``.
        400 when the body is not a JSON object, or ``messages`` is missing,
        empty, or not a list of objects.
        500 with ``{'error': ...}`` on any other failure.
    OPTIONS requests get an empty 200 response.
    """
    global embeddings, meta

    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None for malformed JSON so it can be reported as
        # a client error instead of falling into the generic 500 handler.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400

        messages = body.get('messages', [])
        source = body.get('source', 'unknown')

        if not messages:
            return jsonify({'error': 'messages required'}), 400
        if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
            return jsonify({'error': 'messages must be a list of objects'}), 400

        mdl = get_model()
        texts = [(m.get('content') or '')[:2000] for m in messages]
        new_emb = mdl.encode(texts, normalize_embeddings=True, show_progress_bar=False,
                             batch_size=64).astype(np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': m.get('content', ''),
                'source': source,
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m in messages
        ]

        # Build the combined matrix before touching any global. If vstack
        # raises (e.g. on a dimension mismatch), `meta` and `embeddings` stay
        # aligned. `embeddings` is None until load_index() has run.
        if embeddings is None or embeddings.shape[0] == 0:
            combined = new_emb
        else:
            combined = np.vstack([embeddings, new_emb])

        embeddings = combined
        meta.extend(new_meta)

        save_index()
        return jsonify({'indexed': len(messages), 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
@app.route('/vector/rebuild', methods=['POST', 'OPTIONS'])
def rebuild():
    """Rebuild the whole index from the stored message files.

    Messages come from load_messages(). Those whose stripped content is 10
    characters or shorter are dropped; the rest are embedded (first 2000
    characters of content) and replace the current index, which is then saved.

    Response:
        200 with ``{'indexed': ..., 'total_valid': ..., 'total_raw': ...}``.
        404 when load_messages() returns nothing.
        500 with ``{'error': ...}`` on any other failure.
    OPTIONS requests get an empty 200 response.
    """
    global embeddings, meta

    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        messages = load_messages()
        if not messages:
            return jsonify({'error': 'No messages found. Run scraper first.'}), 404

        mdl = get_model()

        # Filter out empty/short messages; `or ''` tolerates a null content.
        valid = [m for m in messages if len((m.get('content') or '').strip()) > 10]
        print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...')

        if valid:
            texts = [m.get('content', '')[:2000] for m in valid]
            new_embeddings = mdl.encode(texts, normalize_embeddings=True,
                                        show_progress_bar=True, batch_size=128).astype(np.float32)
        else:
            # Nothing to encode: use the same empty (0, 384) matrix that
            # load_index() creates for a fresh index.
            new_embeddings = np.empty((0, 384), dtype=np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': m.get('content', ''),
                'source': m.get('source', ''),
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m in valid
        ]

        # Swap both globals only after everything was built, so a failure
        # above leaves the previous index intact and self-consistent.
        embeddings = new_embeddings
        meta = new_meta

        save_index()
        return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load whatever index is on disk, then serve.
    load_index()

    # Listening port comes from VECTOR_DB_PORT, defaulting to 8099.
    port = int(os.environ.get('VECTOR_DB_PORT', 8099))
    print(f'[VectorDB] Service running on port {port}')

    # 0.0.0.0 binds every network interface.
    app.run(port=port, host='0.0.0.0')
|