Initial release: Multi-provider AI chat with RAG
FastAPI backend (wiki-vector-chat.py) with Odysseus-style frontend. Features: multi-provider LLM, Wiki KB + VectorDB RAG, session history, chat modes, save-to-wiki, markdown rendering, SSE streaming. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
225
vector-db-service.py
Normal file
225
vector-db-service.py
Normal file
@@ -0,0 +1,225 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Z.ai Wiki Vector DB Service
|
||||
|
||||
Stores Discord messages as vector embeddings using sentence-transformers.
|
||||
Provides a search API for the wiki chat system to query as an additional data source.
|
||||
|
||||
Data sources:
|
||||
- Server 1346756824233148527 (Z.ai Community)
|
||||
- Server 1410352583364841555 (Z.ai Mod Server)
|
||||
- Channel 1476364011091136544 (Z.ai Mod Channel)
|
||||
|
||||
Endpoints:
|
||||
- POST /vector/search - Search with a query, return top-K matches
|
||||
- POST /vector/index - Add messages to the index
|
||||
- GET /vector/stats - Get index statistics
|
||||
- POST /vector/rebuild - Rebuild from stored messages
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import glob
|
||||
import numpy as np
|
||||
from flask import Flask, request, jsonify, make_response
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
app = Flask(__name__)

# All persisted artifacts live in one directory, overridable via VECTOR_DB_DIR.
DATA_DIR = os.getenv('VECTOR_DB_DIR', '/opt/blog/vector-db')
os.makedirs(DATA_DIR, exist_ok=True)

MESSAGES_FILE, EMBEDDINGS_FILE, META_FILE = (
    os.path.join(DATA_DIR, filename)
    for filename in ('messages.json', 'embeddings.npy', 'meta.json')
)

# In-memory service state, filled in lazily or by load_index().
model = None       # SentenceTransformer instance, created on first use by get_model()
embeddings = None  # numpy array with one row per indexed message
# Parallel to the rows of `embeddings`:
# [{id, content, source, author, channel, server, timestamp, link}, ...]
meta = []
|
||||
|
||||
|
||||
def get_model():
    """Return the shared sentence-transformer, loading it on first call.

    The instance is cached in the module-level ``model`` so later calls
    reuse it instead of loading it again.
    """
    global model
    if model is not None:
        return model

    print('[VectorDB] Loading sentence-transformer model...')
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print('[VectorDB] Model loaded')
    return model
|
||||
|
||||
|
||||
def load_index():
    """Populate the module-level ``embeddings`` and ``meta`` from disk.

    When both EMBEDDINGS_FILE and META_FILE exist they are loaded as the
    current index. Otherwise the index starts empty, with a (0, 384)
    float32 array and an empty metadata list.
    """
    global embeddings, meta
    if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(META_FILE):
        embeddings = np.load(EMBEDDINGS_FILE)
        # Context manager so the handle is closed deterministically.
        with open(META_FILE, encoding='utf-8') as fh:
            meta = json.load(fh)
        # A persisted array that is not 2-D has no second axis to report;
        # log 0 instead of raising during startup.
        dim = embeddings.shape[1] if embeddings.ndim == 2 else 0
        print(f'[VectorDB] Loaded index: {len(meta)} entries, dim={dim}')
        if embeddings.shape[0] != len(meta):
            # search() looks up meta by embedding row, so a mismatch matters.
            print(f'[VectorDB] WARNING: {embeddings.shape[0]} embedding rows '
                  f'but {len(meta)} metadata entries')
    else:
        embeddings = np.empty((0, 384), dtype=np.float32)
        meta = []
        print('[VectorDB] No existing index, starting fresh')
|
||||
|
||||
|
||||
def save_index():
    """Persist the in-memory index to EMBEDDINGS_FILE and META_FILE.

    Writes the module-level ``embeddings`` array with ``np.save`` and the
    ``meta`` list as JSON, then logs the number of entries saved.
    """
    np.save(EMBEDDINGS_FILE, embeddings)
    # Close (and therefore flush) the metadata file before reporting success,
    # rather than leaving the handle to be closed by garbage collection.
    with open(META_FILE, 'w', encoding='utf-8') as fh:
        json.dump(meta, fh)
    print(f'[VectorDB] Saved index: {len(meta)} entries')
|
||||
|
||||
|
||||
def load_messages():
    """Load raw messages from every ``messages_*.json`` file in DATA_DIR.

    Returns:
        A single list containing the contents of all matching files,
        concatenated in sorted filename order.
    """
    msgs = []
    # Sort the paths so the concatenation order does not depend on the
    # filesystem's directory listing order.
    for path in sorted(glob.glob(os.path.join(DATA_DIR, 'messages_*.json'))):
        # Context manager so each file handle is closed as soon as it is read.
        with open(path, encoding='utf-8') as fh:
            msgs.extend(json.load(fh))
    return msgs
|
||||
|
||||
|
||||
@app.after_request
def add_cors(response):
    """Set the CORS headers on every outgoing response and return it."""
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'),
        ('Access-Control-Allow-Headers', 'Content-Type, Authorization'),
    )
    for header_name, header_value in cors_headers:
        response.headers[header_name] = header_value
    return response
|
||||
|
||||
|
||||
@app.route('/vector/stats', methods=['GET', 'OPTIONS'])
def stats():
    """Report index statistics as JSON.

    The response carries the entry count, the embedding dimension (0 when
    the index has no rows), a per-source entry count, and whether anything
    is indexed. OPTIONS requests get an empty 200 response.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)

    # Tally entries by their 'source' field; entries without one count as 'unknown'.
    per_source = {}
    for entry in meta:
        label = entry.get('source', 'unknown')
        per_source.setdefault(label, 0)
        per_source[label] += 1

    entry_count = len(meta)
    if embeddings.shape[0] > 0:
        dimension = int(embeddings.shape[1])
    else:
        dimension = 0

    payload = {
        'total': entry_count,
        'dimension': dimension,
        'sources': per_source,
        'is_indexed': entry_count > 0,
    }
    return jsonify(payload)
|
||||
|
||||
|
||||
@app.route('/vector/search', methods=['POST', 'OPTIONS'])
def search():
    """Return the indexed messages most similar to a query.

    Request JSON:
        query: text to search for.
        top_k: maximum number of results (default 10, clamped to 0..50).

    Response JSON:
        results: matches ordered by descending score, each with content,
            source, author, channel, server, timestamp, link and score.
            Matches scoring below 0.1 are dropped.
        query: the query as received.
        total: number of entries currently in the index.

    Returns 400 for a body that is not a JSON object or for wrongly typed
    fields, and 500 with the error text for unexpected failures.
    OPTIONS requests get an empty 200 response.
    """
    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None on malformed JSON so it can be reported
        # as a client error instead of falling through to the 500 handler.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400

        query = body.get('query', '')
        if not isinstance(query, str):
            return jsonify({'error': 'query must be a string'}), 400

        try:
            top_k = int(body.get('top_k', 10))
        except (TypeError, ValueError, OverflowError):
            return jsonify({'error': 'top_k must be an integer'}), 400
        # Clamp on both sides: a negative value would otherwise slice from
        # the wrong end and bypass the cap of 50.
        top_k = max(0, min(top_k, 50))

        if not query or embeddings.shape[0] == 0:
            return jsonify({'results': [], 'query': query, 'total': len(meta)})

        mdl = get_model()
        q_emb = mdl.encode([query], normalize_embeddings=True).astype(np.float32)

        # Cosine similarity (embeddings are already normalized)
        scores = (embeddings @ q_emb.T).flatten()

        top_idx = np.argsort(scores)[::-1][:top_k]

        results = []
        for i in top_idx:
            # Scores are in descending order, so everything after the first
            # one below the threshold is below it as well.
            if scores[i] < 0.1:
                break
            entry = meta[int(i)]
            results.append({
                'content': entry.get('content', ''),
                'source': entry.get('source', ''),
                'author': entry.get('author', ''),
                'channel': entry.get('channel', ''),
                'server': entry.get('server', ''),
                'timestamp': entry.get('timestamp', ''),
                'link': entry.get('link', ''),
                'score': float(scores[i]),
            })

        return jsonify({'results': results, 'query': query, 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/vector/index', methods=['POST', 'OPTIONS'])
def index_messages():
    """Embed the posted messages and append them to the index.

    Request JSON:
        messages: non-empty list of message objects; each may carry id,
            content, author, channel, server, timestamp and link.
        source: label stored on every added entry (default 'unknown').

    Response JSON:
        indexed: number of messages added by this request.
        total: number of entries in the index afterwards.

    Returns 400 for a missing or malformed payload and 500 with the error
    text for unexpected failures. OPTIONS requests get an empty 200 response.
    """
    global embeddings, meta

    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        # silent=True yields None on malformed JSON so it can be reported
        # as a client error instead of falling through to the 500 handler.
        body = request.get_json(force=True, silent=True)
        if not isinstance(body, dict):
            return jsonify({'error': 'JSON object body required'}), 400

        messages = body.get('messages', [])
        source = body.get('source', 'unknown')

        if not messages:
            return jsonify({'error': 'messages required'}), 400
        if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
            return jsonify({'error': 'messages must be a list of objects'}), 400

        # A null or non-string content would break the slicing below, so
        # normalise it to text once and use that for both embedding and storage.
        contents = []
        for m in messages:
            content = m.get('content') or ''
            contents.append(content if isinstance(content, str) else str(content))

        mdl = get_model()
        # Only the first 2000 characters of each message are embedded.
        texts = [content[:2000] for content in contents]
        new_emb = mdl.encode(texts, normalize_embeddings=True, show_progress_bar=False,
                             batch_size=64).astype(np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': content,
                'source': source,
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m, content in zip(messages, contents)
        ]

        if embeddings.shape[0] == 0:
            combined = new_emb
        else:
            combined = np.vstack([embeddings, new_emb])

        # Commit vectors and metadata together, only after every step that
        # can fail has succeeded, so the two never get out of step.
        embeddings = combined
        meta.extend(new_meta)

        save_index()
        return jsonify({'indexed': len(messages), 'total': len(meta)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@app.route('/vector/rebuild', methods=['POST', 'OPTIONS'])
def rebuild():
    """Rebuild the whole index from the stored raw message files.

    Messages come from load_messages(). Entries that are not objects, or
    whose content is not a string longer than 10 characters after
    stripping, are skipped.

    Response JSON:
        indexed: number of entries in the rebuilt index.
        total_valid: number of messages that passed the filter.
        total_raw: number of messages read from disk.

    Returns 404 when no stored messages exist and 500 with the error text
    for unexpected failures. OPTIONS requests get an empty 200 response.
    """
    global embeddings, meta

    if request.method == 'OPTIONS':
        return make_response('', 200)
    try:
        messages = load_messages()
        if not messages:
            return jsonify({'error': 'No messages found. Run scraper first.'}), 404

        mdl = get_model()

        # Filter out empty/short messages (and malformed entries)
        valid = [
            m for m in messages
            if isinstance(m, dict)
            and isinstance(m.get('content'), str)
            and len(m['content'].strip()) > 10
        ]
        print(f'[VectorDB] Rebuilding index from {len(valid)} valid messages...')

        # Only the first 2000 characters of each message are embedded.
        texts = [m['content'][:2000] for m in valid]
        if texts:
            new_embeddings = mdl.encode(texts, normalize_embeddings=True,
                                        show_progress_bar=True, batch_size=128).astype(np.float32)
        else:
            # Do not encode an empty list: keep the 2-D empty shape that
            # load_index() uses for a fresh index, so the saved file stays loadable.
            new_embeddings = np.empty((0, 384), dtype=np.float32)

        new_meta = [
            {
                'id': m.get('id', ''),
                'content': m.get('content', ''),
                'source': m.get('source', ''),
                'author': m.get('author', ''),
                'channel': m.get('channel', ''),
                'server': m.get('server', ''),
                'timestamp': m.get('timestamp', ''),
                'link': m.get('link', ''),
            }
            for m in valid
        ]

        # Swap in the new index only once it is fully built, so a failed
        # rebuild leaves the previous index intact.
        embeddings, meta = new_embeddings, new_meta

        save_index()
        return jsonify({'indexed': len(meta), 'total_valid': len(valid), 'total_raw': len(messages)})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Restore any persisted index before the server starts accepting requests.
    load_index()
    # Listening port comes from VECTOR_DB_PORT, falling back to 8099.
    listen_port = int(os.getenv('VECTOR_DB_PORT', 8099))
    print(f'[VectorDB] Service running on port {listen_port}')
    app.run(host='0.0.0.0', port=listen_port)
|
||||
Reference in New Issue
Block a user