Switch to async httpx for RAG searches to prevent event loop blocking

The multi-query vector search with blocking urllib.request.urlopen calls
was stalling the single-threaded uvicorn event loop. Now uses async
httpx.AsyncClient with asyncio.gather for parallel requests.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
admin
2026-06-03 11:51:48 +00:00
Unverified
parent 6271f3726e
commit ce8d4a79a0

View File

@@ -125,11 +125,17 @@ async def search_kb(query: str, limit: int = 5) -> str:
queries = list(dict.fromkeys(queries))[:2] queries = list(dict.fromkeys(queries))[:2]
all_results = {} all_results = {}
import httpx
async with httpx.AsyncClient(timeout=5) as client:
tasks = []
for q in queries: for q in queries:
url = f"{WIKI_API}/search?q={urllib.parse.quote(q)}&limit={limit}&token={_API_TOKEN}" url = f"{WIKI_API}/search?q={urllib.parse.quote(q)}&limit={limit}&token={_API_TOKEN}"
req = urllib.request.Request(url) tasks.append(client.get(url))
with urllib.request.urlopen(req, timeout=5) as resp: responses = await asyncio.gather(*tasks, return_exceptions=True)
data = json.loads(resp.read()) for resp in responses:
if isinstance(resp, Exception):
continue
data = resp.json()
for r in data.get("results", []): for r in data.get("results", []):
key = r.get("q", "")[:80] key = r.get("q", "")[:80]
if key not in all_results: if key not in all_results:
@@ -173,17 +179,22 @@ async def search_vector(query: str, top_k: int = 10) -> str:
queries = list(dict.fromkeys(queries))[:4] queries = list(dict.fromkeys(queries))[:4]
all_hits = {} all_hits = {}
# Use async httpx to avoid blocking the event loop
import httpx
async with httpx.AsyncClient(timeout=10) as client:
tasks = []
for q in queries: for q in queries:
data = json.dumps({"query": q, "top_k": 30}).encode() tasks.append(client.post(
req = urllib.request.Request(
f"{VECTOR_DB}/vector/search", f"{VECTOR_DB}/vector/search",
data=data, json={"query": q, "top_k": 30},
headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN}, headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN},
) ))
with urllib.request.urlopen(req, timeout=8) as resp: responses = await asyncio.gather(*tasks, return_exceptions=True)
result = json.loads(resp.read()) for resp in responses:
if isinstance(resp, Exception):
continue
result = resp.json()
for h in result.get("results", []): for h in result.get("results", []):
# Deduplicate by content
text = h.get("content", "") or h.get("text", "") text = h.get("content", "") or h.get("text", "")
key = text[:80] key = text[:80]
if key not in all_hits or h.get("score", 0) > all_hits[key].get("score", 0): if key not in all_hits or h.get("score", 0) > all_hits[key].get("score", 0):