From ce8d4a79a0ed2f4a1adc7ec16386d54d7b4be635 Mon Sep 17 00:00:00 2001
From: admin <admin@rommark.dev>
Date: Wed, 3 Jun 2026 11:51:48 +0000
Subject: [PATCH] Switch to async httpx for RAG searches to prevent event loop
 blocking

The multi-query vector search with blocking urllib.request.urlopen calls
was stalling the single-threaded uvicorn event loop. Now uses async
httpx.AsyncClient with asyncio.gather for parallel requests.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 wiki-vector-chat.py | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/wiki-vector-chat.py b/wiki-vector-chat.py
index b5bc0e6..9ffeb2f 100644
--- a/wiki-vector-chat.py
+++ b/wiki-vector-chat.py
@@ -125,11 +125,17 @@ async def search_kb(query: str, limit: int = 5) -> str:
         queries = list(dict.fromkeys(queries))[:2]
 
         all_results = {}
-        for q in queries:
-            url = f"{WIKI_API}/search?q={urllib.parse.quote(q)}&limit={limit}&token={_API_TOKEN}"
-            req = urllib.request.Request(url)
-            with urllib.request.urlopen(req, timeout=5) as resp:
-                data = json.loads(resp.read())
+        import httpx
+        async with httpx.AsyncClient(timeout=5) as client:
+            tasks = []
+            for q in queries:
+                url = f"{WIKI_API}/search?q={urllib.parse.quote(q)}&limit={limit}&token={_API_TOKEN}"
+                tasks.append(client.get(url))
+            responses = await asyncio.gather(*tasks, return_exceptions=True)
+            for resp in responses:
+                if isinstance(resp, Exception):
+                    continue
+                data = resp.json()
                 for r in data.get("results", []):
                     key = r.get("q", "")[:80]
                     if key not in all_results:
@@ -173,17 +179,22 @@ async def search_vector(query: str, top_k: int = 10) -> str:
         queries = list(dict.fromkeys(queries))[:4]
 
         all_hits = {}
-        for q in queries:
-            data = json.dumps({"query": q, "top_k": 30}).encode()
-            req = urllib.request.Request(
-                f"{VECTOR_DB}/vector/search",
-                data=data,
-                headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN},
-            )
-            with urllib.request.urlopen(req, timeout=8) as resp:
-                result = json.loads(resp.read())
+        # Use async httpx to avoid blocking the event loop
+        import httpx
+        async with httpx.AsyncClient(timeout=10) as client:
+            tasks = []
+            for q in queries:
+                tasks.append(client.post(
+                    f"{VECTOR_DB}/vector/search",
+                    json={"query": q, "top_k": 30},
+                    headers={"Content-Type": "application/json", "x-api-key": _API_TOKEN},
+                ))
+            responses = await asyncio.gather(*tasks, return_exceptions=True)
+            for resp in responses:
+                if isinstance(resp, Exception):
+                    continue
+                result = resp.json()
                 for h in result.get("results", []):
-                    # Deduplicate by content
                     text = h.get("content", "") or h.get("text", "")
                     key = text[:80]
                     if key not in all_hits or h.get("score", 0) > all_hits[key].get("score", 0):