Files
mantle-ai-trader/skills/quiz-mastery/src/quiz_mastery/quiz_extractor.py
2026-06-06 05:21:10 +00:00

116 lines
3.5 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
from .models import Question
def build_extraction_prompt(content: str) -> dict:
    """Build the prompts that ask an LLM to parse questions from a question file.

    The system prompt tells the model to act as a question parser and to
    output only JSON. The user prompt embeds ``content`` verbatim, lists the
    four supported question types, and shows the JSON array schema expected
    back (id, knowledge_point_ids, level, type, prompt, options, answer,
    explanation).

    Args:
        content: Raw text content containing questions.

    Returns:
        dict with 'system_prompt' and 'user_prompt' keys.
    """
    system_prompt = (
        "你是一个专业的题目解析助手。从用户提供的题目文件中识别并解析所有题目。\n"
        "严格按照要求的 JSON 格式输出,不要输出任何其他内容。"
    )
    # The doubled braces ({{ / }}) render as literal braces in the JSON
    # example; only {content} is interpolated, so braces inside ``content``
    # itself need no escaping.
    # The note lines spell out their separators ("True" 或 "False", commas
    # between clauses) so the answer-format instructions are unambiguous.
    user_prompt = f"""请从以下题目文件内容中解析出所有题目。
## 题目文件内容
{content}
## 解析要求
1. 识别每道题的类型:
- single_choice: 选择题(有 A/B/C/D 等选项)
- true_false: 判断题(判断对错)
- fill_blank: 填空题(有空格需要填写)
- short_answer: 简答题(需要文字回答)
2. 提取题目的所有信息
3. 如果题目有答案和解析,也一并提取
4. 为每道题分配唯一 ID
## 输出格式
输出纯 JSON 数组,每个元素格式如下:
```json
[
{{
"id": "q_001",
"knowledge_point_ids": [],
"level": 1,
"type": "single_choice",
"prompt": "题目内容",
"options": ["A. 选项一", "B. 选项二", "C. 选项三", "D. 选项四"],
"answer": "A",
"explanation": "解析内容(如果有)"
}}
]
```
注意:
- type 必须是 single_choice, true_false, fill_blank, short_answer 之一
- 选择题的 answer 填选项字母(A/B/C/D)
- 判断题的 answer 填 "True" 或 "False"
- 填空题的 answer 填正确答案文本
- 简答题的 answer 填参考答案(如果有)
- 如果无法确定 knowledge_point_ids,留空数组
- level 默认为 1,如果能从题目难度判断则相应调整
请直接输出 JSON 数组,不要包含 markdown 代码块标记或其他文字。"""
    return {
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
    }
def parse_questions_json(json_str: str) -> list[Question]:
    """Parse an LLM-returned JSON string into a list of Question objects.

    The text is first parsed as-is. Only when that fails is it searched for
    a markdown code fence (``` or ```json), in which case the fenced body is
    parsed instead. Trying the raw text first means JSON whose string values
    happen to contain triple backticks is never cut apart, while replies
    that wrap the array in a fence (with or without prose around it) still
    work.

    Fields that are missing or explicitly ``null`` fall back to defaults:
    id "q_unknown", level 1, type "single_choice", empty string for prompt
    and explanation, and an empty list for knowledge_point_ids, options and
    source_refs. ``answer`` is passed through unchanged and may be ``None``.

    Args:
        json_str: JSON string containing a list of question dicts.

    Returns:
        List of Question objects, in the order they appear in the array.

    Raises:
        json.JSONDecodeError: If no valid JSON can be found in json_str.
        ValueError: If the parsed data is not a list, or if any element of
            the list is not a JSON object.
    """
    cleaned = json_str.strip()
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        fenced = _extract_fenced_block(cleaned)
        if fenced is None:
            # Nothing else to try: surface the original decode error.
            raise
        data = json.loads(fenced)

    if not isinstance(data, list):
        raise ValueError(f"Expected a JSON array, got {type(data).__name__}")

    questions: list[Question] = []
    for index, item in enumerate(data):
        # Validate before calling .get so a malformed element raises the
        # documented ValueError rather than an AttributeError.
        if not isinstance(item, dict):
            raise ValueError(
                f"Expected question at index {index} to be a JSON object, "
                f"got {type(item).__name__}"
            )
        questions.append(
            Question(
                id=_field_or_default(item, "id", "q_unknown"),
                knowledge_point_ids=_field_or_default(
                    item, "knowledge_point_ids", []
                ),
                level=_field_or_default(item, "level", 1),
                type=_field_or_default(item, "type", "single_choice"),
                prompt=_field_or_default(item, "prompt", ""),
                options=_field_or_default(item, "options", []),
                answer=item.get("answer"),
                explanation=_field_or_default(item, "explanation", ""),
                source_refs=_field_or_default(item, "source_refs", []),
            )
        )
    return questions


def _extract_fenced_block(text: str) -> str | None:
    """Return the body of a markdown code fence in text, or None if absent.

    The body runs from the line after the first ``` marker up to the last
    ``` marker. If there is no closing marker, it runs to the end of text.
    """
    fence = "```"
    open_at = text.find(fence)
    if open_at == -1:
        return None
    # The rest of the opening line is the language tag (e.g. "json"); skip it.
    line_end = text.find("\n", open_at)
    if line_end == -1:
        return None
    close_at = text.rfind(fence)
    if close_at > line_end:
        return text[line_end + 1:close_at].strip()
    # Unterminated fence: take everything after the opening line.
    return text[line_end + 1:].strip()


def _field_or_default(item: dict, key: str, default):
    """Return item[key], or default when the key is missing or null."""
    value = item.get(key)
    return default if value is None else value