Initial commit
This commit is contained in:
115
skills/quiz-mastery/src/quiz_mastery/quiz_extractor.py
Executable file
115
skills/quiz-mastery/src/quiz_mastery/quiz_extractor.py
Executable file
@@ -0,0 +1,115 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from .models import Question
|
||||
|
||||
|
||||
def build_extraction_prompt(content: str) -> dict:
|
||||
"""Build a prompt for LLM to parse questions from a question file.
|
||||
|
||||
Args:
|
||||
content: Raw text content containing questions.
|
||||
|
||||
Returns:
|
||||
dict with 'system_prompt' and 'user_prompt' keys.
|
||||
"""
|
||||
system_prompt = (
|
||||
"你是一个专业的题目解析助手。从用户提供的题目文件中识别并解析所有题目。\n"
|
||||
"严格按照要求的 JSON 格式输出,不要输出任何其他内容。"
|
||||
)
|
||||
|
||||
user_prompt = f"""请从以下题目文件内容中解析出所有题目。
|
||||
|
||||
## 题目文件内容
|
||||
|
||||
{content}
|
||||
|
||||
## 解析要求
|
||||
1. 识别每道题的类型:
|
||||
- single_choice: 选择题(有 A/B/C/D 等选项)
|
||||
- true_false: 判断题(判断对错)
|
||||
- fill_blank: 填空题(有空格需要填写)
|
||||
- short_answer: 简答题(需要文字回答)
|
||||
2. 提取题目的所有信息
|
||||
3. 如果题目有答案和解析,也一并提取
|
||||
4. 为每道题分配唯一 ID
|
||||
|
||||
## 输出格式
|
||||
输出纯 JSON 数组,每个元素格式如下:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"id": "q_001",
|
||||
"knowledge_point_ids": [],
|
||||
"level": 1,
|
||||
"type": "single_choice",
|
||||
"prompt": "题目内容",
|
||||
"options": ["A. 选项一", "B. 选项二", "C. 选项三", "D. 选项四"],
|
||||
"answer": "A",
|
||||
"explanation": "解析内容(如果有)"
|
||||
}}
|
||||
]
|
||||
```
|
||||
|
||||
注意:
|
||||
- type 必须是 single_choice, true_false, fill_blank, short_answer 之一
|
||||
- 选择题的 answer 填选项字母(A/B/C/D)
|
||||
- 判断题的 answer 填 "True" 或 "False"
|
||||
- 填空题的 answer 填正确答案文本
|
||||
- 简答题的 answer 填参考答案(如果有)
|
||||
- 如果无法确定 knowledge_point_ids,留空数组
|
||||
- level 默认为 1,如果能从题目难度判断则相应调整
|
||||
|
||||
请直接输出 JSON 数组,不要包含 markdown 代码块标记或其他文字。"""
|
||||
|
||||
return {
|
||||
"system_prompt": system_prompt,
|
||||
"user_prompt": user_prompt,
|
||||
}
|
||||
|
||||
|
||||
def parse_questions_json(json_str: str) -> list[Question]:
|
||||
"""Parse LLM-returned JSON string into a list of Question objects.
|
||||
|
||||
Args:
|
||||
json_str: JSON string containing a list of question dicts.
|
||||
|
||||
Returns:
|
||||
List of Question objects.
|
||||
|
||||
Raises:
|
||||
json.JSONDecodeError: If json_str is not valid JSON.
|
||||
ValueError: If the parsed data is not a list.
|
||||
"""
|
||||
# Try to extract JSON from possible markdown code blocks
|
||||
cleaned = json_str.strip()
|
||||
if cleaned.startswith("```"):
|
||||
# Remove markdown code block markers
|
||||
lines = cleaned.split("\n")
|
||||
# Remove first line (```json or ```)
|
||||
lines = lines[1:]
|
||||
# Remove last line (```)
|
||||
if lines and lines[-1].strip() == "```":
|
||||
lines = lines[:-1]
|
||||
cleaned = "\n".join(lines)
|
||||
|
||||
data = json.loads(cleaned)
|
||||
if not isinstance(data, list):
|
||||
raise ValueError(f"Expected a JSON array, got {type(data).__name__}")
|
||||
|
||||
questions: list[Question] = []
|
||||
for item in data:
|
||||
q = Question(
|
||||
id=item.get("id", "q_unknown"),
|
||||
knowledge_point_ids=item.get("knowledge_point_ids", []),
|
||||
level=item.get("level", 1),
|
||||
type=item.get("type", "single_choice"),
|
||||
prompt=item.get("prompt", ""),
|
||||
options=item.get("options", []),
|
||||
answer=item.get("answer"),
|
||||
explanation=item.get("explanation", ""),
|
||||
source_refs=item.get("source_refs", []),
|
||||
)
|
||||
questions.append(q)
|
||||
|
||||
return questions
|
||||
Reference in New Issue
Block a user