Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/quiz-mastery/src/quiz_mastery/quiz_extractor.py
+++ b/skills/quiz-mastery/src/quiz_mastery/quiz_extractor.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import json
+from .models import Question
+
+
+def build_extraction_prompt(content: str) -> dict:
+    """Assemble the system/user prompt pair asking an LLM to parse questions.
+
+    The raw file text is interpolated verbatim between a fixed preamble and a
+    fixed block of parsing rules that ends with an example of the expected
+    JSON output.
+
+    Args:
+        content: Raw text content containing questions.
+
+    Returns:
+        dict with 'system_prompt' and 'user_prompt' keys.
+    """
+    role_lines = [
+        "你是一个专业的题目解析助手。从用户提供的题目文件中识别并解析所有题目。",
+        "严格按照要求的 JSON 格式输出，不要输出任何其他内容。",
+    ]
+
+    # Everything that precedes the raw file content in the user message.
+    preamble = "请从以下题目文件内容中解析出所有题目。\n\n## 题目文件内容\n\n"
+
+    # Fixed rules and output schema that follow the file content. This is a
+    # plain (non-f) string, so the braces of the JSON example are literal.
+    rules = """
+
+## 解析要求
+1. 识别每道题的类型：
+   - single_choice: 选择题（有 A/B/C/D 等选项）
+   - true_false: 判断题（判断对错）
+   - fill_blank: 填空题（有空格需要填写）
+   - short_answer: 简答题（需要文字回答）
+2. 提取题目的所有信息
+3. 如果题目有答案和解析，也一并提取
+4. 为每道题分配唯一 ID
+
+## 输出格式
+输出纯 JSON 数组，每个元素格式如下：
+```json
+[
+  {
+    "id": "q_001",
+    "knowledge_point_ids": [],
+    "level": 1,
+    "type": "single_choice",
+    "prompt": "题目内容",
+    "options": ["A. 选项一", "B. 选项二", "C. 选项三", "D. 选项四"],
+    "answer": "A",
+    "explanation": "解析内容（如果有）"
+  }
+]
+```
+
+注意：
+- type 必须是 single_choice, true_false, fill_blank, short_answer 之一
+- 选择题的 answer 填选项字母（A/B/C/D）
+- 判断题的 answer 填 "True" 或 "False"
+- 填空题的 answer 填正确答案文本
+- 简答题的 answer 填参考答案（如果有）
+- 如果无法确定 knowledge_point_ids，留空数组
+- level 默认为 1，如果能从题目难度判断则相应调整
+
+请直接输出 JSON 数组，不要包含 markdown 代码块标记或其他文字。"""
+
+    return dict(
+        system_prompt="\n".join(role_lines),
+        user_prompt=f"{preamble}{content}{rules}",
+    )
+
+
+def _strip_code_fence(text: str) -> str:
+    """Return *text* without a surrounding markdown code fence, if present."""
+    cleaned = text.strip()
+    if not cleaned.startswith("```"):
+        return cleaned
+
+    body_lines: list[str] = []
+    # Skip the opening fence line (``` or ```json) and stop at the first bare
+    # closing fence, so commentary the model adds after the block is ignored.
+    # A line consisting only of ``` can never be part of valid JSON (strings
+    # cannot contain raw newlines), so this cannot truncate real payload.
+    for line in cleaned.split("\n")[1:]:
+        if line.strip() == "```":
+            break
+        body_lines.append(line)
+    return "\n".join(body_lines)
+
+
+def _value_or(item: dict, key: str, default):
+    """Return ``item[key]``, or *default* when the key is missing or null."""
+    value = item.get(key)
+    return default if value is None else value
+
+
+def parse_questions_json(json_str: str) -> list[Question]:
+    """Parse LLM-returned JSON string into a list of Question objects.
+
+    A markdown code fence wrapped around the payload is removed before
+    parsing, including any text that follows the closing fence. Fields that
+    are missing or explicitly ``null`` fall back to their defaults; ``answer``
+    is passed through unchanged (``None`` when absent).
+
+    Args:
+        json_str: JSON string containing a list of question dicts.
+
+    Returns:
+        List of Question objects, in the order they appear in the array.
+
+    Raises:
+        json.JSONDecodeError: If json_str is not valid JSON.
+        ValueError: If the parsed data is not a list, or an element of the
+            list is not a JSON object.
+    """
+    data = json.loads(_strip_code_fence(json_str))
+    if not isinstance(data, list):
+        raise ValueError(f"Expected a JSON array, got {type(data).__name__}")
+
+    questions: list[Question] = []
+    for index, item in enumerate(data):
+        # Reject scalars/arrays here; otherwise ``.get`` would fail with an
+        # AttributeError that callers handling ValueError would not catch.
+        if not isinstance(item, dict):
+            raise ValueError(
+                f"Expected question {index} to be a JSON object, "
+                f"got {type(item).__name__}"
+            )
+        questions.append(
+            Question(
+                id=_value_or(item, "id", "q_unknown"),
+                knowledge_point_ids=_value_or(item, "knowledge_point_ids", []),
+                level=_value_or(item, "level", 1),
+                type=_value_or(item, "type", "single_choice"),
+                prompt=_value_or(item, "prompt", ""),
+                options=_value_or(item, "options", []),
+                answer=item.get("answer"),
+                explanation=_value_or(item, "explanation", ""),
+                source_refs=_value_or(item, "source_refs", []),
+            )
+        )
+
+    return questions