Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/jd-resume-tailor/scripts/jd_gap.py
+++ b/skills/jd-resume-tailor/scripts/jd_gap.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
+jd_gap.py — 把 parse_jd.py 的 JSON 与简历文本做 gap 分析
+
+用法：
+    python jd_gap.py --jd jd_parsed.json --resume resume.md --out gap.md
+
+输出 markdown 报告：完美命中 / 隐性命中 / 真缺口 三类，附改写建议。
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+
+
+def load_resume_text(path: Path) -> str:
+    suffix = path.suffix.lower()
+    if suffix in {".md", ".txt"}:
+        return path.read_text(encoding="utf-8")
+    if suffix == ".docx":
+        try:
+            from docx import Document
+        except ImportError:
+            print(
+                "✗ 缺少 python-docx：pip install python-docx --break-system-packages",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        doc = Document(str(path))
+        return "\n".join(p.text for p in doc.paragraphs)
+    print(f"✗ 暂不支持的格式：{suffix}", file=sys.stderr)
+    sys.exit(1)
+
+
+def find_evidence(resume_text: str, keyword: str, window: int = 30) -> str | None:
+    """在简历里找关键词，返回上下文片段；找不到返回 None。"""
+    pattern = re.escape(keyword)
+    m = re.search(pattern, resume_text, flags=re.IGNORECASE)
+    if not m:
+        return None
+    start = max(0, m.start() - window)
+    end = min(len(resume_text), m.end() + window)
+    snippet = resume_text[start:end].replace("\n", " ").strip()
+    return snippet
+
+
+def fuzzy_hit(resume_text: str, keyword: str) -> str | None:
+    """模糊命中：取关键词的中文 / 英文核心，做包含匹配。"""
+    # 拿前 2 个字 / 前 5 个字符
+    candidates = []
+    if re.search(r"[一-龥]", keyword):
+        if len(keyword) >= 4:
+            candidates.append(keyword[:2])
+            candidates.append(keyword[-2:])
+    else:
+        if len(keyword) >= 4:
+            candidates.append(keyword[:4].lower())
+    text_low = resume_text.lower()
+    for c in candidates:
+        if c and c in text_low:
+            return c
+    return None
+
+
+def analyze(jd: dict, resume_text: str) -> dict:
+    perfect, implicit, missing = [], [], []
+
+    # 用 must_have 句子里抽出来的 skills 作为对比项
+    candidates = jd.get("skills_extracted", []) + jd.get("must_have", [])
+
+    seen = set()
+    for c in candidates:
+        # 句子层面太长，截短
+        keyword = c.strip()
+        if len(keyword) > 30:
+            # 从长句子里抽更短的关键词
+            short_tokens = re.findall(
+                r"[A-Za-z][A-Za-z0-9+/.\-_]{1,20}|[一-龥]{2,6}",
+                keyword,
+            )
+            for t in short_tokens:
+                if t.lower() not in seen:
+                    seen.add(t.lower())
+                    process_one(t, resume_text, perfect, implicit, missing)
+        else:
+            if keyword.lower() not in seen:
+                seen.add(keyword.lower())
+                process_one(keyword, resume_text, perfect, implicit, missing)
+
+    return {"perfect": perfect, "implicit": implicit, "missing": missing}
+
+
+def process_one(keyword, resume_text, perfect, implicit, missing):
+    ev = find_evidence(resume_text, keyword)
+    if ev:
+        perfect.append({"keyword": keyword, "evidence": ev})
+        return
+    fuzzy = fuzzy_hit(resume_text, keyword)
+    if fuzzy:
+        implicit.append({"keyword": keyword, "fuzzy_match": fuzzy})
+    else:
+        missing.append(keyword)
+
+
+def render(jd: dict, gap: dict) -> str:
+    lines = ["# JD ⇄ Resume Gap 分析报告", ""]
+
+    spec = jd.get("special_requirements", {})
+    if spec:
+        lines += ["## JD 硬条件", ""]
+        for k, v in spec.items():
+            lines.append(f"- **{k}**：{v}")
+        lines.append("")
+
+    lines += ["## ✅ 完美命中（简历里有明确证据）", ""]
+    if gap["perfect"]:
+        for item in gap["perfect"][:30]:
+            lines.append(f"- **{item['keyword']}** —— 证据：`...{item['evidence']}...`")
+    else:
+        lines.append("（无）")
+    lines.append("")
+
+    lines += ["## 🟡 隐性命中（简历里有近似但用词不同，建议改写时对齐）", ""]
+    if gap["implicit"]:
+        for item in gap["implicit"][:20]:
+            lines.append(f"- **JD 关键词：{item['keyword']}**（简历里出现：`{item['fuzzy_match']}`）")
+    else:
+        lines.append("（无）")
+    lines.append("")
+
+    lines += ["## 🔴 真缺口（简历完全没有，需要确认 / 补充 / 转换叙事）", ""]
+    if gap["missing"]:
+        for kw in gap["missing"][:30]:
+            lines.append(f"- **{kw}**")
+    else:
+        lines.append("（无）")
+    lines.append("")
+
+    lines += [
+        "---",
+        "## 改写建议",
+        "",
+        "1. **完美命中** 的部分保留，但确保措辞与 JD 一致（比如 JD 用『A/B 测试』就别写『AB 实验』）",
+        "2. **隐性命中** 是性价比最高的优化点 —— 把简历里的近义词改成 JD 的措辞",
+        "3. **真缺口** 分两类：",
+        "   - 你做过但没写？→ 补到对应经历的 bullet 里",
+        "   - 你没做过？→ **不要编造**。可以在 cover letter / Summary 里诚实说明 transferable skill",
+        "4. 把改后的简历再跑一次 ats_check.py 看命中率是否提升",
+    ]
+    return "\n".join(lines)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--jd", required=True, help="parse_jd.py 输出的 json")
+    parser.add_argument("--resume", required=True, help="简历文件 (.md/.txt/.docx)")
+    parser.add_argument("--out", help="输出 markdown 路径")
+    args = parser.parse_args()
+
+    jd = json.loads(Path(args.jd).read_text(encoding="utf-8"))
+    resume_text = load_resume_text(Path(args.resume))
+    gap = analyze(jd, resume_text)
+    report = render(jd, gap)
+
+    if args.out:
+        Path(args.out).write_text(report, encoding="utf-8")
+        print(f"✓ Gap 报告已生成：{args.out}")
+    else:
+        print(report)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/jd-resume-tailor/scripts/parse_jd.py
+++ b/skills/jd-resume-tailor/scripts/parse_jd.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""
+parse_jd.py — 解析 JD 文本，抽取 must-have / nice-to-have / 职责 / 特殊要求
+
+用法：
+    python parse_jd.py --jd-file jd.txt --out jd_parsed.json
+    python parse_jd.py --jd-text "..." --out jd_parsed.json
+
+输出 JSON 结构供下一步的 jd_gap.py 使用，也可以直接给用户看。
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+
+MUST_PATTERNS = [
+    r"必须", r"必备", r"必要条件", r"硬性要求", r"应当", r"需要", r"需具备",
+    r"至少\s*\d+\s*年", r"\d+\+?\s*年以上",
+    r"required", r"must\s*have", r"mandatory", r"essential",
+    r"minimum\s+\d+\s+years",
+]
+NICE_PATTERNS = [
+    r"加分", r"优先", r"加分项", r"熟悉.+者优先", r"有.+经验者优先",
+    r"preferred", r"nice\s*to\s*have", r"plus", r"bonus", r"desirable",
+]
+ACTION_VERBS = [
+    "负责", "主导", "推动", "设计", "搭建", "构建", "优化", "规划", "迭代",
+    "孵化", "复盘", "运营", "管理", "协调", "执行", "驱动", "落地", "重构",
+    "lead", "drive", "build", "design", "architect", "develop", "implement",
+    "optimize", "manage", "coordinate", "execute", "own",
+]
+
+
+def split_sentences(text: str) -> list[str]:
+    # 中文按 。！？; 拆，英文按 . ; 拆，并保留 bullet 行
+    raw = re.split(r"[。！？!?；;\n]+", text)
+    return [s.strip(" \t-•·*") for s in raw if s.strip()]
+
+
+def classify_sentences(sentences: list[str]) -> dict:
+    must, nice, resp, others = [], [], [], []
+    for s in sentences:
+        s_low = s.lower()
+        if any(re.search(p, s_low) for p in NICE_PATTERNS):
+            nice.append(s)
+        elif any(re.search(p, s_low) for p in MUST_PATTERNS):
+            must.append(s)
+        elif any(v in s_low for v in ACTION_VERBS):
+            resp.append(s)
+        else:
+            others.append(s)
+    return {"must": must, "nice": nice, "responsibilities": resp, "others": others}
+
+
+def extract_special(text: str) -> dict:
+    out: dict[str, str] = {}
+
+    # 学历
+    edu = re.search(r"(本科|硕士|博士|大专)(?:及以上|以上)?", text)
+    if edu:
+        out["education"] = edu.group(0)
+
+    # 工作年限
+    years = re.search(r"(\d+)\s*[-~–到至]\s*(\d+)\s*年|(\d+)\s*\+?\s*年(以上|及以上)?", text)
+    if years:
+        out["years"] = years.group(0)
+
+    # 语言
+    lang_pat = re.search(
+        r"(英语\s*(口语)?\s*(流利|熟练|母语)|CET[-\s]?[46]|雅思\s*\d(\.\d)?|托福\s*\d{2,3}|母语水平|business\s*english)",
+        text,
+        flags=re.IGNORECASE,
+    )
+    if lang_pat:
+        out["language"] = lang_pat.group(0)
+
+    # 城市
+    cities = re.findall(
+        r"(北京|上海|广州|深圳|杭州|南京|苏州|成都|武汉|西安|香港|新加坡|remote|hybrid|远程|海外)",
+        text,
+        flags=re.IGNORECASE,
+    )
+    if cities:
+        out["location"] = "/".join(sorted(set(c.lower() for c in cities)))
+
+    # 出差 / 加班信号
+    travel = re.search(r"(出差|派驻|常驻|项目制|加班|999|996|大小周)", text)
+    if travel:
+        out["working_style"] = travel.group(0)
+
+    # 证书
+    certs = re.findall(
+        r"(CFA(?:\s*Level\s*[I123]+)?|CPA|FRM|ACCA|PMP|AWS\s*[\w\s]*认证|Azure\s*[\w]*|GCP\s*[\w]*)",
+        text,
+        flags=re.IGNORECASE,
+    )
+    if certs:
+        out["certificates"] = "/".join(sorted(set(c.strip() for c in certs)))
+
+    return out
+
+
+def extract_skills(sentences: list[str]) -> list[str]:
+    """从所有句子里抽取技能词候选（短词优先，避免抽出整句）。"""
+    text = " ".join(sentences)
+    # 英文技能（CamelCase 或大写开头的词、含 . 或 +/- 的标识）
+    en = re.findall(r"\b[A-Za-z][A-Za-z0-9+/.\-_#]{1,20}\b", text)
+    # 中文 2~5 字常见技能词
+    zh = re.findall(r"[一-龥]{2,5}", text)
+    raw = en + zh
+
+    stop = {
+        # 中文虚词 / 通用动词
+        "公司", "我们", "你将", "团队", "需要", "能够", "具备", "熟悉", "了解",
+        "良好", "优秀", "经验", "能力", "岗位", "职责", "要求", "以上", "相关",
+        "进行", "完成", "负责", "推动", "实现", "提升", "并且", "包括", "以下",
+        "工作", "项目", "业务", "及其", "或者", "或", "与", "及", "的", "了",
+        "并", "对", "在", "等", "以", "等等", "通过", "将", "其", "之", "至",
+        "至上", "本科", "硕士", "博士", "者优先", "根据", "进行", "支持", "参与",
+        "主导", "提供", "建立", "搭建", "设计", "驱动", "决策", "分析", "推动",
+        "迭代", "规划", "运营", "协作", "跨部门", "跨团队", "高级", "资深",
+        "若干", "多种", "多元", "多类",
+        # 英文虚词
+        "and", "the", "with", "for", "of", "or", "to", "be", "as", "an", "is",
+        "are", "in", "on", "at", "by", "all", "you", "we", "us", "our", "your",
+        "a", "an", "this", "that", "these", "those", "it", "its",
+    }
+    # 含数字的"X年""X个"也过滤
+    digit_only = re.compile(r"^\d+$")
+
+    seen = set()
+    out = []
+    for token in raw:
+        key = token.lower()
+        if key in seen or token in stop or len(token) < 2 or digit_only.match(token):
+            continue
+        # 中文 token 不允许全是 stop 词的子串
+        seen.add(key)
+        out.append(token)
+    return out[:60]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    src = parser.add_mutually_exclusive_group(required=True)
+    src.add_argument("--jd-file", help="JD 文本文件路径")
+    src.add_argument("--jd-text", help="直接传 JD 文本")
+    parser.add_argument("--out", help="输出 JSON 路径，缺省打印")
+    args = parser.parse_args()
+
+    if args.jd_file:
+        path = Path(args.jd_file).expanduser()
+        if not path.exists():
+            print(f"✗ JD 文件不存在：{path}", file=sys.stderr)
+            sys.exit(1)
+        text = path.read_text(encoding="utf-8")
+    else:
+        text = args.jd_text
+
+    sentences = split_sentences(text)
+    classified = classify_sentences(sentences)
+    special = extract_special(text)
+    skills = extract_skills(classified["must"] + classified["responsibilities"])
+
+    result = {
+        "must_have": classified["must"],
+        "nice_to_have": classified["nice"],
+        "responsibilities": classified["responsibilities"],
+        "skills_extracted": skills,
+        "special_requirements": special,
+        "raw_sentence_count": len(sentences),
+    }
+
+    payload = json.dumps(result, ensure_ascii=False, indent=2)
+    if args.out:
+        out_path = Path(args.out).expanduser()
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        out_path.write_text(payload, encoding="utf-8")
+        print(f"✓ JD 解析结果已保存：{out_path}")
+    else:
+        print(payload)
+
+
+if __name__ == "__main__":
+    main()