Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/resume-builder/scripts/ats_check.py
+++ b/skills/resume-builder/scripts/ats_check.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+"""
+ats_check.py — 简历 ATS（Applicant Tracking System）友好度检查 + 关键词覆盖率
+
+用法：
+    python ats_check.py --resume resume.md --industry internet
+    python ats_check.py --resume resume.md --industry tech --jd jd.txt
+    python ats_check.py --resume resume.docx --industry finance --out report.md
+
+支持输入：.md / .txt / .docx（docx 走 python-docx，需要 pip install python-docx）
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+# Industry names accepted by the --industry CLI option; load_keywords() reads
+# the keyword list for an industry from "<references-dir>/keywords/<industry>.txt".
+INDUSTRIES = {"internet", "tech", "finance", "general"}
+
+
+def load_resume_text(path: Path) -> str:
+    """Return the plain text of a resume file.
+
+    ``.md`` and ``.txt`` files are read as UTF-8. ``.docx`` files are opened
+    with python-docx (imported lazily) and their paragraphs are joined with
+    newlines. Any other extension, or a missing python-docx install, prints
+    an error to stderr and exits the process with status 1.
+    """
+    ext = path.suffix.lower()
+
+    if ext == ".md" or ext == ".txt":
+        return path.read_text(encoding="utf-8")
+
+    if ext != ".docx":
+        print(f"✗ 暂不支持的格式：{ext}", file=sys.stderr)
+        sys.exit(1)
+
+    # python-docx is optional, so it is only imported when a .docx is given.
+    try:
+        from docx import Document
+    except ImportError:
+        print(
+            "✗ 缺少 python-docx，请先安装：pip install python-docx --break-system-packages",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    paragraph_texts = [para.text for para in Document(str(path)).paragraphs]
+    return "\n".join(paragraph_texts)
+
+
+def load_keywords(industry: str, references_dir: Path) -> list[str]:
+    """Load the keyword list for ``industry``.
+
+    Reads ``<references_dir>/keywords/<industry>.txt`` as UTF-8 and returns
+    its non-blank lines with surrounding whitespace removed, in file order.
+    If the file does not exist, prints an error to stderr and exits with
+    status 1.
+    """
+    keyword_file = references_dir / "keywords" / f"{industry}.txt"
+    if not keyword_file.exists():
+        print(f"✗ 关键词库不存在：{keyword_file}", file=sys.stderr)
+        sys.exit(1)
+
+    keywords = []
+    for raw_line in keyword_file.read_text(encoding="utf-8").splitlines():
+        cleaned = raw_line.strip()
+        if cleaned:
+            keywords.append(cleaned)
+    return keywords
+
+
+def coverage(resume_text: str, keywords: list[str]) -> dict:
+    """Compute which keywords appear in the resume text.
+
+    Matching is case-insensitive. Where a keyword starts or ends with an
+    ASCII letter or digit, that end must not touch another ASCII letter or
+    digit in the resume, so "Go" no longer matches inside "good" or
+    "MongoDB". Ends that are CJK characters or punctuation (e.g. "C++",
+    "A/B测试") are matched as plain substrings, which keeps mixed
+    Chinese/English text such as "熟悉Go语言" working.
+
+    Args:
+        resume_text: Full plain text of the resume.
+        keywords: Keywords to look for.
+
+    Returns:
+        A dict with ``"hits"`` (keywords found, input order), ``"missing"``
+        (keywords not found, input order) and ``"rate"`` (hits / total as a
+        float; ``0.0`` when ``keywords`` is empty).
+    """
+    text_lower = resume_text.lower()
+    hits: list[str] = []
+    missing: list[str] = []
+    for kw in keywords:
+        needle = kw.lower()
+        pattern = re.escape(needle)
+        # The text is already lower-cased, so [a-z0-9] covers ASCII alnum.
+        if needle[:1].isascii() and needle[:1].isalnum():
+            pattern = r"(?<![a-z0-9])" + pattern
+        if needle[-1:].isascii() and needle[-1:].isalnum():
+            pattern += r"(?![a-z0-9])"
+        if re.search(pattern, text_lower):
+            hits.append(kw)
+        else:
+            missing.append(kw)
+    return {
+        "hits": hits,
+        "missing": missing,
+        "rate": len(hits) / len(keywords) if keywords else 0.0,
+    }
+
+
+def jd_extract_keywords(jd_text: str) -> list[str]:
+    """Extract candidate keywords from a job description (simple heuristic).
+
+    Candidates are English-like tokens (a letter followed by 1-29 letters,
+    digits or ``+ / . - _``) and runs of 2-10 CJK characters. Trailing
+    ``. / - _`` is stripped from each token so that sentence punctuation
+    ("Python." at the end of a sentence) does not become part of the
+    keyword; tokens shorter than two characters after stripping are dropped.
+    Chinese stop words are removed and duplicates are dropped
+    case-insensitively, keeping the first spelling seen.
+
+    Args:
+        jd_text: Raw text of the job description.
+
+    Returns:
+        At most the first 80 distinct candidates, in order of appearance.
+    """
+    # English tokens of 2-30 chars, or CJK runs of 2-10 chars.
+    candidates = re.findall(
+        r"[A-Za-z][A-Za-z0-9+/.\-_]{1,29}|[一-龥]{2,10}",
+        jd_text,
+    )
+    # Minimal stop-word list of generic JD vocabulary.
+    stop = {
+        "公司", "工作", "我们", "你将", "团队", "需要", "能够", "具备", "熟悉",
+        "了解", "良好", "优秀", "经验", "能力", "岗位", "职责", "要求", "以上",
+        "相关", "及其", "或者", "进行", "完成", "负责", "推动", "实现", "提升",
+        "并且", "包括", "以下", "根据",
+    }
+    seen: set[str] = set()
+    out: list[str] = []
+    for raw in candidates:
+        # "+" is deliberately kept so that "C++" survives.
+        token = raw.rstrip("./-_")
+        if len(token) < 2:
+            continue
+        key = token.lower()
+        if key in seen or token in stop:
+            continue
+        seen.add(key)
+        out.append(token)
+    return out[:80]  # keep only the first 80 candidates
+
+
+def ats_friendliness(text: str, source_path: Path) -> tuple[int, list[str]]:
+    """Score how ATS-friendly a resume is.
+
+    The score starts at 10 and loses points for: text that is too short
+    (-3) or too long (-1), a missing e-mail (-1), a missing phone number
+    (-1), more than five decorative symbols (-1), any emoji (-1), and a low
+    number-per-bullet density (-1). For ``.docx`` sources the file is opened
+    again with python-docx and more than one table (-1) or any inline image
+    (-1) is penalised as well.
+
+    Args:
+        text: Plain text of the resume.
+        source_path: Path of the resume file; only inspected further when
+            its suffix is ``.docx``.
+
+    Returns:
+        ``(score, warnings)`` where ``score`` is clamped to be at least 0
+        and ``warnings`` holds one message per deduction.
+    """
+    score = 10
+    warnings: list[str] = []
+
+    # Length
+    if len(text) < 200:
+        score -= 3
+        warnings.append("⚠ 简历文本过短（< 200 字符），可能解析失败或内容不足")
+    elif len(text) > 6000:
+        score -= 1
+        warnings.append("⚠ 简历偏长（> 6000 字符），建议精简到 1~2 页")
+
+    # Contact details
+    has_email = bool(re.search(r"[\w.\-+]+@[\w.\-]+\.\w+", text))
+    has_phone = bool(re.search(r"(\+?86[-\s]?)?1[3-9]\d{9}|\d{3}[-\s]?\d{4}[-\s]?\d{4}", text))
+    if not has_email:
+        score -= 1
+        warnings.append("⚠ 没找到邮箱")
+    if not has_phone:
+        score -= 1
+        warnings.append("⚠ 没找到手机号")
+
+    # Decorative symbols
+    decorative = re.findall(r"[★☆●○◆◇▶▷■□▪▫♦]", text)
+    if len(decorative) > 5:
+        score -= 1
+        warnings.append(f"⚠ 用了 {len(decorative)} 个装饰符号（★●◆等），ATS 可能识别异常，建议精简")
+
+    # Emoji (U+1F600-U+1F64F is already inside this range)
+    emojis = re.findall(r"[\U0001F300-\U0001FAFF]", text)
+    if emojis:
+        score -= 1
+        warnings.append(f"⚠ 检测到 {len(emojis)} 个 emoji，部分 ATS 会乱码，建议删掉")
+
+    # Number density: are the bullets backed by quantified results?
+    # Only numbers that sit on bullet lines are counted, so phone numbers
+    # and dates elsewhere in the resume cannot mask unquantified bullets.
+    bullet_re = re.compile(r"\s*[-*•]\s+")
+    number_re = re.compile(r"\d+(?:\.\d+)?%?")
+    bullet_lines = [line for line in text.splitlines() if bullet_re.match(line)]
+    if bullet_lines:
+        quantified = sum(len(number_re.findall(line)) for line in bullet_lines)
+        density = quantified / len(bullet_lines)
+        if density < 0.4:
+            score -= 1
+            warnings.append(
+                f"⚠ 量化密度低（每条 bullet 平均 {density:.2f} 个数字），"
+                f"建议在工作 / 项目经历里多加数字"
+            )
+
+    # docx-specific structure checks
+    if source_path.suffix.lower() == ".docx":
+        try:
+            from docx import Document
+
+            doc = Document(str(source_path))
+            tables = len(doc.tables)
+            images = sum(1 for _ in doc.inline_shapes)
+        except Exception as exc:
+            # Best-effort check: report why it was skipped instead of hiding
+            # the failure, but never let it abort the overall scoring.
+            print(f"⚠ 跳过 docx 结构检查：{exc}", file=sys.stderr)
+        else:
+            if tables > 1:
+                score -= 1
+                warnings.append(
+                    f"⚠ docx 里有 {tables} 个表格，部分 ATS 解析表格会丢字段，"
+                    f"建议改成正文段落"
+                )
+            if images > 0:
+                score -= 1
+                warnings.append(
+                    f"⚠ docx 里有 {images} 张图片（含证件照），ATS 不读图，"
+                    f"重要信息别只放在图里；证件照可保留"
+                )
+
+    return max(0, score), warnings
+
+
+def render_report(
+    industry_cov: dict,
+    industry: str,
+    jd_cov: dict | None,
+    ats_score: int,
+    ats_warnings: list[str],
+) -> str:
+    """Render the check results as a Markdown report.
+
+    The report has an industry-keyword section, an optional JD-keyword
+    section (only when ``jd_cov`` is not ``None``), the ATS score with its
+    warnings (or an all-clear line), and a fixed block of advice.
+
+    Args:
+        industry_cov: Result dict with ``"hits"``, ``"missing"``, ``"rate"``.
+        industry: Industry name shown in the section heading.
+        jd_cov: Same shape as ``industry_cov`` for the JD, or ``None``.
+        ats_score: Score shown as ``<score>/10``.
+        ats_warnings: Warning lines, emitted verbatim one per line.
+
+    Returns:
+        The report as one newline-joined string.
+    """
+
+    def listing(words: list[str]) -> str:
+        # Comma-separated words, or a placeholder when there are none.
+        return ", ".join(words) or "（无）"
+
+    matched = industry_cov["hits"]
+    unmatched = industry_cov["missing"]
+    total = len(matched) + len(unmatched)
+    percent = industry_cov["rate"] * 100
+
+    report = [
+        "# 简历 ATS 检查报告",
+        "",
+        f"## 行业关键词覆盖（{industry}）",
+        f"- 命中率：**{percent:.1f}%** ({len(matched)} / {total})",
+        "",
+        "**已命中**：" + listing(matched),
+        "",
+        "**建议补充**（前 15 个）：" + listing(unmatched[:15]),
+        "",
+    ]
+
+    if jd_cov is not None:
+        report.append("## JD 关键词覆盖")
+        report.append(f"- 命中率：**{jd_cov['rate'] * 100:.1f}%**")
+        report.append("")
+        report.append("**已命中**：" + listing(jd_cov["hits"][:30]))
+        report.append("")
+        report.append("**JD 出现但简历没有**（重点补这些）：" + listing(jd_cov["missing"][:20]))
+        report.append("")
+
+    report.append(f"## ATS 友好度评分：**{ats_score}/10**")
+    report.append("")
+    report.extend(ats_warnings if ats_warnings else ["✅ 没有明显问题"])
+
+    report.extend(
+        [
+            "",
+            "---",
+            "## 改进建议优先级",
+            "",
+            "1. 先补 JD 命中率 → 这是 ATS 通过率的最直接信号",
+            "2. 再补行业关键词 → 让简历能在更宽的搜索里被捞到",
+            "3. 最后调 ATS 友好度 → 移除装饰符号、emoji、表格、图片",
+            "",
+            "注意：覆盖率不是越高越好，**关键词必须出现在真实的成就 bullet 里**，",
+            "不要把关键词单独列一长串当 skills，会被 HR 一眼识破。",
+        ]
+    )
+    return "\n".join(report)
+
+
+def main() -> None:
+    """Command-line entry point.
+
+    Parses the arguments, loads the resume and the industry keyword list,
+    optionally extracts keywords from a JD file, scores ATS friendliness,
+    and then writes the Markdown report to ``--out`` or prints it to
+    stdout. A missing resume or JD file prints an error to stderr and
+    exits with status 1.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--resume", required=True, help="简历文件路径 (.md/.txt/.docx)")
+    parser.add_argument(
+        "--industry",
+        # Sorted so --help and invalid-choice errors list the industries in
+        # a stable order; iterating the set directly varies between runs.
+        choices=sorted(INDUSTRIES),
+        required=True,
+        help="行业方向",
+    )
+    parser.add_argument("--jd", help="可选：JD 文本文件，做精准对比")
+    parser.add_argument("--out", help="输出报告路径，缺省直接打印")
+    parser.add_argument(
+        "--references-dir",
+        # Default: the "references" directory next to this script's parent.
+        default=str(Path(__file__).resolve().parent.parent / "references"),
+    )
+    args = parser.parse_args()
+
+    resume_path = Path(args.resume).expanduser()
+    if not resume_path.exists():
+        print(f"✗ 简历文件不存在：{resume_path}", file=sys.stderr)
+        sys.exit(1)
+
+    text = load_resume_text(resume_path)
+    references_dir = Path(args.references_dir)
+
+    industry_keywords = load_keywords(args.industry, references_dir)
+    industry_cov = coverage(text, industry_keywords)
+
+    # The JD comparison is optional; jd_cov stays None when --jd is absent.
+    jd_cov = None
+    if args.jd:
+        jd_path = Path(args.jd).expanduser()
+        if not jd_path.exists():
+            print(f"✗ JD 文件不存在：{jd_path}", file=sys.stderr)
+            sys.exit(1)
+        jd_keywords = jd_extract_keywords(jd_path.read_text(encoding="utf-8"))
+        jd_cov = coverage(text, jd_keywords)
+
+    ats_score, ats_warnings = ats_friendliness(text, resume_path)
+    report = render_report(industry_cov, args.industry, jd_cov, ats_score, ats_warnings)
+
+    if args.out:
+        out_path = Path(args.out).expanduser()
+        # Create missing parent directories so any --out path is writable.
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        out_path.write_text(report, encoding="utf-8")
+        print(f"✓ 报告已生成：{out_path}")
+    else:
+        print(report)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()