Files
mantle-ai-trader/skills/jd-resume-tailor/scripts/jd_gap.py
2026-06-06 05:21:10 +00:00

178 lines
5.9 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
jd_gap.py — 把 parse_jd.py 的 JSON 与简历文本做 gap 分析
用法:
python jd_gap.py --jd jd_parsed.json --resume resume.md --out gap.md
输出 markdown 报告:完美命中 / 隐性命中 / 真缺口 三类,附改写建议。
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
def load_resume_text(path: Path) -> str:
    """Read a resume file and return its content as plain text.

    The format is chosen from the file suffix, compared case-insensitively:

    * ``.md`` / ``.txt`` are read as UTF-8 text.
    * ``.docx`` is read through python-docx; the text of every entry in
      ``doc.paragraphs`` is joined with newlines.

    Args:
        path: Location of the resume file.

    Returns:
        The resume content as a single string.

    Raises:
        SystemExit: With status 1, after a message has been printed to
            stderr, when python-docx is not installed for a ``.docx`` file
            or when the suffix is not supported.
    """
    suffix = path.suffix.lower()
    if suffix in {".md", ".txt"}:
        return path.read_text(encoding="utf-8")
    if suffix == ".docx":
        # Imported lazily so python-docx is only required for .docx input.
        try:
            from docx import Document
        except ImportError:
            # The message names the missing package, then the install command.
            print(
                "✗ 缺少 python-docx:pip install python-docx --break-system-packages",
                file=sys.stderr,
            )
            sys.exit(1)
        doc = Document(str(path))
        return "\n".join(p.text for p in doc.paragraphs)
    print(f"✗ 暂不支持的格式:{suffix}", file=sys.stderr)
    sys.exit(1)
def find_evidence(resume_text: str, keyword: str, window: int = 30) -> str | None:
    """Return the first occurrence of ``keyword`` with surrounding context.

    The keyword is matched literally (regex metacharacters are escaped) and
    case-insensitively.

    Args:
        resume_text: Full resume text to search.
        keyword: Term to look for.
        window: Number of characters of context kept on each side of the
            match, clamped to the bounds of ``resume_text``.

    Returns:
        The context snippet with newlines replaced by spaces and outer
        whitespace stripped, or ``None`` when the keyword does not occur.
    """
    match = re.search(re.escape(keyword), resume_text, re.IGNORECASE)
    if match is None:
        return None
    left = max(match.start() - window, 0)
    right = min(match.end() + window, len(resume_text))
    context = resume_text[left:right]
    # Flatten line breaks so the snippet fits on one report line.
    return " ".join(context.split("\n")).strip()
def fuzzy_hit(resume_text: str, keyword: str) -> str | None:
    """Look for a loose match of ``keyword`` via a short prefix or suffix.

    Keywords shorter than 4 characters produce no candidates. Otherwise:

    * if the keyword contains a CJK character, its first two and last two
      characters are tried, in that order;
    * otherwise its first four characters are tried.

    Every candidate is lowercased before the containment test, because the
    resume is lowercased too; without this, the Latin part of a mixed
    keyword such as ``AB测试平台`` could never match.

    Args:
        resume_text: Full resume text to search.
        keyword: JD term that had no exact match.

    Returns:
        The lowercased candidate found in the resume, or ``None`` when no
        candidate occurs.
    """
    # NOTE(review): SOURCE lost its indentation; this assumes both CJK
    # candidates sit under the ``len(keyword) >= 4`` guard — confirm.
    candidates: list[str] = []
    if len(keyword) >= 4:
        if re.search(r"[一-龥]", keyword):
            # First 2 and last 2 characters of a CJK-containing keyword.
            candidates += [keyword[:2], keyword[-2:]]
        else:
            # First 4 characters of a non-CJK keyword.
            candidates.append(keyword[:4])

    text_low = resume_text.lower()
    for raw in candidates:
        cand = raw.lower()
        if cand and cand in text_low:
            return cand
    return None
def analyze(jd: dict, resume_text: str) -> dict:
    """Classify JD terms by how well the resume covers them.

    Terms come from ``jd["skills_extracted"]`` followed by
    ``jd["must_have"]``; a missing or ``null`` key counts as an empty list.
    Each entry is stripped, and blank entries are skipped, because an empty
    pattern would match at the start of the resume and be reported as a
    hit. Entries longer than 30 characters are treated as sentences and
    split into shorter tokens (Latin identifiers of 2-21 characters or CJK
    runs of 2-6 characters). Terms are de-duplicated case-insensitively
    before being handed to ``process_one``.

    Args:
        jd: Parsed JD data.
        resume_text: Full resume text.

    Returns:
        A dict with the keys ``"perfect"``, ``"implicit"`` and
        ``"missing"``, each holding the list filled by ``process_one``.
    """
    perfect, implicit, missing = [], [], []
    candidates = (jd.get("skills_extracted") or []) + (jd.get("must_have") or [])
    seen = set()
    for c in candidates:
        keyword = c.strip()
        if not keyword:
            # Blank entries carry no term; skip them rather than match "".
            continue
        if len(keyword) > 30:
            # Sentence-length entry: extract shorter keywords from it.
            terms = re.findall(
                r"[A-Za-z][A-Za-z0-9+/.\-_]{1,20}|[一-龥]{2,6}",
                keyword,
            )
        else:
            terms = [keyword]
        for term in terms:
            key = term.lower()
            if key in seen:
                continue
            seen.add(key)
            process_one(term, resume_text, perfect, implicit, missing)
    return {"perfect": perfect, "implicit": implicit, "missing": missing}
def process_one(keyword, resume_text, perfect, implicit, missing):
    """Classify one keyword and append it to the matching bucket in place.

    An exact hit from ``find_evidence`` goes to ``perfect`` together with
    its context snippet. Otherwise a hit from ``fuzzy_hit`` goes to
    ``implicit`` together with the matched fragment. With neither, the bare
    keyword goes to ``missing``. Nothing is returned.
    """
    snippet = find_evidence(resume_text, keyword)
    if snippet:
        perfect.append({"keyword": keyword, "evidence": snippet})
        return

    # The fuzzy lookup is only attempted after the exact search fails.
    approx = fuzzy_hit(resume_text, keyword)
    if not approx:
        missing.append(keyword)
        return
    implicit.append({"keyword": keyword, "fuzzy_match": approx})
def render(jd: dict, gap: dict) -> str:
    """Render the gap analysis as a Markdown report.

    The report contains, in order: a title; an optional section listing
    ``jd["special_requirements"]`` as ``key:value`` bullets (omitted when
    that mapping is absent or empty); the perfect hits (at most 30); the
    implicit hits (at most 20); the missing keywords (at most 30); and a
    fixed block of rewriting advice. An empty category is rendered as
    ``(无)``.

    Args:
        jd: Parsed JD data; only ``"special_requirements"`` is read.
        gap: Dict with ``"perfect"`` (items with ``keyword`` and
            ``evidence``), ``"implicit"`` (items with ``keyword`` and
            ``fuzzy_match``) and ``"missing"`` (plain keywords).

    Returns:
        The report as one newline-joined string.
    """
    lines = ["# JD ⇄ Resume Gap 分析报告", ""]

    spec = jd.get("special_requirements", {})
    if spec:
        lines += ["## JD 硬条件", ""]
        for k, v in spec.items():
            # A separator keeps the bold key and its value from running together.
            lines.append(f"- **{k}**:{v}")
        lines.append("")

    lines += ["## ✅ 完美命中(简历里有明确证据)", ""]
    if gap["perfect"]:
        for item in gap["perfect"][:30]:
            lines.append(f"- **{item['keyword']}** —— 证据:`...{item['evidence']}...`")
    else:
        lines.append("(无)")
    lines.append("")

    lines += ["## 🟡 隐性命中(简历里有近似但用词不同,建议改写时对齐)", ""]
    if gap["implicit"]:
        for item in gap["implicit"][:20]:
            # The closing fullwidth parenthesis balances the one opened before "简历里出现".
            lines.append(
                f"- **JD 关键词:{item['keyword']}**(简历里出现:`{item['fuzzy_match']}`)"
            )
    else:
        lines.append("(无)")
    lines.append("")

    lines += ["## 🔴 真缺口(简历完全没有,需要确认 / 补充 / 转换叙事)", ""]
    if gap["missing"]:
        for kw in gap["missing"][:30]:
            lines.append(f"- **{kw}**")
    else:
        lines.append("(无)")
    lines.append("")

    # Static advice appended to every report.
    lines += [
        "---",
        "## 改写建议",
        "",
        "1. **完美命中** 的部分保留,但确保措辞与 JD 一致(比如 JD 用『A/B 测试』就别写『AB 实验』)",
        "2. **隐性命中** 是性价比最高的优化点 —— 把简历里的近义词改成 JD 的措辞",
        "3. **真缺口** 分两类:",
        " - 你做过但没写?→ 补到对应经历的 bullet 里",
        " - 你没做过?→ **不要编造**。可以在 cover letter / Summary 里诚实说明 transferable skill",
        "4. 把改后的简历再跑一次 ats_check.py 看命中率是否提升",
    ]
    return "\n".join(lines)
def main() -> None:
    """Command-line entry point: build the gap report and emit it.

    Reads the parsed JD JSON (``--jd``) and the resume (``--resume``), runs
    the analysis and renders the Markdown report. With ``--out`` the report
    is written to that path as UTF-8 and a confirmation line is printed;
    without it the report is printed to stdout.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--jd", required=True, help="parse_jd.py 输出的 json")
    cli.add_argument("--resume", required=True, help="简历文件 (.md/.txt/.docx)")
    cli.add_argument("--out", help="输出 markdown 路径")
    opts = cli.parse_args()

    jd_data = json.loads(Path(opts.jd).read_text(encoding="utf-8"))
    resume = load_resume_text(Path(opts.resume))
    report = render(jd_data, analyze(jd_data, resume))

    if not opts.out:
        print(report)
        return
    Path(opts.out).write_text(report, encoding="utf-8")
    print(f"✓ Gap 报告已生成:{opts.out}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()