178 lines
5.9 KiB
Python
Executable File
178 lines
5.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
jd_gap.py — 把 parse_jd.py 的 JSON 与简历文本做 gap 分析
|
||
|
||
用法:
|
||
python jd_gap.py --jd jd_parsed.json --resume resume.md --out gap.md
|
||
|
||
输出 markdown 报告:完美命中 / 隐性命中 / 真缺口 三类,附改写建议。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
|
||
def load_resume_text(path: Path) -> str:
|
||
suffix = path.suffix.lower()
|
||
if suffix in {".md", ".txt"}:
|
||
return path.read_text(encoding="utf-8")
|
||
if suffix == ".docx":
|
||
try:
|
||
from docx import Document
|
||
except ImportError:
|
||
print(
|
||
"✗ 缺少 python-docx:pip install python-docx --break-system-packages",
|
||
file=sys.stderr,
|
||
)
|
||
sys.exit(1)
|
||
doc = Document(str(path))
|
||
return "\n".join(p.text for p in doc.paragraphs)
|
||
print(f"✗ 暂不支持的格式:{suffix}", file=sys.stderr)
|
||
sys.exit(1)
|
||
|
||
|
||
def find_evidence(resume_text: str, keyword: str, window: int = 30) -> str | None:
|
||
"""在简历里找关键词,返回上下文片段;找不到返回 None。"""
|
||
pattern = re.escape(keyword)
|
||
m = re.search(pattern, resume_text, flags=re.IGNORECASE)
|
||
if not m:
|
||
return None
|
||
start = max(0, m.start() - window)
|
||
end = min(len(resume_text), m.end() + window)
|
||
snippet = resume_text[start:end].replace("\n", " ").strip()
|
||
return snippet
|
||
|
||
|
||
def fuzzy_hit(resume_text: str, keyword: str) -> str | None:
|
||
"""模糊命中:取关键词的中文 / 英文核心,做包含匹配。"""
|
||
# 拿前 2 个字 / 前 5 个字符
|
||
candidates = []
|
||
if re.search(r"[一-龥]", keyword):
|
||
if len(keyword) >= 4:
|
||
candidates.append(keyword[:2])
|
||
candidates.append(keyword[-2:])
|
||
else:
|
||
if len(keyword) >= 4:
|
||
candidates.append(keyword[:4].lower())
|
||
text_low = resume_text.lower()
|
||
for c in candidates:
|
||
if c and c in text_low:
|
||
return c
|
||
return None
|
||
|
||
|
||
def analyze(jd: dict, resume_text: str) -> dict:
|
||
perfect, implicit, missing = [], [], []
|
||
|
||
# 用 must_have 句子里抽出来的 skills 作为对比项
|
||
candidates = jd.get("skills_extracted", []) + jd.get("must_have", [])
|
||
|
||
seen = set()
|
||
for c in candidates:
|
||
# 句子层面太长,截短
|
||
keyword = c.strip()
|
||
if len(keyword) > 30:
|
||
# 从长句子里抽更短的关键词
|
||
short_tokens = re.findall(
|
||
r"[A-Za-z][A-Za-z0-9+/.\-_]{1,20}|[一-龥]{2,6}",
|
||
keyword,
|
||
)
|
||
for t in short_tokens:
|
||
if t.lower() not in seen:
|
||
seen.add(t.lower())
|
||
process_one(t, resume_text, perfect, implicit, missing)
|
||
else:
|
||
if keyword.lower() not in seen:
|
||
seen.add(keyword.lower())
|
||
process_one(keyword, resume_text, perfect, implicit, missing)
|
||
|
||
return {"perfect": perfect, "implicit": implicit, "missing": missing}
|
||
|
||
|
||
def process_one(keyword, resume_text, perfect, implicit, missing):
|
||
ev = find_evidence(resume_text, keyword)
|
||
if ev:
|
||
perfect.append({"keyword": keyword, "evidence": ev})
|
||
return
|
||
fuzzy = fuzzy_hit(resume_text, keyword)
|
||
if fuzzy:
|
||
implicit.append({"keyword": keyword, "fuzzy_match": fuzzy})
|
||
else:
|
||
missing.append(keyword)
|
||
|
||
|
||
def render(jd: dict, gap: dict) -> str:
|
||
lines = ["# JD ⇄ Resume Gap 分析报告", ""]
|
||
|
||
spec = jd.get("special_requirements", {})
|
||
if spec:
|
||
lines += ["## JD 硬条件", ""]
|
||
for k, v in spec.items():
|
||
lines.append(f"- **{k}**:{v}")
|
||
lines.append("")
|
||
|
||
lines += ["## ✅ 完美命中(简历里有明确证据)", ""]
|
||
if gap["perfect"]:
|
||
for item in gap["perfect"][:30]:
|
||
lines.append(f"- **{item['keyword']}** —— 证据:`...{item['evidence']}...`")
|
||
else:
|
||
lines.append("(无)")
|
||
lines.append("")
|
||
|
||
lines += ["## 🟡 隐性命中(简历里有近似但用词不同,建议改写时对齐)", ""]
|
||
if gap["implicit"]:
|
||
for item in gap["implicit"][:20]:
|
||
lines.append(f"- **JD 关键词:{item['keyword']}**(简历里出现:`{item['fuzzy_match']}`)")
|
||
else:
|
||
lines.append("(无)")
|
||
lines.append("")
|
||
|
||
lines += ["## 🔴 真缺口(简历完全没有,需要确认 / 补充 / 转换叙事)", ""]
|
||
if gap["missing"]:
|
||
for kw in gap["missing"][:30]:
|
||
lines.append(f"- **{kw}**")
|
||
else:
|
||
lines.append("(无)")
|
||
lines.append("")
|
||
|
||
lines += [
|
||
"---",
|
||
"## 改写建议",
|
||
"",
|
||
"1. **完美命中** 的部分保留,但确保措辞与 JD 一致(比如 JD 用『A/B 测试』就别写『AB 实验』)",
|
||
"2. **隐性命中** 是性价比最高的优化点 —— 把简历里的近义词改成 JD 的措辞",
|
||
"3. **真缺口** 分两类:",
|
||
" - 你做过但没写?→ 补到对应经历的 bullet 里",
|
||
" - 你没做过?→ **不要编造**。可以在 cover letter / Summary 里诚实说明 transferable skill",
|
||
"4. 把改后的简历再跑一次 ats_check.py 看命中率是否提升",
|
||
]
|
||
return "\n".join(lines)
|
||
|
||
|
||
def main() -> None:
|
||
parser = argparse.ArgumentParser()
|
||
parser.add_argument("--jd", required=True, help="parse_jd.py 输出的 json")
|
||
parser.add_argument("--resume", required=True, help="简历文件 (.md/.txt/.docx)")
|
||
parser.add_argument("--out", help="输出 markdown 路径")
|
||
args = parser.parse_args()
|
||
|
||
jd = json.loads(Path(args.jd).read_text(encoding="utf-8"))
|
||
resume_text = load_resume_text(Path(args.resume))
|
||
gap = analyze(jd, resume_text)
|
||
report = render(jd, gap)
|
||
|
||
if args.out:
|
||
Path(args.out).write_text(report, encoding="utf-8")
|
||
print(f"✓ Gap 报告已生成:{args.out}")
|
||
else:
|
||
print(report)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|