270 lines
9.1 KiB
Python
Executable File
270 lines
9.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
ats_check.py — resume ATS (Applicant Tracking System) friendliness check + keyword coverage

Usage:
    python ats_check.py --resume resume.md --industry internet
    python ats_check.py --resume resume.md --industry tech --jd jd.txt
    python ats_check.py --resume resume.docx --industry finance --out report.md

Supported inputs: .md / .txt / .docx (docx is read via python-docx; requires pip install python-docx)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# Industry identifiers accepted by the --industry option. The chosen value is
# also used as the keyword file stem: <references-dir>/keywords/<industry>.txt.
INDUSTRIES = {"internet", "tech", "finance", "general"}
|
||
|
||
|
||
def load_resume_text(path: Path) -> str:
    """Read a resume file and return its contents as plain text.

    ``.md`` and ``.txt`` files are decoded as UTF-8 (a leading BOM is
    dropped). ``.docx`` files are read through python-docx, which is imported
    lazily so the dependency is only needed for Word documents.

    Args:
        path: Location of the resume. The lower-cased suffix selects the
            loader.

    Returns:
        The resume text. For ``.docx`` this is the text of every entry in
        ``doc.paragraphs`` joined with newlines.

    Raises:
        SystemExit: With exit code 1, after printing a message to stderr,
            when the suffix is unsupported, a text file is not valid UTF-8,
            or python-docx is not installed.
    """
    suffix = path.suffix.lower()

    if suffix in {".md", ".txt"}:
        try:
            # "utf-8-sig" reads plain UTF-8 as well, but also strips the BOM
            # some editors prepend; with plain "utf-8" that BOM would stay in
            # the text as a stray "\ufeff" character.
            return path.read_text(encoding="utf-8-sig")
        except UnicodeDecodeError:
            # Same "message + exit(1)" style as the other failure paths,
            # instead of a raw traceback for e.g. GBK-encoded files.
            print(
                f"✗ 简历文件不是 UTF-8 编码,请另存为 UTF-8 后重试:{path}",
                file=sys.stderr,
            )
            sys.exit(1)

    if suffix == ".docx":
        try:
            from docx import Document
        except ImportError:
            print(
                "✗ 缺少 python-docx,请先安装:pip install python-docx --break-system-packages",
                file=sys.stderr,
            )
            sys.exit(1)
        doc = Document(str(path))
        # NOTE(review): only doc.paragraphs is read here; text placed inside
        # tables is presumably not included — confirm against python-docx.
        return "\n".join(p.text for p in doc.paragraphs)

    print(f"✗ 暂不支持的格式:{suffix}", file=sys.stderr)
    sys.exit(1)
|
||
|
||
|
||
def load_keywords(industry: str, references_dir: Path) -> list[str]:
    """Load the keyword list for one industry.

    Args:
        industry: Industry identifier; used as the file stem.
        references_dir: Directory containing a ``keywords`` sub-directory.

    Returns:
        The non-blank lines of ``<references_dir>/keywords/<industry>.txt``,
        each stripped of surrounding whitespace, in file order.

    Raises:
        SystemExit: With exit code 1 (after a message on stderr) when the
            keyword file does not exist.
    """
    keyword_file = references_dir / "keywords" / f"{industry}.txt"
    if not keyword_file.exists():
        print(f"✗ 关键词库不存在:{keyword_file}", file=sys.stderr)
        sys.exit(1)

    keywords = []
    for raw_line in keyword_file.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if entry:  # skip blank / whitespace-only lines
            keywords.append(entry)
    return keywords
|
||
|
||
|
||
def coverage(resume_text: str, keywords: list[str]) -> dict:
    """Split *keywords* into those found in the resume and those that are not.

    Matching is a case-insensitive substring test, which also works for
    mixed Chinese/English text.

    Args:
        resume_text: Full resume text.
        keywords: Keywords to look for.

    Returns:
        A dict with ``"hits"`` (keywords found, input order), ``"missing"``
        (keywords not found, input order) and ``"rate"`` (hits divided by the
        number of keywords, or ``0`` when *keywords* is empty).
    """
    haystack = resume_text.lower()
    found = [kw for kw in keywords if kw.lower() in haystack]
    absent = [kw for kw in keywords if kw.lower() not in haystack]
    hit_rate = len(found) / len(keywords) if keywords else 0
    return {"hits": found, "missing": absent, "rate": hit_rate}
|
||
|
||
|
||
# Candidate tokens: an ASCII word of 2-30 characters starting with a letter
# (may contain digits and + / . - _), or a run of 2-10 CJK ideographs.
_JD_CANDIDATE_RE = re.compile(r"[A-Za-z][A-Za-z0-9+/.\-_]{1,29}|[一-龥]{2,10}")

# Generic JD filler words that carry no skill information.
_JD_STOPWORDS = frozenset({
    "公司", "工作", "我们", "你将", "团队", "需要", "能够", "具备", "熟悉",
    "了解", "良好", "优秀", "经验", "能力", "岗位", "职责", "要求", "以上",
    "相关", "及其", "或者", "进行", "完成", "负责", "推动", "实现", "提升",
    "并且", "包括", "以下", "根据",
})

# Upper bound on the number of candidates returned.
_JD_MAX_KEYWORDS = 80


def jd_extract_keywords(jd_text: str) -> list[str]:
    """Extract candidate keywords from job-description text.

    This is a deliberately simple extractor: it collects regex tokens, drops
    stop words and case-insensitive duplicates, and keeps the first 80.

    Args:
        jd_text: Raw job-description text.

    Returns:
        Up to 80 distinct candidates in order of first appearance, spelled as
        they first appear (minus trailing punctuation).
    """
    seen = set()
    keywords = []
    for raw in _JD_CANDIDATE_RE.findall(jd_text):
        # The ASCII branch of the pattern allows ". / - _" inside a token
        # (Node.js, CI/CD), so it also swallows sentence punctuation at the
        # end ("Python."). Strip it, otherwise the keyword can never match
        # the resume and "Python." / "Python" are not recognised as
        # duplicates. "+" is kept so "C++" survives.
        token = raw.rstrip("./-_")
        if len(token) < 2:
            # Stripping may leave a single letter; keep the 2-char minimum.
            continue
        key = token.lower()
        if key in seen or token in _JD_STOPWORDS:
            continue
        seen.add(key)
        keywords.append(token)
    return keywords[:_JD_MAX_KEYWORDS]
|
||
|
||
|
||
_ATS_EMAIL_RE = re.compile(r"[\w.\-+]+@[\w.\-]+\.\w+")
_ATS_PHONE_RE = re.compile(r"(\+?86[-\s]?)?1[3-9]\d{9}|\d{3}[-\s]?\d{4}[-\s]?\d{4}")
_ATS_DECORATIVE_RE = re.compile(r"[★☆●○◆◇▶▷■□▪▫♦]")
# U+1F600-U+1F64F lies inside U+1F300-U+1FAFF, so one range covers both.
_ATS_EMOJI_RE = re.compile(r"[\U0001F300-\U0001FAFF]")
_ATS_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?%?")
_ATS_BULLET_RE = re.compile(r"^\s*[-*•]\s+", flags=re.MULTILINE)


def _docx_layout_warnings(source_path: Path) -> list[str]:
    """Return one warning per layout problem (tables, images) in a .docx file.

    The check is best-effort: if python-docx is unavailable or the file
    cannot be inspected, a note is printed to stderr and no warnings are
    returned, so the caller's score is unaffected.
    """
    try:
        from docx import Document

        doc = Document(str(source_path))
        tables = len(doc.tables)
        images = sum(1 for _ in doc.inline_shapes)
    except Exception as exc:  # best-effort boundary: report, never crash
        print(
            f"ℹ 无法检查 docx 的表格 / 图片结构,已跳过:{exc}",
            file=sys.stderr,
        )
        return []

    found = []
    if tables > 1:
        found.append(
            f"⚠ docx 里有 {tables} 个表格,部分 ATS 解析表格会丢字段,"
            f"建议改成正文段落"
        )
    if images > 0:
        found.append(
            f"⚠ docx 里有 {images} 张图片(含证件照),ATS 不读图,"
            f"重要信息别只放在图里;证件照可保留"
        )
    return found


def ats_friendliness(text: str, source_path: Path) -> tuple[int, list[str]]:
    """Score how ATS-friendly a resume is.

    The score starts at 10 and loses points for each problem found: text
    length, missing e-mail / phone, decorative symbols, emoji, too few
    numbers per bullet, and (for ``.docx`` sources) tables and images.

    Args:
        text: Extracted resume text.
        source_path: Path of the resume file; a ``.docx`` suffix enables the
            extra document-structure checks, which re-open the file.

    Returns:
        ``(score, warnings)`` where *score* is an int clamped to be at least
        0 and *warnings* holds one message per deduction, in check order.
    """
    score = 10
    warnings = []

    # Length: too short costs 3 points, too long costs 1.
    length = len(text)
    if length < 200:
        score -= 3
        warnings.append("⚠ 简历文本过短(< 200 字符),可能解析失败或内容不足")
    elif length > 6000:
        score -= 1
        warnings.append("⚠ 简历偏长(> 6000 字符),建议精简到 1~2 页")

    # Contact details.
    if _ATS_EMAIL_RE.search(text) is None:
        score -= 1
        warnings.append("⚠ 没找到邮箱")
    if _ATS_PHONE_RE.search(text) is None:
        score -= 1
        warnings.append("⚠ 没找到手机号")

    # Decorative symbols: a handful is tolerated, more than 5 is penalised.
    decorative = _ATS_DECORATIVE_RE.findall(text)
    if len(decorative) > 5:
        score -= 1
        warnings.append(f"⚠ 用了 {len(decorative)} 个装饰符号(★●◆等),ATS 可能识别异常,建议精简")

    # Emoji: any occurrence is penalised.
    emojis = _ATS_EMOJI_RE.findall(text)
    if emojis:
        score -= 1
        warnings.append(f"⚠ 检测到 {len(emojis)} 个 emoji,部分 ATS 会乱码,建议删掉")

    # Number density: numbers in the whole text divided by bullet lines,
    # used as a proxy for how well results are quantified. Skipped when the
    # resume has no bullets.
    bullet_count = len(_ATS_BULLET_RE.findall(text))
    if bullet_count > 0:
        density = len(_ATS_NUMBER_RE.findall(text)) / bullet_count
        if density < 0.4:
            score -= 1
            warnings.append(
                f"⚠ 量化密度低(每条 bullet 平均 {density:.2f} 个数字),"
                f"建议在工作 / 项目经历里多加数字"
            )

    # docx-specific structure checks; each warning costs one point.
    if source_path.suffix.lower() == ".docx":
        layout_warnings = _docx_layout_warnings(source_path)
        score -= len(layout_warnings)
        warnings.extend(layout_warnings)

    return max(0, score), warnings
|
||
|
||
|
||
def render_report(
|
||
industry_cov: dict,
|
||
industry: str,
|
||
jd_cov: dict | None,
|
||
ats_score: int,
|
||
ats_warnings: list[str],
|
||
) -> str:
|
||
lines = [
|
||
"# 简历 ATS 检查报告",
|
||
"",
|
||
f"## 行业关键词覆盖({industry})",
|
||
f"- 命中率:**{industry_cov['rate'] * 100:.1f}%** "
|
||
f"({len(industry_cov['hits'])} / {len(industry_cov['hits']) + len(industry_cov['missing'])})",
|
||
"",
|
||
"**已命中**:" + (", ".join(industry_cov["hits"]) or "(无)"),
|
||
"",
|
||
"**建议补充**(前 15 个):" + (", ".join(industry_cov["missing"][:15]) or "(无)"),
|
||
"",
|
||
]
|
||
|
||
if jd_cov is not None:
|
||
lines += [
|
||
"## JD 关键词覆盖",
|
||
f"- 命中率:**{jd_cov['rate'] * 100:.1f}%**",
|
||
"",
|
||
"**已命中**:" + (", ".join(jd_cov["hits"][:30]) or "(无)"),
|
||
"",
|
||
"**JD 出现但简历没有**(重点补这些):" + (", ".join(jd_cov["missing"][:20]) or "(无)"),
|
||
"",
|
||
]
|
||
|
||
lines += [
|
||
f"## ATS 友好度评分:**{ats_score}/10**",
|
||
"",
|
||
]
|
||
if ats_warnings:
|
||
lines += ats_warnings
|
||
else:
|
||
lines.append("✅ 没有明显问题")
|
||
|
||
lines += [
|
||
"",
|
||
"---",
|
||
"## 改进建议优先级",
|
||
"",
|
||
"1. 先补 JD 命中率 → 这是 ATS 通过率的最直接信号",
|
||
"2. 再补行业关键词 → 让简历能在更宽的搜索里被捞到",
|
||
"3. 最后调 ATS 友好度 → 移除装饰符号、emoji、表格、图片",
|
||
"",
|
||
"注意:覆盖率不是越高越好,**关键词必须出现在真实的成就 bullet 里**,",
|
||
"不要把关键词单独列一长串当 skills,会被 HR 一眼识破。",
|
||
]
|
||
return "\n".join(lines)
|
||
|
||
|
||
def main(argv: list[str] | None = None) -> None:
    """Command-line entry point: check a resume and print or save the report.

    Loads the resume, computes industry keyword coverage, optionally
    computes coverage against keywords extracted from a JD file, scores ATS
    friendliness, and renders everything as a Markdown report.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: With exit code 1 (after a message on stderr) when the
            resume or JD file is missing or the JD file is not valid UTF-8;
            also raised by argparse on invalid arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resume", required=True, help="简历文件路径 (.md/.txt/.docx)")
    parser.add_argument(
        "--industry",
        # INDUSTRIES is a set, whose iteration order can differ between runs;
        # sort it so --help and error messages list choices consistently.
        choices=sorted(INDUSTRIES),
        required=True,
        help="行业方向",
    )
    parser.add_argument("--jd", help="可选:JD 文本文件,做精准对比")
    parser.add_argument("--out", help="输出报告路径,缺省直接打印")
    parser.add_argument(
        "--references-dir",
        # Default: the "references" directory next to this script's parent.
        default=str(Path(__file__).resolve().parent.parent / "references"),
        help="关键词库根目录(其下需有 keywords/<行业>.txt)",
    )
    args = parser.parse_args(argv)

    resume_path = Path(args.resume).expanduser()
    if not resume_path.exists():
        print(f"✗ 简历文件不存在:{resume_path}", file=sys.stderr)
        sys.exit(1)

    text = load_resume_text(resume_path)
    references_dir = Path(args.references_dir)

    industry_keywords = load_keywords(args.industry, references_dir)
    industry_cov = coverage(text, industry_keywords)

    jd_cov = None
    if args.jd:
        jd_path = Path(args.jd).expanduser()
        if not jd_path.exists():
            print(f"✗ JD 文件不存在:{jd_path}", file=sys.stderr)
            sys.exit(1)
        try:
            jd_text = jd_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Fail with a readable message rather than a traceback.
            print(
                f"✗ JD 文件不是 UTF-8 编码,请另存为 UTF-8 后重试:{jd_path}",
                file=sys.stderr,
            )
            sys.exit(1)
        jd_cov = coverage(text, jd_extract_keywords(jd_text))

    ats_score, ats_warnings = ats_friendliness(text, resume_path)
    report = render_report(industry_cov, args.industry, jd_cov, ats_score, ats_warnings)

    if args.out:
        out_path = Path(args.out).expanduser()
        # Create missing parent directories so any output path works.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(report, encoding="utf-8")
        print(f"✓ 报告已生成:{out_path}")
    else:
        print(report)
|
||
|
||
|
||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|