#!/usr/bin/env python3
"""
profile_match.py — 把用户技能列表 vs 关键词库做匹配,输出匹配度报告

用法:
    python profile_match.py --skills "SQL,Python,产品规划,A/B测试" \
        --library internet \
        [--out report.md]

library 取值:internet / tech / finance / general
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# Maps each --library choice to its keyword file inside the references
# directory. Every file follows the same "keywords_<name>.md" pattern, and
# the tuple order defines the order in which the choices are listed.
LIB_MAP = {
    name: f"keywords_{name}.md"
    for name in ("internet", "tech", "finance", "general")
}
|
||
|
||
|
||
# Patterns are compiled once at import time. Both ASCII and full-width (CJK)
# punctuation are recognised, written as \uXXXX escapes so the two variants
# cannot be silently collapsed into one by text normalisation.
_BULLET_RE = re.compile(r"^\s*[-*]\s+(.+)$", flags=re.MULTILINE)
# "(...)" and U+FF08 ... U+FF09 parenthesised explanations.
_PAREN_RE = re.compile(r"[(\uff08][^)\uff09]*[)\uff09]")
# Runs of underscores used as fill-in placeholders.
_PLACEHOLDER_RE = re.compile(r"_{2,}")
# U+3001 enumeration comma, ASCII / full-width comma and slash, whitespace.
_SEPARATOR_RE = re.compile(r"[\u3001,\uff0c/\uff0f\s]+")
# A token must contain at least one CJK ideograph or ASCII letter.
_WORD_CHAR_RE = re.compile(r"[\u4e00-\u9fa5A-Za-z]")
# Colons, full stops, straight/curly quotes, backticks and asterisks that may
# cling to either end of a token.
_EDGE_PUNCTUATION = ":\uff1a\u3002.\"\u201c\u201d'\u2018\u2019`*"


def extract_keywords(md_path: Path) -> list[str]:
    """Collect the distinct keywords listed in a markdown file's bullets.

    Only lines that start with a ``-`` or ``*`` bullet are considered. For
    each bullet, parenthesised explanations, underscore placeholders and
    bold markers are removed, then the remainder is split on commas,
    slashes, the enumeration comma and whitespace. ASCII and full-width
    punctuation are treated alike.

    Args:
        md_path: Path of the UTF-8 encoded markdown keyword library.

    Returns:
        The lower-cased keywords, de-duplicated and sorted. A token is kept
        only if it contains a CJK ideograph or an ASCII letter and its
        length is between 2 and 29 characters inclusive.
    """
    text = md_path.read_text(encoding="utf-8")
    keywords: set[str] = set()
    for line in _BULLET_RE.findall(text):
        # Drop bracketed explanations, placeholders and markdown emphasis.
        clean = _PAREN_RE.sub("", line)
        clean = _PLACEHOLDER_RE.sub("", clean)
        clean = clean.replace("**", "").replace("__", "")
        for raw in _SEPARATOR_RE.split(clean):
            token = raw.strip().strip(_EDGE_PUNCTUATION).lower()
            # Skip tokens made only of punctuation, dashes or digits.
            if not _WORD_CHAR_RE.search(token):
                continue
            if 1 < len(token) < 30:
                keywords.add(token)
    return sorted(keywords)
|
||
|
||
|
||
def match_score(user_skills: list[str], lib_keywords: list[str]) -> dict:
    """Compare the user's skills with the library keywords.

    Matching is fuzzy: a keyword counts as a hit when it contains one of the
    user's skills or is itself contained in one of them. Skills are stripped
    and lower-cased first, and blank entries are ignored.

    Returns:
        A dict with ``hits`` and ``missing`` (keywords in library order),
        ``rate`` (hits divided by the number of library keywords, or 0 when
        the library is empty) and ``user_skills`` (the normalised skills).
    """
    normalized = []
    for raw in user_skills:
        skill = raw.strip()
        if skill:
            normalized.append(skill.lower())

    matched = []
    unmatched = []
    for keyword in lib_keywords:
        for skill in normalized:
            if keyword in skill or skill in keyword:
                matched.append(keyword)
                break
        else:
            # No skill overlapped with this keyword in either direction.
            unmatched.append(keyword)

    total = len(lib_keywords)
    ratio = len(matched) / total if total else 0
    return {
        "hits": matched,
        "missing": unmatched,
        "rate": ratio,
        "user_skills": normalized,
    }
|
||
|
||
|
||
def render_report(result: dict, library: str) -> str:
    """Render a match result as a markdown report.

    Args:
        result: Dict with ``hits``, ``missing``, ``rate`` and ``user_skills``
            keys, as produced by ``match_score``.
        library: Name of the keyword library, shown in the report title.

    Returns:
        The report as a single newline-joined string. Only the first 20
        missing keywords are listed to keep the report short.
    """
    hits = result["hits"]
    missing = result["missing"]
    hit_count = len(hits)
    total = hit_count + len(missing)
    percent = f"{result['rate'] * 100:.1f}%"

    # Cap the gap list at 20 entries so the report stays readable.
    shown_missing = missing[:20]
    hit_text = ", ".join(hits) if hits else "(无)"
    missing_text = ", ".join(shown_missing) if shown_missing else "(无)"

    header = [
        f"# 岗位画像匹配报告 — {library}",
        "",
        f"- **命中率**:{percent}({hit_count} / {total})",
        f"- **用户提供技能**:{', '.join(result['user_skills'])}",
        "",
    ]
    hit_section = ["## ✅ 命中的关键词", "", hit_text, ""]
    gap_section = ["## ⚠️ 缺口(前 20 个,按字典序)", "", missing_text, ""]
    guidance = [
        "## 解读",
        "",
        "- 命中率 < 20%:方向不匹配,建议重新评估目标岗",
        "- 命中率 20-50%:可投但需要补关键缺口",
        "- 命中率 > 50%:核心匹配,可以重点投",
        "",
        "注意:本工具是关键词级别的粗筛,不能替代真实 JD 对照(用 jd-resume-tailor 做精准对比)。",
    ]
    return "\n".join(header + hit_section + gap_section + guidance)
|
||
|
||
|
||
def main() -> None:
    """Command-line entry point: match the given skills against a library.

    Parses ``--skills``, ``--library``, ``--out`` and ``--references-dir``,
    loads the selected keyword library, scores the skills against it and
    either writes the markdown report to ``--out`` or prints it.

    Exits with status 1 (after a message on stderr) when the keyword library
    file cannot be found.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--skills", required=True, help="用户技能列表,逗号分隔")
    parser.add_argument(
        "--library", choices=list(LIB_MAP), required=True, help="关键词库"
    )
    parser.add_argument("--out", help="输出 markdown 报告路径,缺省直接打印")
    parser.add_argument(
        "--references-dir",
        default=str(Path(__file__).resolve().parent.parent / "references"),
        help="references 目录路径",
    )
    args = parser.parse_args()

    lib_path = Path(args.references_dir) / LIB_MAP[args.library]
    # is_file() rather than exists(): a directory with the library's name
    # would pass exists() and then fail with a traceback when read.
    if not lib_path.is_file():
        print(f"✗ 找不到关键词库:{lib_path}", file=sys.stderr)
        sys.exit(1)

    lib_keywords = extract_keywords(lib_path)
    # Accept the ASCII comma as well as the full-width comma (U+FF0C) and the
    # enumeration comma (U+3001), which Chinese input methods produce.
    user_skills = [
        s for s in re.split(r"[,\uff0c\u3001]", args.skills) if s.strip()
    ]
    result = match_score(user_skills, lib_keywords)
    report = render_report(result, args.library)

    if args.out:
        Path(args.out).write_text(report, encoding="utf-8")
        print(f"✓ 报告已生成:{args.out}")
    else:
        print(report)
|
||
|
||
|
||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()