Initial commit

This commit is contained in:
Z User
2026-06-06 05:21:10 +00:00
Unverified
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
profile_match.py — 把用户技能列表 vs 关键词库做匹配,输出匹配度报告
用法:
python profile_match.py --skills "SQL,Python,产品规划,A/B测试" \
--library internet \
[--out report.md]
library 取值:internet / tech / finance / general
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
# Maps each --library choice to its keyword file inside the references
# directory. Every file follows the same "keywords_<name>.md" naming scheme;
# the tuple order is also the order in which the CLI lists the choices.
LIB_MAP = {
    name: f"keywords_{name}.md"
    for name in ("internet", "tech", "finance", "general")
}
# A bullet line: optional indent, "-" or "*", at least one blank, then text.
# "[^\S\r\n]" means "whitespace except line breaks"; plain "\s" would let an
# empty bullet ("- ") run on and capture the following, unrelated line.
_BULLET_RE = re.compile(r"^[^\S\r\n]*[-*][^\S\r\n]+(.+)$", flags=re.MULTILINE)
# Parenthesised explanations, written with ASCII or full-width parentheses.
_PAREN_RE = re.compile(r"[((][^))]*[))]")
# Fill-in placeholders such as "____".
_PLACEHOLDER_RE = re.compile(r"_{2,}")
# Separators between keywords on one bullet line (ASCII and full-width forms).
_SEPARATOR_RE = re.compile(r"[、,,;;/\s]+")
# A usable keyword must contain at least one CJK ideograph or ASCII letter.
_WORD_CHAR_RE = re.compile(r"[一-龥A-Za-z]")
# Punctuation trimmed from both ends of a token.
_EDGE_PUNCT = "::。.\"'“”‘’`*"


def extract_keywords(md_path: Path) -> list[str]:
    """Collect the keywords listed in the bullet lines of a markdown file.

    Each bullet line ("- ..." or "* ...") is stripped of parenthesised
    explanations, underscore placeholders and bold markers, then split on
    list separators. Tokens are lower-cased; tokens without any CJK or ASCII
    letter, and tokens whose length is not between 2 and 29 characters, are
    dropped.

    Args:
        md_path: Path of the UTF-8 encoded markdown file to parse.

    Returns:
        The unique keywords, sorted.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    text = md_path.read_text(encoding="utf-8")
    keywords: set[str] = set()
    for line in _BULLET_RE.findall(text):
        clean = _PAREN_RE.sub("", line)
        clean = _PLACEHOLDER_RE.sub("", clean)
        # Removing "**" can leave two underscores adjacent, hence the second
        # replace after the placeholder pass.
        clean = clean.replace("**", "").replace("__", "")
        for token in _SEPARATOR_RE.split(clean):
            token = token.strip().strip(_EDGE_PUNCT).lower()
            # Skip tokens made only of punctuation, dashes or digits.
            if not _WORD_CHAR_RE.search(token):
                continue
            if 1 < len(token) < 30:
                keywords.add(token)
    return sorted(keywords)
def match_score(user_skills: list[str], lib_keywords: list[str]) -> dict:
    """Compare the user's skills with a keyword library.

    Matching is fuzzy: a keyword is a hit when it is contained in any
    normalised user skill, or any normalised user skill is contained in it.

    Args:
        user_skills: Raw skill strings; blank entries are ignored and the
            rest are stripped and lower-cased before comparison.
        lib_keywords: Library keywords to test, compared as given.

    Returns:
        A dict with ``hits`` and ``missing`` (keywords in library order),
        ``rate`` (hits divided by the library size, 0 for an empty library)
        and ``user_skills`` (the normalised skills).
    """
    normalized = []
    for raw in user_skills:
        skill = raw.strip()
        if skill:
            normalized.append(skill.lower())

    def _overlaps(keyword: str) -> bool:
        # Containment in either direction counts as a match.
        for skill in normalized:
            if keyword in skill or skill in keyword:
                return True
        return False

    found: list[str] = []
    absent: list[str] = []
    for keyword in lib_keywords:
        bucket = found if _overlaps(keyword) else absent
        bucket.append(keyword)

    if lib_keywords:
        ratio = len(found) / len(lib_keywords)
    else:
        ratio = 0

    return {
        "hits": found,
        "missing": absent,
        "rate": ratio,
        "user_skills": normalized,
    }
def render_report(result: dict, library: str, max_missing: int = 20) -> str:
    """Render a match result as a markdown report.

    Args:
        result: Mapping with the keys ``hits``, ``missing``, ``rate`` and
            ``user_skills``, as produced by ``match_score``.
        library: Name of the keyword library, shown in the report title.
        max_missing: Upper bound on how many missing keywords are listed so
            the report stays short. Defaults to 20; negative values are
            treated as 0.

    Returns:
        The report as a single newline-joined string without a trailing
        newline.
    """
    hits = result["hits"]
    missing = result["missing"]
    total = len(hits) + len(missing)
    rate_pct = f"{result['rate'] * 100:.1f}%"
    limit = max(max_missing, 0)
    # Missing keywords are listed in the order given; the heading's
    # "dictionary order" wording assumes the caller passes them sorted.
    top_missing = missing[:limit]
    lines = [
        f"# 岗位画像匹配报告 — {library}",
        "",
        # Separate the label, the percentage and the hit count explicitly;
        # without the colon and parentheses they run together ("66.7%2 / 3").
        f"- **命中率**:{rate_pct}({len(hits)} / {total})",
        f"- **用户提供技能**:{', '.join(result['user_skills'])}",
        "",
        "## ✅ 命中的关键词",
        "",
        ", ".join(hits) if hits else "(无)",
        "",
        f"## ⚠️ 缺口(前 {limit} 个,按字典序)",
        "",
        ", ".join(top_missing) if top_missing else "(无)",
        "",
        "## 解读",
        "",
        "- 命中率 < 20%:方向不匹配,建议重新评估目标岗",
        "- 命中率 20-50%:可投但需要补关键缺口",
        "- 命中率 > 50%:核心匹配,可以重点投",
        "",
        "注意:本工具是关键词级别的粗筛,不能替代真实 JD 对照(用 jd-resume-tailor 做精准对比)。",
    ]
    return "\n".join(lines)
def main() -> None:
    """Command-line entry point.

    Parses the arguments, loads the selected keyword library from the
    references directory, scores the comma-separated skills against it and
    either prints the markdown report or writes it to the ``--out`` path.
    Exits with status 1 when the keyword file does not exist.
    """
    # By default the references directory sits next to this script's parent.
    default_refs = Path(__file__).resolve().parent.parent / "references"

    cli = argparse.ArgumentParser()
    cli.add_argument("--skills", required=True, help="用户技能列表,逗号分隔")
    cli.add_argument(
        "--library", required=True, choices=list(LIB_MAP), help="关键词库"
    )
    cli.add_argument("--out", help="输出 markdown 报告路径,缺省直接打印")
    cli.add_argument(
        "--references-dir",
        default=str(default_refs),
        help="references 目录路径",
    )
    opts = cli.parse_args()

    keyword_file = Path(opts.references_dir) / LIB_MAP[opts.library]
    if not keyword_file.exists():
        print(f"✗ 找不到关键词库:{keyword_file}", file=sys.stderr)
        sys.exit(1)

    library_terms = extract_keywords(keyword_file)
    skills = [part for part in opts.skills.split(",") if part.strip()]
    outcome = match_score(skills, library_terms)
    markdown = render_report(outcome, opts.library)

    # Without --out the report goes to stdout; otherwise it is saved to disk.
    if not opts.out:
        print(markdown)
        return
    Path(opts.out).write_text(markdown, encoding="utf-8")
    print(f"✓ 报告已生成:{opts.out}")


if __name__ == "__main__":
    main()