Add community skills, agents, system prompts from 22+ sources
Community Skills (32):
- jat: jat-start, jat-verify, jat-complete
- pi-mono: codex-cli, codex-5.3-prompting, interactive-shell
- picoclaw: github, weather, tmux, summarize, skill-creator
- dyad: 18 skills (swarm-to-plan, multi-pr-review, fix-issue, lint, etc.)
- dexter: dcf valuation skill

Agents (23):
- pi-mono subagents: scout, planner, reviewer, worker
- toad: 19 agent configs (Claude, Codex, Gemini, Copilot, OpenCode, etc.)

System Prompts (91):
- Anthropic: 15 Claude prompts (opus-4.6, code, cowork, etc.)
- OpenAI: 49 GPT prompts (gpt-5 series, o3, o4-mini, tools)
- Google: 13 Gemini prompts (2.5-pro, 3-pro, workspace, cli)
- xAI: 5 Grok prompts
- Other: 9 misc prompts (Notion, Raycast, Warp, Kagi, etc.)

Hooks (9):
- JAT hooks for session management, signal tracking, activity logging

Prompts (6):
- pi-mono templates for PR review, issue analysis, changelog audit

Sources analyzed: jat, ralph-desktop, toad, pi-mono, cmux, pi-interactive-shell, craft-agents-oss, dexter, picoclaw, dyad, system_prompts_leaks, Prometheus, zed, clawdbot, OS-Copilot, and more
This commit is contained in:
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sanitize GitHub issue markdown by removing comments, unusual formatting,
|
||||
and other artifacts that may confuse LLMs processing the issue.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
def sanitize_issue_markdown(markdown: str) -> str:
    """
    Sanitize GitHub issue markdown content.

    Line endings are normalized and invisible/control characters are stripped
    *before* any markup is matched, so markup that has been split by such
    characters (for example ``<!-`` + U+200B + ``- ... -->``) is still
    recognized and removed instead of being re-assembled in the output.

    Removes:
    - Zero-width characters and other invisible Unicode
    - ASCII control characters (tab and newline are kept)
    - HTML comments (<!-- ... -->), including multi-line ones
    - <details>/<summary> tags (their inner content is kept)
    - Empty HTML elements, including nested ones
    - Task list items that consist only of a checkbox with no text
    - Trailing whitespace on each line (leading indentation is preserved,
      because it is significant in markdown)
    - Excessive blank lines (more than 2 consecutive)
    - Leading/trailing whitespace around the whole document

    Args:
        markdown: Raw markdown string from GitHub issue

    Returns:
        Cleaned markdown string
    """
    result = markdown

    # Normalize line endings first so every later pattern only has to
    # reason about "\n".
    result = result.replace("\r\n", "\n").replace("\r", "\n")

    # Remove zero-width characters and other invisible Unicode
    # (zero-width space, non-joiner, joiner, word joiner, BOM / zero-width
    # no-break space, soft hyphen, combining grapheme joiner, Arabic letter
    # mark, Mongolian vowel separator). This must run before the markup
    # patterns below, otherwise "<!-\u200b-" would only become "<!--" after
    # the comment pass had already finished.
    result = re.sub(
        r"[\u200b\u200c\u200d\u2060\ufeff\u00ad\u034f\u061c\u180e]", "", result
    )

    # Remove other control characters (except newlines, tabs)
    result = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", result)

    # Remove HTML comments (including multi-line)
    result = re.sub(r"<!--[\s\S]*?-->", "", result)

    # Remove HTML details/summary tags but keep inner content. The \b keeps
    # unrelated tag names that merely start with these words untouched.
    result = re.sub(
        r"</?(?:details|summary)\b[^>]*>", "", result, flags=re.IGNORECASE
    )

    # Remove empty HTML elements. The tag name must be followed by
    # whitespace, "/" or ">" so that "<pre></p>" is not mistaken for an
    # empty <p>; digits are allowed so "<h1></h1>" is covered. The pass is
    # repeated because removing an inner empty element can leave its parent
    # empty ("<div><span></span></div>").
    empty_element = re.compile(
        r"<([a-z][a-z0-9]*)(?:[\s/][^>]*)?>\s*</\1\s*>", re.IGNORECASE
    )
    while True:
        collapsed = empty_element.sub("", result)
        if collapsed == result:
            break
        result = collapsed

    # Remove GitHub task list markers that are just decoration
    # But keep the actual checkbox content (supports both [x] and [X]).
    # [^\S\n] is "whitespace except newline": the marker has to sit on a
    # single line, so a lone "-" line followed by a "[ ]" line is kept.
    result = re.sub(
        r"^[^\S\n]*-[^\S\n]*\[[ xX]\][^\S\n]*$", "", result, flags=re.MULTILINE
    )

    # Strip trailing whitespace from each line
    result = "\n".join(line.rstrip() for line in result.split("\n"))

    # Collapse more than 2 consecutive blank lines into 2
    result = re.sub(r"\n{4,}", "\n\n\n", result)

    # Strip leading/trailing whitespace from the whole document
    result = result.strip()

    return result
|
||||
|
||||
|
||||
def main():
    """Sanitize an issue body and print the result to stdout.

    The input is read from the file named by the first command-line
    argument (decoded as UTF-8) when one is given, otherwise from stdin.
    """
    cli_args = sys.argv[1:]

    if not cli_args:
        # No path supplied: consume the whole of stdin.
        raw_text = sys.stdin.read()
    else:
        # Only the first argument is used; any further ones are ignored.
        with open(cli_args[0], "r", encoding="utf-8") as source:
            raw_text = source.read()

    print(sanitize_issue_markdown(raw_text))
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user