Files
Agentic-Compaction-and-Pipl…/skills/pdf/scripts/sanitize_code.py
Z User 2380d33861 feat: Add complete Agentic Compaction & Pipeline System
- Context Compaction System with token counting and summarization
- Deterministic State Machine for flow control (no LLM decisions)
- Parallel Execution Engine (up to 12 concurrent sessions)
- Event-Driven Coordination via Event Bus
- Agent Workspace Isolation (tools, memory, identity, files)
- YAML Workflow Integration (OpenClaw/Lobster compatible)
- Claude Code integration layer
- Complete demo UI with real-time visualization
- Comprehensive documentation and README

Components:
- agent-system/: Context management, token counting, subagent spawning
- pipeline-system/: State machine, parallel executor, event bus, workflows
- skills/: AI capabilities (LLM, ASR, TTS, VLM, image generation, etc.)
- src/app/: Next.js demo application

Total: ~100KB of production-ready TypeScript code
2026-03-03 12:40:47 +00:00

110 lines
3.5 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
import html
import sys
from typing import Dict
# ---------- Step 0: restore literal unicode escapes/entities to real chars ----------
# Alternatives are tried left to right:
#   1. a UTF-16 surrogate pair spelled as two consecutive \uXXXX escapes
#      (groups 1 and 2 hold the high/low hex digits) -- listed first so both
#      halves are consumed together instead of as two separate \u matches;
#   2. \uXXXX    3. \UXXXXXXXX    4. \xXX
_RE_UNICODE_ESC = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}"
)


def _restore_escapes(s: str) -> str:
    """Turn HTML entities and literal backslash escapes in *s* into real characters.

    HTML entities are resolved first with ``html.unescape``; afterwards every
    literal ``\\uXXXX``, ``\\UXXXXXXXX`` and ``\\xXX`` sequence is replaced by
    the character it names.

    A surrogate pair such as ``\\uD83D\\uDE00`` is merged into the single code
    point it encodes.  An escape that names a lone surrogate, or a value above
    U+10FFFF, is left in the text untouched: the former would produce a string
    that cannot be encoded as UTF-8, the latter is not a valid code point.

    Args:
        s: Text to process.

    Returns:
        The text with entities and escapes replaced by real characters.
    """
    # HTML entities: &sup3; &le; &alpha; ...
    s = html.unescape(s)

    # Literal backslash escapes: "\\u00B3" -> the superscript-three character
    def _dec(m: re.Match) -> str:
        esc = m.group(0)
        high, low = m.group(1), m.group(2)
        if high is not None:
            # Standard UTF-16 decoding of a high/low surrogate pair.
            return chr(
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
        # The pattern guarantees hex digits after the two-character prefix.
        code_point = int(esc[2:], 16)
        if 0xD800 <= code_point <= 0xDFFF or code_point > 0x10FFFF:
            return esc
        return chr(code_point)

    return _RE_UNICODE_ESC.sub(_dec, s)
# ---------- Step 1: superscripts/subscripts -> <super>/<sub> ----------
# Keys are spelled as \u escapes rather than literal glyphs so they survive
# tools that strip or flag "ambiguous" Unicode characters.
_SUPERSCRIPT_MAP: Dict[str, str] = {
    # superscript digits 0-9 (1, 2 and 3 live in the Latin-1 block)
    "\u2070": "0", "\u00b9": "1", "\u00b2": "2", "\u00b3": "3", "\u2074": "4",
    "\u2075": "5", "\u2076": "6", "\u2077": "7", "\u2078": "8", "\u2079": "9",
    # superscript operators and parentheses
    "\u207a": "+", "\u207b": "-", "\u207c": "=", "\u207d": "(", "\u207e": ")",
    # superscript letters
    "\u207f": "n", "\u2071": "i",
}
_SUBSCRIPT_MAP: Dict[str, str] = {
    # subscript digits 0-9
    "\u2080": "0", "\u2081": "1", "\u2082": "2", "\u2083": "3", "\u2084": "4",
    "\u2085": "5", "\u2086": "6", "\u2087": "7", "\u2088": "8", "\u2089": "9",
    # subscript operators and parentheses
    "\u208a": "+", "\u208b": "-", "\u208c": "=", "\u208d": "(", "\u208e": ")",
    # subscript letters
    "\u2090": "a", "\u2091": "e", "\u2095": "h", "\u1d62": "i", "\u2c7c": "j",
    "\u2096": "k", "\u2097": "l", "\u2098": "m", "\u2099": "n", "\u2092": "o",
    "\u209a": "p", "\u1d63": "r", "\u209b": "s", "\u209c": "t", "\u1d64": "u",
    "\u1d65": "v", "\u2093": "x",
}

# Translation table built once at import time from the two maps above.
# Superscripts are merged last so they win if a character were ever listed in
# both maps (superscripts are checked first).
_SUPER_SUB_TABLE = str.maketrans({
    **{ch: f"<sub>{plain}</sub>" for ch, plain in _SUBSCRIPT_MAP.items()},
    **{ch: f"<super>{plain}</super>" for ch, plain in _SUPERSCRIPT_MAP.items()},
})


def _replace_super_sub(s: str) -> str:
    """Rewrite Unicode superscript/subscript characters as markup tags.

    Each character listed in ``_SUPERSCRIPT_MAP`` becomes
    ``<super>X</super>`` and each character listed in ``_SUBSCRIPT_MAP``
    becomes ``<sub>X</sub>``, where ``X`` is the mapped plain character.
    All other characters pass through unchanged.

    Args:
        s: Text to process.

    Returns:
        The text with every mapped character replaced by its tag form.
    """
    return s.translate(_SUPER_SUB_TABLE)
# ---------- Step 2: symbol fallback for SimHei (protect tags, then replace) ----------
# Maps a single character to the replacement text emitted in its place.
_SYMBOL_FALLBACK: Dict[str, str] = {
    # Currently empty - enable entries as needed for fonts missing specific glyphs
    # "±": "+/-",
    # "×": "*",
    # "÷": "/",
    # "≤": "<=",
    # "≥": ">=",
    # "≠": "!=",
    # "≈": "~=",
    # "∞": "inf",
}

# The capturing group makes re.split() keep the tags in its result list.
_RE_SUPER_SUB_TAG = re.compile(r"(</?super>|</?sub>)")


def _fallback_symbols(s: str) -> str:
    """Replace characters listed in ``_SYMBOL_FALLBACK``, leaving tags intact.

    The text is split on ``<super>``, ``</super>``, ``<sub>`` and ``</sub>``.
    Only the text between tags is run through the fallback table, so the tags
    themselves can never be altered, whatever characters the table maps.

    Args:
        s: Text to process.

    Returns:
        The text with mapped characters replaced.  While the table is empty
        the input is returned unchanged.
    """
    if not _SYMBOL_FALLBACK:
        return s
    # With one capturing group, re.split() alternates: even indexes are plain
    # text, odd indexes are the matched tags.
    parts = _RE_SUPER_SUB_TAG.split(s)
    return "".join(
        part if idx % 2 else "".join(_SYMBOL_FALLBACK.get(ch, ch) for ch in part)
        for idx, part in enumerate(parts)
    )
def sanitize_code(text: str) -> str:
    """Run the full sanitization pipeline over PDF generation code.

    The stages are applied in a fixed order, each one consuming the output of
    the previous stage:

    1. ``_restore_escapes``: unicode escapes/entities become real characters.
    2. ``_replace_super_sub``: superscript/subscript unicode becomes
       ``<super>``/``<sub>`` markup.
    3. ``_fallback_symbols``: other risky symbols become ASCII/text fallbacks.
    """
    return _fallback_symbols(_replace_super_sub(_restore_escapes(text)))
def _main(argv: list) -> int:
    """Sanitize the script named by ``argv[1]`` in place.

    Args:
        argv: Command-line arguments, program name first.

    Returns:
        Process exit status: 1 when no target path was given, otherwise 0.
    """
    if len(argv) < 2:
        print("Usage: python sanitize_code.py <target_script.py>")
        return 1
    target = argv[1]
    # newline="" disables newline translation so the file's own line endings
    # reach sanitize_code() and are written back exactly as they were.
    with open(target, "r", encoding="utf-8", newline="") as f:
        code = f.read()
    sanitized = sanitize_code(code)
    if sanitized != code:
        # Encode BEFORE reopening the file: opening for writing truncates it,
        # so an encoding failure after that point would destroy the target.
        payload = sanitized.encode("utf-8")
        with open(target, "wb") as f:
            f.write(payload)
    print(f"Sanitized: {target}")
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))