feat: Add complete Agentic Compaction & Pipeline System

- Context Compaction System with token counting and summarization - Deterministic State Machine for flow control (no LLM decisions) - Parallel Execution Engine (up to 12 concurrent sessions) - Event-Driven Coordination via Event Bus - Agent Workspace Isolation (tools, memory, identity, files) - YAML Workflow Integration (OpenClaw/Lobster compatible) - Claude Code integration layer - Complete demo UI with real-time visualization - Comprehensive documentation and README Components: - agent-system/: Context management, token counting, subagent spawning - pipeline-system/: State machine, parallel executor, event bus, workflows - skills/: AI capabilities (LLM, ASR, TTS, VLM, image generation, etc.) - src/app/: Next.js demo application Total: ~100KB of production-ready TypeScript code
2026-03-03 12:40:47 +00:00
parent 63a8b123c9
commit 2380d33861
152 changed files with 51569 additions and 817 deletions
--- a/skills/docx/scripts/__init__.py
+++ b/skills/docx/scripts/__init__.py
@@ -0,0 +1 @@
+# Make scripts directory a package for relative imports in tests
--- a/skills/docx/scripts/add_toc_placeholders.py
+++ b/skills/docx/scripts/add_toc_placeholders.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+"""
+Add placeholder entries to Table of Contents in a DOCX file.
+
+This script adds placeholder TOC entries between the 'separate' and 'end'
+field characters, so users see some content on first open instead of an empty TOC.
+The original file is replaced with the modified version.
+
+Usage:
+    python add_toc_placeholders.py <docx_file> --entries <entries_json>
+
+    entries_json format: JSON string with array of objects:
+    [
+        {"level": 1, "text": "Chapter 1 Overview", "page": "1"},
+        {"level": 2, "text": "Section 1.1 Details", "page": "1"}
+    ]
+
+    If --entries is not provided, generates generic placeholders.
+
+Example:
+    python add_toc_placeholders.py document.docx
+    python add_toc_placeholders.py document.docx --entries '[{"level":1,"text":"Introduction","page":"1"}]'
+"""
+
+import argparse
+import html
+import json
+import shutil
+import sys
+import tempfile
+import zipfile
+from pathlib import Path
+
+
+def add_toc_placeholders(docx_path: str, entries: list = None) -> None:
+    """Add placeholder TOC entries to a DOCX file (in-place replacement).
+
+    The archive is unpacked into a temporary directory, word/document.xml is
+    rewritten with the placeholder paragraphs, the tree is zipped again and
+    the result replaces the original file.
+
+    Args:
+        docx_path: Path to DOCX file (will be modified in-place)
+        entries: Optional list of placeholder entries. Each entry should be a dict
+                 with 'level' (1-3), 'text', and 'page' keys. When None, generic
+                 placeholder entries are generated.
+
+    Raises:
+        ValueError: If the archive does not contain word/document.xml.
+    """
+    docx_path = Path(docx_path)
+
+    # Create temp directory for extraction
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        extracted_dir = temp_path / "extracted"
+        temp_output = temp_path / "output.docx"
+
+        # Extract DOCX
+        with zipfile.ZipFile(docx_path, 'r') as zip_ref:
+            zip_ref.extractall(extracted_dir)
+
+        # Detect TOC styles from styles.xml (falls back to defaults if absent).
+        # No debug output here: this function is importable as a library and
+        # must not write to stdout.
+        toc_style_mapping = _detect_toc_styles(extracted_dir / "word" / "styles.xml")
+
+        # Process document.xml
+        document_xml = extracted_dir / "word" / "document.xml"
+        if not document_xml.exists():
+            raise ValueError("document.xml not found in the DOCX file")
+
+        # Read and process XML
+        content = document_xml.read_text(encoding='utf-8')
+
+        # Find TOC structure and add placeholders
+        modified_content = _insert_toc_placeholders(content, entries, toc_style_mapping)
+
+        # Write back
+        document_xml.write_text(modified_content, encoding='utf-8')
+
+        # Repack DOCX to temp file; archive names are relative to the
+        # extraction root so the package layout is preserved.
+        with zipfile.ZipFile(temp_output, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            for file_path in extracted_dir.rglob('*'):
+                if file_path.is_file():
+                    arcname = file_path.relative_to(extracted_dir)
+                    zipf.write(file_path, arcname)
+
+        # Replace original file with modified version (use shutil.move for cross-device support)
+        docx_path.unlink()
+        shutil.move(str(temp_output), str(docx_path))
+
+
+def _detect_toc_styles(styles_xml_path: Path) -> dict:
+    """Detect TOC style IDs from styles.xml.
+
+    Each <w:style> element is inspected on its own, so the style ID and the
+    "toc N" name are always taken from the same style definition.
+
+    Args:
+        styles_xml_path: Path to styles.xml
+
+    Returns:
+        Dictionary mapping TOC level (the digit in the "toc N" style name) to
+        style ID. If the file is missing or defines no "toc N" style, the
+        default mapping {1: "9", 2: "11", 3: "12"} is returned.
+    """
+    import re
+
+    default_mapping = {1: "9", 2: "11", 3: "12"}
+
+    if not styles_xml_path.exists():
+        return default_mapping
+
+    content = styles_xml_path.read_text(encoding='utf-8')
+
+    # One match per complete <w:style ...>...</w:style> element. The \b keeps
+    # the <w:styles> root from matching and the lookbehind skips self-closing
+    # tags, so a match can never run from one style into the next.
+    style_element = re.compile(r'<w:style\b([^>]*)(?<!/)>(.*?)</w:style>', re.DOTALL)
+    style_id_attr = re.compile(r'\bw:styleId="([^"]*)"')
+    toc_name = re.compile(r'<w:name\s+w:val="toc\s+(\d)"')
+
+    # Find styles with names like "toc 1", "toc 2", "toc 3"
+    toc_styles = {}
+    for element in style_element.finditer(content):
+        id_match = style_id_attr.search(element.group(1))
+        name_match = toc_name.search(element.group(2))
+        if id_match and name_match:
+            toc_styles[int(name_match.group(1))] = id_match.group(1)
+
+    # If we found styles, use them; otherwise use defaults
+    return toc_styles if toc_styles else default_mapping
+
+
+def _insert_toc_placeholders(xml_content: str, entries: list = None, toc_style_mapping: dict = None) -> str:
+    """Insert placeholder TOC entries into XML content.
+
+    Placeholder paragraphs are inserted between a paragraph that ends with a
+    'separate' field character and the directly following paragraph that
+    ends with an 'end' field character. Content without such a pair is
+    returned unchanged.
+
+    Args:
+        xml_content: The XML content of document.xml
+        entries: Optional list of placeholder entries (dicts with 'level',
+                 'text' and 'page'); generic entries are used when None
+        toc_style_mapping: Dictionary mapping level to style ID; defaults to
+                           {1: "9", 2: "11", 3: "12"} when None
+
+    Returns:
+        Modified XML content with placeholders inserted
+    """
+    import re
+
+    # Generate default placeholder entries if none provided
+    if entries is None:
+        entries = [
+            {"level": 1, "text": "Chapter 1 Overview", "page": "1"},
+            {"level": 2, "text": "Section 1.1 Details", "page": "1"},
+            {"level": 2, "text": "Section 1.2 More Details", "page": "2"},
+            {"level": 1, "text": "Chapter 2 Content", "page": "3"},
+        ]
+
+    # Use provided mapping or default
+    if toc_style_mapping is None:
+        toc_style_mapping = {1: "9", 2: "11", 3: "12"}
+
+    # Find the TOC structure: w:p with w:fldChar separate, followed by w:p with w:fldChar end
+    # Pattern: <w:p><w:r>...<w:fldChar w:fldCharType="separate"/></w:r></w:p><w:p><w:r><w:fldChar w:fldCharType="end"/>
+    # NOTE(review): the pattern is not TOC-specific; any field whose separate/end
+    # markers sit in adjacent paragraphs would match as well - confirm acceptable.
+    separate_end_pattern = (
+        r'(<w:p[^>]*><w:r[^>]*>.*?<w:fldChar[^>]*w:fldCharType="separate"[^>]*/></w:r></w:p>)'
+        r'(<w:p[^>]*><w:r[^>]*>.*?<w:fldChar[^>]*w:fldCharType="end"[^>]*/></w:r></w:p>)'
+    )
+
+    # Indentation values in twips (1 inch = 1440 twips)
+    # Level 1: 0, Level 2: 0.25" (360), Level 3: 0.5" (720), then +360 per level
+    indent_mapping = {1: 0, 2: 360, 3: 720, 4: 1080, 5: 1440, 6: 1800}
+
+    def replace_with_placeholders(match):
+        separate_para = match.group(1)
+        end_para = match.group(2)
+
+        # Generate placeholder paragraphs matching Word's TOC format
+        placeholder_paragraphs = []
+        for entry in entries:
+            level = entry.get('level', 1)
+            # Both user-supplied values end up inside <w:t>, so both are
+            # escaped; str() lets numeric JSON values (e.g. "page": 3) through.
+            text = html.escape(str(entry.get('text', '')))
+            page = html.escape(str(entry.get('page', '1')))
+
+            # Get style ID for this level (unknown levels fall back to level 1)
+            toc_style = toc_style_mapping.get(level, toc_style_mapping.get(1, "9"))
+
+            # Get indentation for this level. The element goes AFTER <w:tabs>:
+            # the w:pPr children are an ordered sequence in the schema and
+            # w:tabs precedes w:ind.
+            indent = indent_mapping.get(level, 0)
+            indent_xml = f'\n    <w:ind w:left="{indent}"/>' if indent > 0 else ''
+
+            # Use w:tab element (not w:tabStop) like Word does
+            placeholder_para = f'''<w:p>
+  <w:pPr>
+    <w:pStyle w:val="{toc_style}"/>
+    <w:tabs><w:tab w:val="right" w:leader="dot" w:pos="9026"/></w:tabs>{indent_xml}
+  </w:pPr>
+  <w:r><w:t>{text}</w:t></w:r>
+  <w:r><w:tab/></w:r>
+  <w:r><w:t>{page}</w:t></w:r>
+</w:p>'''
+            placeholder_paragraphs.append(placeholder_para)
+
+        # Join with the separate paragraph at start and end paragraph at end
+        return separate_para + '\n'.join(placeholder_paragraphs) + end_para
+
+    # Replace the pattern
+    return re.sub(separate_end_pattern, replace_with_placeholders, xml_content, flags=re.DOTALL)
+
+
+def main():
+    """Command-line entry point.
+
+    Reads the DOCX path and the optional ``--entries`` JSON string from the
+    command line and rewrites the document in place. A JSON parse error or
+    any exception raised while processing is reported on stderr and ends the
+    process with exit status 1.
+    """
+    cli = argparse.ArgumentParser(
+        description='Add placeholder entries to Table of Contents in a DOCX file (in-place)'
+    )
+    cli.add_argument('docx_file', help='DOCX file to modify (will be replaced)')
+    cli.add_argument(
+        '--entries',
+        help='JSON string with placeholder entries: [{"level":1,"text":"Chapter 1","page":"1"}]'
+    )
+    options = cli.parse_args()
+
+    # An absent or empty --entries value leaves the entries as None, which
+    # is passed through unchanged.
+    placeholder_entries = None
+    raw_entries = options.entries
+    if raw_entries:
+        try:
+            placeholder_entries = json.loads(raw_entries)
+        except json.JSONDecodeError as decode_error:
+            print(f"Error parsing entries JSON: {decode_error}", file=sys.stderr)
+            sys.exit(1)
+
+    # Rewrite the document; the success message is printed inside the same
+    # guarded region as the rewrite itself.
+    target = options.docx_file
+    try:
+        add_toc_placeholders(target, placeholder_entries)
+        print(f"Successfully added TOC placeholders to {target}")
+    except Exception as failure:
+        print(f"Error: {failure}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/skills/docx/scripts/document.py
+++ b/skills/docx/scripts/document.py
--- a/skills/docx/scripts/templates/comments.xml
+++ b/skills/docx/scripts/templates/comments.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:comments xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16du="http://schemas.microsoft.com/office/word/2023/wordml/word16du" 
xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16sdtfl="http://schemas.microsoft.com/office/word/2024/wordml/sdtformatlock" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh w16sdtfl w16du wp14">
+</w:comments>
--- a/skills/docx/scripts/templates/commentsExtended.xml
+++ b/skills/docx/scripts/templates/commentsExtended.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w15:commentsEx xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16du="http://schemas.microsoft.com/office/word/2023/wordml/word16du" 
xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16sdtfl="http://schemas.microsoft.com/office/word/2024/wordml/sdtformatlock" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh w16sdtfl w16du wp14">
+</w15:commentsEx>
--- a/skills/docx/scripts/templates/commentsExtensible.xml
+++ b/skills/docx/scripts/templates/commentsExtensible.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w16cex:commentsExtensible xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16du="http://schemas.microsoft.com/office/word/2023/wordml/word16du" 
xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16sdtfl="http://schemas.microsoft.com/office/word/2024/wordml/sdtformatlock" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" xmlns:cr="http://schemas.microsoft.com/office/comments/2020/reactions" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh w16sdtfl cr w16du wp14">
+</w16cex:commentsExtensible>
--- a/skills/docx/scripts/templates/commentsIds.xml
+++ b/skills/docx/scripts/templates/commentsIds.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w16cid:commentsIds xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16du="http://schemas.microsoft.com/office/word/2023/wordml/word16du" 
xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16sdtfl="http://schemas.microsoft.com/office/word/2024/wordml/sdtformatlock" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh w16sdtfl w16du wp14">
+</w16cid:commentsIds>
--- a/skills/docx/scripts/templates/people.xml
+++ b/skills/docx/scripts/templates/people.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w15:people xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml">
+</w15:people>
--- a/skills/docx/scripts/utilities.py
+++ b/skills/docx/scripts/utilities.py
@@ -0,0 +1,374 @@
+#!/usr/bin/env python3
+"""
+Utilities for editing OOXML documents.
+
+This module provides XMLEditor, a tool for manipulating XML files with support for
+line-number-based node finding and DOM manipulation. Each element is automatically
+annotated with its original line and column position during parsing.
+
+Example usage:
+    editor = XMLEditor("document.xml")
+
+    # Find node by line number or range
+    elem = editor.get_node(tag="w:r", line_number=519)
+    elem = editor.get_node(tag="w:p", line_number=range(100, 200))
+
+    # Find node by text content
+    elem = editor.get_node(tag="w:p", contains="specific text")
+
+    # Find node by attributes
+    elem = editor.get_node(tag="w:r", attrs={"w:id": "target"})
+
+    # Combine filters
+    elem = editor.get_node(tag="w:p", line_number=range(1, 50), contains="text")
+
+    # Replace, insert, or manipulate
+    new_elem = editor.replace_node(elem, "<w:r><w:t>new text</w:t></w:r>")
+    editor.insert_after(new_elem, "<w:r><w:t>more</w:t></w:r>")
+
+    # Save changes
+    editor.save()
+"""
+
+import html
+from pathlib import Path
+from typing import Optional, Union
+
+import defusedxml.minidom
+import defusedxml.sax
+
+
+class XMLEditor:
+    """
+    Editor for manipulating OOXML XML files with line-number-based node finding.
+
+    This class parses XML files and tracks the original line and column position
+    of each element. This enables finding nodes by their line number in the original
+    file, which is useful when working with Read tool output.
+
+    Attributes:
+        xml_path: Path to the XML file being edited
+        encoding: Detected encoding of the XML file ('ascii' or 'utf-8')
+        dom: Parsed DOM tree with parse_position attributes on elements
+    """
+
+    def __init__(self, xml_path):
+        """
+        Initialize with path to XML file and parse with line number tracking.
+
+        Args:
+            xml_path: Path to XML file to edit (str or Path)
+
+        Raises:
+            ValueError: If the XML file does not exist
+        """
+        self.xml_path = Path(xml_path)
+        if not self.xml_path.exists():
+            raise ValueError(f"XML file not found: {xml_path}")
+
+        # Only the first 200 bytes are inspected: the encoding declaration,
+        # if any, lives in the XML prolog at the very start of the file.
+        with open(self.xml_path, "rb") as f:
+            header = f.read(200).decode("utf-8", errors="ignore")
+        self.encoding = "ascii" if 'encoding="ascii"' in header else "utf-8"
+
+        parser = _create_line_tracking_parser()
+        self.dom = defusedxml.minidom.parse(str(self.xml_path), parser)
+
+    def get_node(
+        self,
+        tag: str,
+        attrs: Optional[dict[str, str]] = None,
+        line_number: Optional[Union[int, range]] = None,
+        contains: Optional[str] = None,
+    ):
+        """
+        Get a DOM element by tag and identifier.
+
+        Finds an element by its line number in the original file, by matching
+        attribute values, by contained text, or by any combination of these.
+        Exactly one match must be found.
+
+        Args:
+            tag: The XML tag name (e.g., "w:del", "w:ins", "w:r")
+            attrs: Dictionary of attribute name-value pairs to match (e.g., {"w:id": "1"})
+            line_number: Line number (int) or line range (range) in original XML file (1-indexed)
+            contains: Text string that must appear in any text node within the element.
+                      Supports both entity notation (&#8220;) and Unicode characters (\u201c).
+
+        Returns:
+            defusedxml.minidom.Element: The matching DOM element
+
+        Raises:
+            ValueError: If node not found or multiple matches found
+
+        Example:
+            elem = editor.get_node(tag="w:r", line_number=519)
+            elem = editor.get_node(tag="w:r", line_number=range(100, 200))
+            elem = editor.get_node(tag="w:del", attrs={"w:id": "1"})
+            elem = editor.get_node(tag="w:p", attrs={"w14:paraId": "12345678"})
+            elem = editor.get_node(tag="w:commentRangeStart", attrs={"w:id": "0"})
+            elem = editor.get_node(tag="w:p", contains="specific text")
+            elem = editor.get_node(tag="w:t", contains="&#8220;Agreement")  # Entity notation
+            elem = editor.get_node(tag="w:t", contains="\u201cAgreement")   # Unicode character
+        """
+        # Normalize the search string once: convert HTML entities to Unicode
+        # characters, so "&#8220;Rowan" and "\u201cRowan" find the same text.
+        normalized_contains = html.unescape(contains) if contains is not None else None
+
+        matches = []
+        for elem in self.dom.getElementsByTagName(tag):
+            # Check line_number filter
+            if line_number is not None:
+                # Elements without a recorded position never match a line filter.
+                parse_pos = getattr(elem, "parse_position", (None,))
+                elem_line = parse_pos[0]
+
+                # Handle both single line number and range
+                if isinstance(line_number, range):
+                    if elem_line not in line_number:
+                        continue
+                elif elem_line != line_number:
+                    continue
+
+            # Check attrs filter
+            if attrs is not None:
+                if not all(
+                    elem.getAttribute(attr_name) == attr_value
+                    for attr_name, attr_value in attrs.items()
+                ):
+                    continue
+
+            # Check contains filter
+            if normalized_contains is not None:
+                if normalized_contains not in self._get_element_text(elem):
+                    continue
+
+            # If all applicable filters passed, this is a match
+            matches.append(elem)
+
+        if not matches:
+            # Build descriptive error message
+            filters = []
+            if line_number is not None:
+                line_str = (
+                    f"lines {line_number.start}-{line_number.stop - 1}"
+                    if isinstance(line_number, range)
+                    else f"line {line_number}"
+                )
+                filters.append(f"at {line_str}")
+            if attrs is not None:
+                filters.append(f"with attributes {attrs}")
+            if contains is not None:
+                filters.append(f"containing '{contains}'")
+
+            filter_desc = " ".join(filters) if filters else ""
+            base_msg = f"Node not found: <{tag}> {filter_desc}".strip()
+
+            # Add helpful hint based on filters used. The checks use
+            # "is not None" so that falsy-but-supplied filters (line 0, an
+            # empty range, an empty string) still select the matching hint.
+            if contains is not None:
+                hint = "Text may be split across elements or use different wording."
+            elif line_number is not None:
+                hint = "Line numbers may have changed if document was modified."
+            elif attrs is not None:
+                hint = "Verify attribute values are correct."
+            else:
+                hint = "Try adding filters (attrs, line_number, or contains)."
+
+            raise ValueError(f"{base_msg}. {hint}")
+        if len(matches) > 1:
+            raise ValueError(
+                f"Multiple nodes found: <{tag}>. "
+                f"Add more filters (attrs, line_number, or contains) to narrow the search."
+            )
+        return matches[0]
+
+    def _get_element_text(self, elem):
+        """
+        Recursively extract all text content from an element.
+
+        Skips text nodes that contain only whitespace (spaces, tabs, newlines),
+        which typically represent XML formatting rather than document content.
+
+        Args:
+            elem: defusedxml.minidom.Element to extract text from
+
+        Returns:
+            str: Concatenated text from all non-whitespace text nodes within the element
+        """
+        text_parts = []
+        for node in elem.childNodes:
+            if node.nodeType == node.TEXT_NODE:
+                # Skip whitespace-only text nodes (XML formatting)
+                if node.data.strip():
+                    text_parts.append(node.data)
+            elif node.nodeType == node.ELEMENT_NODE:
+                text_parts.append(self._get_element_text(node))
+        return "".join(text_parts)
+
+    def replace_node(self, elem, new_content):
+        """
+        Replace a DOM element with new XML content.
+
+        Args:
+            elem: defusedxml.minidom.Element to replace
+            new_content: String containing XML to replace the node with
+
+        Returns:
+            List[defusedxml.minidom.Node]: All inserted nodes
+
+        Example:
+            new_nodes = editor.replace_node(old_elem, "<w:r><w:t>text</w:t></w:r>")
+        """
+        parent = elem.parentNode
+        nodes = self._parse_fragment(new_content)
+        for node in nodes:
+            parent.insertBefore(node, elem)
+        parent.removeChild(elem)
+        return nodes
+
+    def insert_after(self, elem, xml_content):
+        """
+        Insert XML content after a DOM element.
+
+        Args:
+            elem: defusedxml.minidom.Element to insert after
+            xml_content: String containing XML to insert
+
+        Returns:
+            List[defusedxml.minidom.Node]: All inserted nodes
+
+        Example:
+            new_nodes = editor.insert_after(elem, "<w:r><w:t>text</w:t></w:r>")
+        """
+        parent = elem.parentNode
+        # Captured before inserting so every new node lands in front of the
+        # same sibling and the fragment keeps its order.
+        next_sibling = elem.nextSibling
+        nodes = self._parse_fragment(xml_content)
+        for node in nodes:
+            if next_sibling is not None:
+                parent.insertBefore(node, next_sibling)
+            else:
+                parent.appendChild(node)
+        return nodes
+
+    def insert_before(self, elem, xml_content):
+        """
+        Insert XML content before a DOM element.
+
+        Args:
+            elem: defusedxml.minidom.Element to insert before
+            xml_content: String containing XML to insert
+
+        Returns:
+            List[defusedxml.minidom.Node]: All inserted nodes
+
+        Example:
+            new_nodes = editor.insert_before(elem, "<w:r><w:t>text</w:t></w:r>")
+        """
+        parent = elem.parentNode
+        nodes = self._parse_fragment(xml_content)
+        for node in nodes:
+            parent.insertBefore(node, elem)
+        return nodes
+
+    def append_to(self, elem, xml_content):
+        """
+        Append XML content as a child of a DOM element.
+
+        Args:
+            elem: defusedxml.minidom.Element to append to
+            xml_content: String containing XML to append
+
+        Returns:
+            List[defusedxml.minidom.Node]: All inserted nodes
+
+        Example:
+            new_nodes = editor.append_to(elem, "<w:r><w:t>text</w:t></w:r>")
+        """
+        nodes = self._parse_fragment(xml_content)
+        for node in nodes:
+            elem.appendChild(node)
+        return nodes
+
+    def get_next_rid(self):
+        """
+        Get the next available rId for relationships files.
+
+        Returns:
+            str: "rId<N>" where N is one more than the highest numeric rId
+            found on any Relationship element ("rId1" if there is none).
+        """
+        max_id = 0
+        for rel_elem in self.dom.getElementsByTagName("Relationship"):
+            rel_id = rel_elem.getAttribute("Id")
+            if rel_id.startswith("rId"):
+                try:
+                    max_id = max(max_id, int(rel_id[3:]))
+                except ValueError:
+                    # Non-numeric suffix (e.g. "rIdFoo"): cannot collide with
+                    # a generated numeric ID, so it is deliberately ignored.
+                    pass
+        return f"rId{max_id + 1}"
+
+    def save(self):
+        """
+        Save the edited XML back to the file.
+
+        Serializes the DOM tree and writes it back to the original file path,
+        preserving the original encoding (ascii or utf-8).
+        """
+        content = self.dom.toxml(encoding=self.encoding)
+        self.xml_path.write_bytes(content)
+
+    def _parse_fragment(self, xml_content):
+        """
+        Parse XML fragment and return list of imported nodes.
+
+        The fragment is wrapped in a temporary root element that repeats the
+        namespace declarations of the document root, so prefixed names in the
+        fragment resolve during parsing.
+
+        Args:
+            xml_content: String containing XML fragment
+
+        Returns:
+            List of defusedxml.minidom.Node objects imported into this document
+
+        Raises:
+            AssertionError: If fragment contains no element nodes
+        """
+        # Extract namespace declarations from the root document element
+        root_elem = self.dom.documentElement
+        namespaces = []
+        if root_elem and root_elem.attributes:
+            for attr_name, attr_value in root_elem.attributes.items():
+                if attr_name.startswith("xmlns"):
+                    # The DOM holds the decoded value; re-escape it so a URI
+                    # containing & or a quote cannot break the wrapper element.
+                    escaped_value = html.escape(attr_value, quote=True)
+                    namespaces.append(f'{attr_name}="{escaped_value}"')
+
+        ns_decl = " ".join(namespaces)
+        wrapper = f"<root {ns_decl}>{xml_content}</root>"
+        fragment_doc = defusedxml.minidom.parseString(wrapper)
+        nodes = [
+            self.dom.importNode(child, deep=True)
+            for child in fragment_doc.documentElement.childNodes  # type: ignore
+        ]
+        # Raised explicitly rather than with an assert statement so the check
+        # is still enforced when Python runs with -O.
+        if not any(n.nodeType == n.ELEMENT_NODE for n in nodes):
+            raise AssertionError("Fragment must contain at least one element")
+        return nodes
+
+
+def _create_line_tracking_parser():
+    """
+    Create a SAX parser that tracks line and column numbers for each element.
+
+    Monkey patches the SAX content handler to store the current line and column
+    position from the underlying expat parser onto each element as a parse_position
+    attribute (line, column) tuple.
+
+    The patching happens lazily: the parser's setContentHandler is replaced, and
+    the handler's startElementNS is wrapped only when a handler is installed.
+
+    Returns:
+        defusedxml.sax.xmlreader.XMLReader: Configured SAX parser
+    """
+
+    # Replacement for parser.setContentHandler: wraps the handler's
+    # startElementNS, then installs the handler through the original setter.
+    def set_content_handler(dom_handler):
+        def startElementNS(name, tagName, attrs):
+            # Run the original callback first, then read the top of the
+            # handler's element stack - presumably the element that callback
+            # just created (verify against the handler implementation).
+            orig_start_cb(name, tagName, attrs)
+            cur_elem = dom_handler.elementStack[-1]
+            # parser._parser is a private attribute of the reader; per the
+            # docstring above it is the underlying expat parser.
+            cur_elem.parse_position = (
+                parser._parser.CurrentLineNumber,  # type: ignore
+                parser._parser.CurrentColumnNumber,  # type: ignore
+            )
+
+        # The original bound method must be saved before it is overwritten;
+        # the closure above looks the name up only when it is called.
+        orig_start_cb = dom_handler.startElementNS
+        dom_handler.startElementNS = startElementNS
+        orig_set_content_handler(dom_handler)
+
+    # "parser" and "orig_set_content_handler" are referenced by the closures
+    # above; they are bound here, before any of those closures can run.
+    parser = defusedxml.sax.make_parser()
+    orig_set_content_handler = parser.setContentHandler
+    parser.setContentHandler = set_content_handler  # type: ignore
+    return parser
				`@@ -0,0 +1 @@`
				`# Make scripts directory a package for relative imports in tests`