Files
2026-06-06 05:21:10 +00:00

46 lines
1.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
import argparse
import random
import zipfile
import defusedxml.minidom
from pathlib import Path
def main():
    """Command-line entry point: read the two positional arguments and unpack.

    Expects an Office file path followed by an output directory, then hands
    both to ``unpack_document``.
    """
    cli = argparse.ArgumentParser(description="Unpack an Office file into a directory")
    positional_args = (
        ("office_file", "Office file (.docx/.pptx/.xlsx)"),
        ("output_dir", "Output directory"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    unpack_document(options.office_file, options.output_dir)
def unpack_document(input_file, output_dir):
    """Unpack an Office file into a directory and pretty-print all XML files.

    Args:
        input_file: Path (str or path-like) of the .docx/.pptx/.xlsx archive.
        output_dir: Directory to extract into; created, with parents, if
            it does not exist yet.

    Every extracted ``*.xml`` and ``*.rels`` file is rewritten in place via
    ``pretty_print_xml``. For .docx inputs a random 8-hex-digit RSID
    suggestion is printed to stdout.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(input_file) as zf:
        zf.extractall(output_path)

    for pattern in ["*.xml", "*.rels"]:
        for xml_file in output_path.rglob(pattern):
            # rglob also yields directories whose name matches the pattern;
            # only regular files can be reformatted.
            if xml_file.is_file():
                pretty_print_xml(xml_file)

    # For .docx files, suggest an RSID for tracked changes.
    # Compare case-insensitively so names such as "REPORT.DOCX" qualify too.
    if str(input_file).lower().endswith(".docx"):
        suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
        print(f"Suggested RSID for edit session: {suggested_rsid}")
def pretty_print_xml(xml_file):
    """Pretty-print a single XML file in place.

    Args:
        xml_file: ``pathlib.Path`` of the XML (or ``.rels``) file to rewrite.

    The file is parsed with ``defusedxml.minidom`` and overwritten with the
    ASCII-encoded, one-space-indented serialisation of the resulting DOM.
    Errors raised while reading or parsing propagate to the caller.
    """
    # Hand the parser raw bytes so it can honour the file's own BOM / XML
    # declaration; decoding as UTF-8 first fails on UTF-16 encoded parts.
    raw = xml_file.read_bytes()
    dom = defusedxml.minidom.parseString(raw)
    xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()