Initial commit
This commit is contained in:
45
skills/ppt/ooxml/scripts/unpack.py
Executable file
45
skills/ppt/ooxml/scripts/unpack.py
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
|
||||
|
||||
import argparse
|
||||
import random
|
||||
import zipfile
|
||||
import defusedxml.minidom
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses two positional arguments, ``office_file`` and ``output_dir``,
    and forwards them unchanged to ``unpack_document``.
    """
    cli = argparse.ArgumentParser(
        description="Unpack an Office file into a directory"
    )

    # (argument name, help text) pairs, registered in positional order.
    positionals = (
        ("office_file", "Office file (.docx/.pptx/.xlsx)"),
        ("output_dir", "Output directory"),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    options = cli.parse_args()
    unpack_document(options.office_file, options.output_dir)
|
||||
|
||||
|
||||
def unpack_document(input_file, output_dir):
    """Unpack an Office file into a directory and pretty-print all XML files.

    Args:
        input_file: Path to the Office package (a ZIP archive) to extract.
        output_dir: Directory to extract into. It is created, together with
            any missing parents, if it does not already exist.

    Side effects:
        Every ``*.xml`` and ``*.rels`` file found anywhere under
        ``output_dir`` after extraction is rewritten in place by
        ``pretty_print_xml``. When ``input_file`` names a ``.docx`` file
        (matched case-insensitively), a randomly generated 8-character
        hexadecimal RSID suggestion is printed to stdout.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(input_file) as zf:
        zf.extractall(output_path)

    for pattern in ("*.xml", "*.rels"):
        for xml_file in output_path.rglob(pattern):
            pretty_print_xml(xml_file)

    # For .docx files, suggest an RSID for tracked changes.
    # Compare on the lower-cased name so "REPORT.DOCX" is recognized too.
    if str(input_file).lower().endswith(".docx"):
        suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
        print(f"Suggested RSID for edit session: {suggested_rsid}")
|
||||
|
||||
|
||||
def pretty_print_xml(xml_file):
    """Pretty-print a single XML file in place.

    Args:
        xml_file: ``pathlib.Path``-style object for the XML file to rewrite.

    The file is parsed with ``defusedxml.minidom`` and then overwritten with
    the re-serialized document, indented by one space per level and encoded
    as ASCII. Parser errors are not caught and propagate to the caller.
    """
    # Give the parser the raw bytes so it can honor a byte-order mark or the
    # encoding named in the XML declaration. Decoding as UTF-8 up front would
    # raise UnicodeDecodeError on UTF-16 parts, which OPC packages may contain.
    raw = xml_file.read_bytes()
    dom = defusedxml.minidom.parseString(raw)
    xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user