46 lines
1.5 KiB
Python
Executable File
46 lines
1.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
|
|
|
|
import argparse
|
|
import random
|
|
import zipfile
|
|
import defusedxml.minidom
|
|
from pathlib import Path
|
|
|
|
|
|
def main():
    """Parse the command line and unpack the requested Office file.

    Takes two positional arguments: the Office file to read and the
    directory to extract it into, then delegates to ``unpack_document``.
    """
    cli = argparse.ArgumentParser(
        description="Unpack an Office file into a directory"
    )

    # Both arguments are required positionals; register them in order.
    positionals = (
        ("office_file", "Office file (.docx/.pptx/.xlsx)"),
        ("output_dir", "Output directory"),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    options = cli.parse_args()
    unpack_document(options.office_file, options.output_dir)
|
|
|
|
|
|
def unpack_document(input_file, output_dir):
    """Unpack an Office file into a directory and pretty-print all XML files.

    Args:
        input_file: Location of the Office archive (.docx/.pptx/.xlsx);
            it is opened with ``zipfile.ZipFile``.
        output_dir: Directory to extract into. It is created, including
            missing parents, when it does not exist yet.

    Every extracted ``*.xml`` and ``*.rels`` file is rewritten in place by
    ``pretty_print_xml``. For Word documents a randomly generated
    8-character hexadecimal RSID suggestion is printed to stdout.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(input_file) as zf:
        zf.extractall(output_path)

    for pattern in ["*.xml", "*.rels"]:
        for xml_file in output_path.rglob(pattern):
            pretty_print_xml(xml_file)

    # For .docx files, suggest an RSID for tracked changes. The extension is
    # compared case-insensitively so names such as "REPORT.DOCX" qualify too.
    if str(input_file).lower().endswith(".docx"):
        suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
        print(f"Suggested RSID for edit session: {suggested_rsid}")
|
|
|
|
|
|
def pretty_print_xml(xml_file):
    """Pretty-print a single XML file in place.

    Args:
        xml_file: ``pathlib.Path`` of the XML file to rewrite.

    The file is parsed with ``defusedxml.minidom`` and written back as
    ASCII-encoded bytes with one-space indentation.
    """
    # Hand the parser the raw bytes so it can determine the encoding from the
    # BOM / XML declaration itself; decoding as UTF-8 up front would raise on
    # UTF-16 encoded parts.
    raw = xml_file.read_bytes()
    dom = defusedxml.minidom.parseString(raw)
    xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|