Initial commit
This commit is contained in:
BIN
skills/ppt/scripts/__pycache__/inventory.cpython-313.pyc
Executable file
BIN
skills/ppt/scripts/__pycache__/inventory.cpython-313.pyc
Executable file
Binary file not shown.
1337
skills/ppt/scripts/html2pptx.js
Executable file
1337
skills/ppt/scripts/html2pptx.js
Executable file
File diff suppressed because it is too large
Load Diff
512
skills/ppt/scripts/inventory.py
Executable file
512
skills/ppt/scripts/inventory.py
Executable file
@@ -0,0 +1,512 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract structured text content from PowerPoint presentations.
|
||||
|
||||
Usage:
|
||||
python inventory.py input.pptx output.json [--issues-only]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pptx import Presentation
|
||||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.shapes.base import BaseShape
|
||||
|
||||
# Public type alias used by replace.py: slide_id -> {shape_id -> ShapeData}
InventoryData = Dict[str, Dict[str, "ShapeData"]]

# Divisor used throughout this module to turn EMU lengths into inches.
_EMU = 914400  # EMUs per inch
# Clark-notation prefix of the DrawingML main namespace; it is prepended to
# local tag names such as "buChar" when searching a paragraph's properties.
_BULLET_NS = "{http://schemas.openxmlformats.org/drawingml/2006/main}"
# Alignment enum members mapped to the names written to the inventory.
# LEFT is deliberately absent, so left-aligned paragraphs carry no
# "alignment" entry.
_ALIGN_MAP = {
    PP_ALIGN.CENTER: "CENTER",
    PP_ALIGN.RIGHT: "RIGHT",
    PP_ALIGN.JUSTIFY: "JUSTIFY",
}
|
||||
|
||||
|
||||
def _is_cjk(ch: str) -> bool:
    """Report whether the single character *ch* lies in a wide East-Asian block.

    The blocks checked are the CJK Unified Ideographs, CJK Extension A,
    Hiragana/Katakana, the full-width/half-width forms block, and the
    Hangul syllables.
    """
    code_point = ord(ch)
    wide_blocks = (
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs
        (0x3400, 0x4DBF),  # CJK Extension A
        (0x3040, 0x30FF),  # Hiragana / Katakana
        (0xFF00, 0xFFEF),  # Full-width ASCII & half-width Katakana
        (0xAC00, 0xD7AF),  # Hangul syllables
    )
    return any(low <= code_point <= high for low, high in wide_blocks)
|
||||
|
||||
|
||||
class ParagraphData:
    """Capture the stripped text and formatting hints of a single paragraph.

    Every formatting attribute starts out unset (``None`` / ``False``) and is
    filled in only when the paragraph supplies a value. Font attributes are
    taken from the paragraph's first run.
    """

    def __init__(self, paragraph: Any):
        self.text: str = paragraph.text.strip()

        # Unset defaults; the readers below overwrite what the paragraph defines.
        self.bullet: bool = False
        self.level: Optional[int] = None
        self.alignment: Optional[str] = None
        self.space_before: Optional[float] = None
        self.space_after: Optional[float] = None
        self.font_name: Optional[str] = None
        self.font_size: Optional[float] = None
        self.bold: Optional[bool] = None
        self.italic: Optional[bool] = None
        self.underline: Optional[bool] = None
        self.color: Optional[str] = None
        self.theme_color: Optional[str] = None
        self.line_spacing: Optional[float] = None

        self._read_bullet(paragraph)
        self._read_alignment(paragraph)
        self._read_spacing(paragraph)
        self._read_first_run_font(paragraph)
        # Must run after the font reader: a proportional line spacing is
        # converted to points using font_size.
        self._read_line_spacing(paragraph)

    def _read_bullet(self, paragraph: Any) -> None:
        """Flag the paragraph as bulleted when its pPr holds buChar or buAutoNum."""
        props = getattr(getattr(paragraph, "_p", None), "pPr", None)
        if props is None:
            return
        has_marker = any(
            props.find(f"{_BULLET_NS}{tag}") is not None
            for tag in ("buChar", "buAutoNum")
        )
        if has_marker:
            self.bullet = True
            self.level = getattr(paragraph, "level", None)

    def _read_alignment(self, paragraph: Any) -> None:
        """Record the alignment name; anything not in _ALIGN_MAP (e.g. LEFT) is omitted."""
        current = getattr(paragraph, "alignment", None)
        if current in _ALIGN_MAP:
            self.alignment = _ALIGN_MAP[current]

    def _read_spacing(self, paragraph: Any) -> None:
        """Record space before/after in points when the value is truthy."""
        before = getattr(paragraph, "space_before", None)
        if before:
            self.space_before = before.pt
        after = getattr(paragraph, "space_after", None)
        if after:
            self.space_after = after.pt

    def _read_first_run_font(self, paragraph: Any) -> None:
        """Copy name, size, emphasis flags and colour from the first run's font."""
        if not paragraph.runs:
            return
        font = paragraph.runs[0].font
        self.font_name = font.name or None
        self.font_size = font.size.pt if font.size else None
        self.bold = font.bold
        self.italic = font.italic
        self.underline = font.underline
        try:
            self.color = str(font.color.rgb) if font.color.rgb else None
        except (AttributeError, TypeError):
            # No RGB value readable: fall back to a theme colour name, if any.
            try:
                theme = font.color.theme_color
                self.theme_color = theme.name if theme else None
            except (AttributeError, TypeError):
                pass

    def _read_line_spacing(self, paragraph: Any) -> None:
        """Store line spacing in points, rounded to two decimals."""
        spacing = getattr(paragraph, "line_spacing", None)
        if spacing is None:
            return
        if hasattr(spacing, "pt"):
            self.line_spacing = round(spacing.pt, 2)
            return
        # Multiplier: convert to points using the current font size (12 pt if unknown).
        self.line_spacing = round(spacing * (self.font_size or 12.0), 2)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-ready dict containing ``text`` plus every field that is set."""
        out: Dict[str, Any] = {"text": self.text}
        if self.bullet:
            out["bullet"] = True
            if self.level is not None:
                out["level"] = self.level
        if self.alignment:
            out["alignment"] = self.alignment
        for name in ("space_before", "space_after", "font_size", "line_spacing"):
            measured = getattr(self, name)
            if measured is not None:
                out[name] = measured
        if self.font_name:
            out["font_name"] = self.font_name
        for name in ("bold", "italic", "underline"):
            flag = getattr(self, name)
            if flag is not None:
                out[name] = flag
        # An explicit RGB colour wins over a theme colour; at most one is written.
        if self.color:
            out["color"] = self.color
        elif self.theme_color:
            out["theme_color"] = self.theme_color
        return out
|
||||
|
||||
|
||||
class ShapeData:
    """Position, formatting metadata, and text content for one shape.

    Geometry is kept twice: rounded inches (``left``, ``top``, ``width``,
    ``height``) for the JSON output, and raw EMUs (``*_emu``) for the
    slide-overflow arithmetic. Frame overflow, slide overflow and warnings
    are computed in the constructor; ``overlapping_shapes`` starts empty and
    is expected to be filled in by the caller.
    """

    def __init__(
        self,
        shape: BaseShape,
        absolute_left: Optional[int] = None,
        absolute_top: Optional[int] = None,
        slide: Optional[Any] = None,
    ):
        """Collect metadata for *shape*.

        Args:
            shape: The shape to describe.
            absolute_left: Left offset in EMU to use instead of ``shape.left``
                (used for shapes inside groups).
            absolute_top: Top offset in EMU to use instead of ``shape.top``.
            slide: The owning slide; when given, slide dimensions and the
                layout's default font size are looked up through it.
        """
        self.shape = shape
        self.shape_id: str = ""  # assigned after sorting

        # Slide dimensions (for overflow checking)
        self.slide_width_emu: Optional[int] = None
        self.slide_height_emu: Optional[int] = None
        if slide:
            try:
                prs_xml = slide.part.package.presentation_part.presentation
                self.slide_width_emu = prs_xml.slide_width
                self.slide_height_emu = prs_xml.slide_height
            except (AttributeError, TypeError):
                # Best effort: without dimensions the slide-overflow check is skipped.
                pass

        # Placeholder metadata
        self.placeholder_type: Optional[str] = None
        self.default_font_size: Optional[float] = None
        if getattr(shape, "is_placeholder", False):
            pf = shape.placeholder_format  # type: ignore
            if pf and pf.type:
                # str(enum) looks like "TITLE (1)"; keep only the bare member name.
                self.placeholder_type = str(pf.type).split(".")[-1].split(" ")[0]
                if slide and hasattr(slide, "slide_layout"):
                    self.default_font_size = _layout_font_size(shape, slide.slide_layout)

        # Geometry in EMU. A geometry property may be None (not just missing)
        # when the shape defines no transform of its own; treat that as 0 so
        # the arithmetic below cannot raise TypeError.
        left_emu = absolute_left if absolute_left is not None else self._emu_attr(shape, "left")
        top_emu = absolute_top if absolute_top is not None else self._emu_attr(shape, "top")
        width_emu = self._emu_attr(shape, "width")
        height_emu = self._emu_attr(shape, "height")

        # Position in inches (use absolute coords for shapes inside groups)
        self.left = round(left_emu / _EMU, 2)
        self.top = round(top_emu / _EMU, 2)
        self.width = round(width_emu / _EMU, 2)
        self.height = round(height_emu / _EMU, 2)

        # EMU positions kept for overflow arithmetic
        self.left_emu = left_emu
        self.top_emu = top_emu
        self.width_emu = width_emu
        self.height_emu = height_emu

        # Issue detection
        self.frame_overflow_bottom: Optional[float] = None
        self.slide_overflow_right: Optional[float] = None
        self.slide_overflow_bottom: Optional[float] = None
        self.overlapping_shapes: Dict[str, float] = {}
        self.warnings: List[str] = []
        self._estimate_frame_overflow()
        self._calculate_slide_overflow()
        self._detect_bullet_issues()

    # ------------------------------------------------------------------
    # Issue detection helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _emu_attr(shape: Any, name: str) -> int:
        """Return ``shape.<name>``, or 0 when the attribute is missing or None."""
        value = getattr(shape, name, None)
        return 0 if value is None else value

    def _default_font_size_pts(self) -> float:
        """Best-effort default font size in points.

        Prefers the size found on the layout placeholder. Otherwise scans the
        slide master XML for the first ``sz`` attribute below ``titleStyle``
        (title placeholders) or ``bodyStyle`` (everything else), and falls
        back to 14.0 when nothing is found or the lookup fails.
        """
        if self.default_font_size:
            return self.default_font_size
        try:
            master = self.shape.part.slide_layout.slide_master  # type: ignore
            style = "titleStyle" if (self.placeholder_type and "TITLE" in self.placeholder_type) else "bodyStyle"
            for child in master.element.iter():
                tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag
                if tag == style:
                    for elem in child.iter():
                        if "sz" in elem.attrib:
                            # sz is stored in hundredths of a point.
                            return int(elem.attrib["sz"]) / 100.0
        except Exception:
            # Deliberately best effort: any lookup failure uses the fallback.
            pass
        return 14.0  # conservative fallback

    def _estimate_frame_overflow(self) -> None:
        """Estimate text overflow via character-count heuristic (no external deps).

        Sets ``frame_overflow_bottom`` (inches) when the estimated text height
        exceeds the usable frame height by more than 0.05 inch.
        """
        if not hasattr(self.shape, "text_frame"):
            return
        tf = self.shape.text_frame  # type: ignore
        if not tf or not tf.paragraphs:
            return

        # Usable area after text frame margins
        def e2i(v: Any) -> float:
            return (v or 0) / _EMU

        margin_h = e2i(tf.margin_top) + e2i(tf.margin_bottom)
        margin_w = e2i(tf.margin_left) + e2i(tf.margin_right)
        if margin_h == 0:
            margin_h = 0.10  # PowerPoint default: ~0.05" top + 0.05" bottom
        if margin_w == 0:
            margin_w = 0.20  # PowerPoint default: ~0.1" left + 0.1" right
        usable_w = self.width - margin_w
        usable_h = self.height - margin_h
        if usable_w <= 0 or usable_h <= 0:
            return

        default_size = self._default_font_size_pts()
        total_h = 0.0

        for para in tf.paragraphs:
            if not para.text.strip():
                continue
            pd = ParagraphData(para)
            size_pt = pd.font_size or default_size

            # Estimate text width: CJK chars ≈ 1.0× font_size pts, others ≈ 0.5×
            text_w_pts = sum(
                size_pt if _is_cjk(c) else size_pt * 0.5
                for c in para.text
            )
            usable_w_pts = usable_w * 72.0
            n_lines = max(1, -(-int(text_w_pts) // max(1, int(usable_w_pts))))  # ceiling div

            line_h_in = (pd.line_spacing or size_pt) / 72.0
            total_h += (pd.space_before or 0) / 72.0
            total_h += n_lines * line_h_in
            total_h += (pd.space_after or 0) / 72.0

        if total_h > usable_h + 0.05:  # ignore sub-0.05" rounding noise
            self.frame_overflow_bottom = round(total_h - usable_h, 2)

    def _calculate_slide_overflow(self) -> None:
        """Record how far (in inches) the shape extends past the right/bottom slide edge.

        Does nothing when the slide dimensions are unknown; overhangs of
        0.01 inch or less are ignored.
        """
        if self.slide_width_emu is None or self.slide_height_emu is None:
            return
        r = self.left_emu + self.width_emu - self.slide_width_emu
        if r > 0:
            v = round(r / _EMU, 2)
            if v > 0.01:
                self.slide_overflow_right = v
        b = self.top_emu + self.height_emu - self.slide_height_emu
        if b > 0:
            v = round(b / _EMU, 2)
            if v > 0.01:
                self.slide_overflow_bottom = v

    def _detect_bullet_issues(self) -> None:
        """Add one warning when any paragraph starts with a typed bullet glyph and a space."""
        if not hasattr(self.shape, "text_frame"):
            return
        for para in self.shape.text_frame.paragraphs:  # type: ignore
            text = para.text.strip()
            if text and text.startswith(("• ", "● ", "○ ")):
                self.warnings.append("manual_bullet_symbol: use proper bullet formatting")
                break

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    @property
    def paragraphs(self) -> List[ParagraphData]:
        """Non-empty paragraphs of the shape, re-read from the shape on every access."""
        if not hasattr(self.shape, "text_frame"):
            return []
        return [ParagraphData(p) for p in self.shape.text_frame.paragraphs if p.text.strip()]  # type: ignore

    @property
    def has_any_issues(self) -> bool:
        """True when any overflow, overlap or warning has been recorded."""
        return bool(
            self.frame_overflow_bottom is not None
            or self.slide_overflow_right is not None
            or self.slide_overflow_bottom is not None
            or self.overlapping_shapes
            or self.warnings
        )

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-ready dict: geometry, optional metadata and issues, then paragraphs."""
        d: Dict[str, Any] = {
            "left": self.left, "top": self.top,
            "width": self.width, "height": self.height,
        }
        if self.placeholder_type:
            d["placeholder_type"] = self.placeholder_type
        if self.default_font_size:
            d["default_font_size"] = self.default_font_size

        overflow: Dict[str, Any] = {}
        if self.frame_overflow_bottom is not None:
            overflow["frame"] = {"overflow_bottom": self.frame_overflow_bottom}
        slide_ov: Dict[str, float] = {}
        if self.slide_overflow_right is not None:
            slide_ov["overflow_right"] = self.slide_overflow_right
        if self.slide_overflow_bottom is not None:
            slide_ov["overflow_bottom"] = self.slide_overflow_bottom
        if slide_ov:
            overflow["slide"] = slide_ov
        if overflow:
            d["overflow"] = overflow
        if self.overlapping_shapes:
            d["overlap"] = {"overlapping_shapes": self.overlapping_shapes}
        if self.warnings:
            d["warnings"] = self.warnings
        d["paragraphs"] = [p.to_dict() for p in self.paragraphs]
        return d
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Module-level helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _layout_font_size(shape: BaseShape, slide_layout: Any) -> Optional[float]:
    """Look up the default font size (points) on the layout placeholder of the same type.

    Only the first layout placeholder whose type equals the shape's
    placeholder type is inspected. Returns ``None`` when no such placeholder
    exists, when it carries no sized ``defRPr`` element, or when any lookup
    step raises.
    """
    try:
        wanted = shape.placeholder_format.type  # type: ignore
        match = next(
            (ph for ph in slide_layout.placeholders if ph.placeholder_format.type == wanted),
            None,
        )
        if match is not None:
            for node in match.element.iter():
                if "defRPr" not in node.tag:
                    continue
                raw_size = node.get("sz")
                if raw_size:
                    # sz is stored in hundredths of a point.
                    return float(raw_size) / 100.0
    except Exception:
        pass
    return None
|
||||
|
||||
|
||||
def _is_valid_shape(shape: BaseShape) -> bool:
    """Decide whether *shape* belongs in the inventory.

    A shape qualifies when it has a text frame with non-blank text, is not a
    slide-number placeholder, and is not a footer placeholder whose text is
    purely digits.
    """
    if not hasattr(shape, "text_frame"):
        return False
    frame = shape.text_frame  # type: ignore
    if not frame or not frame.text.strip():
        return False

    if not getattr(shape, "is_placeholder", False):
        return True
    fmt = shape.placeholder_format  # type: ignore
    if not (fmt and fmt.type):
        return True

    # str(enum) looks like "SLIDE_NUMBER (13)"; keep only the member name.
    kind = str(fmt.type).split(".")[-1].split(" ")[0]
    if kind == "SLIDE_NUMBER":
        return False
    return not (kind == "FOOTER" and frame.text.strip().isdigit())
|
||||
|
||||
|
||||
def _collect_shapes(shape: BaseShape, parent_left: int = 0, parent_top: int = 0):
    """Yield (shape, abs_left, abs_top) tuples, recursing into GroupShapes.

    Args:
        shape: A shape or group shape; anything exposing ``shapes`` is
            treated as a group and descended into.
        parent_left: Accumulated left offset (EMU) of the enclosing groups.
        parent_top: Accumulated top offset (EMU) of the enclosing groups.

    Yields:
        One tuple per leaf shape accepted by ``_is_valid_shape``, with the
        shape's own offset added to the accumulated parent offset.

    A ``left``/``top`` that is missing *or* ``None`` counts as 0, so a shape
    without its own position no longer aborts the walk with a TypeError.
    """
    # NOTE(review): offsets are summed as-is; a group's child coordinate
    # space (chOff/chExt) is not consulted — confirm this matches the decks in use.
    abs_left = parent_left + (getattr(shape, "left", 0) or 0)
    abs_top = parent_top + (getattr(shape, "top", 0) or 0)

    if hasattr(shape, "shapes"):  # GroupShape
        for child in shape.shapes:  # type: ignore
            yield from _collect_shapes(child, abs_left, abs_top)
    elif _is_valid_shape(shape):
        yield (shape, abs_left, abs_top)
|
||||
|
||||
|
||||
def _sort_by_position(shapes: List[ShapeData]) -> List[ShapeData]:
    """Order shapes in reading order: rows top-to-bottom, left-to-right within a row.

    After sorting by (top, left), a shape joins the current row while its top
    is within 0.5 inch of the row's first shape; otherwise it opens a new
    row. An empty input is returned unchanged.
    """
    if not shapes:
        return shapes

    ordered = sorted(shapes, key=lambda item: (item.top, item.left))
    rows: List[List[ShapeData]] = []
    anchor_top = None
    for item in ordered:
        if rows and abs(item.top - anchor_top) <= 0.5:
            rows[-1].append(item)
        else:
            rows.append([item])
            anchor_top = item.top

    flattened: List[ShapeData] = []
    for row in rows:
        row.sort(key=lambda item: item.left)
        flattened.extend(row)
    return flattened
|
||||
|
||||
|
||||
def _detect_overlaps(shapes: List[ShapeData]) -> None:
    """Record, on both shapes of every overlapping pair, the shared area in square inches.

    A pair counts as overlapping only when the shared width and the shared
    height both exceed 0.05 inch. Entries are keyed by the other shape's
    ``shape_id``.
    """

    def shared_extent(start_a: float, size_a: float, start_b: float, size_b: float) -> float:
        # Length of the intersection of two 1-D intervals (negative when disjoint).
        return min(start_a + size_a, start_b + size_b) - max(start_a, start_b)

    for position, first in enumerate(shapes, start=1):
        for second in shapes[position:]:
            overlap_w = shared_extent(first.left, first.width, second.left, second.width)
            overlap_h = shared_extent(first.top, first.height, second.top, second.height)
            if not (overlap_w > 0.05 and overlap_h > 0.05):
                continue
            shared_area = round(overlap_w * overlap_h, 2)
            first.overlapping_shapes[second.shape_id] = shared_area
            second.overlapping_shapes[first.shape_id] = shared_area
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def extract_text_inventory(
    pptx_path: Path,
    prs: Optional[Any] = None,
    issues_only: bool = False,
) -> InventoryData:
    """Build the text inventory for every slide of a presentation.

    Args:
        pptx_path: Path of the .pptx file; only opened when *prs* is None.
        prs: An already-loaded Presentation to reuse instead of re-loading.
        issues_only: Keep only shapes whose ``has_any_issues`` is true.

    Returns:
        ``{"slide-N": {"shape-N": ShapeData}}`` with slides numbered from 0
        and shapes numbered in visual (sorted) order. Slides that end up with
        no shapes are left out.
    """
    presentation = Presentation(str(pptx_path)) if prs is None else prs
    result: InventoryData = {}

    for slide_number, slide in enumerate(presentation.slides):
        found = list(_collect_shapes_from_slide(slide))
        if not found:
            continue

        ordered = _sort_by_position(
            [ShapeData(shape, left, top, slide) for shape, left, top in found]
        )

        # Ids are assigned before filtering so they stay tied to visual position.
        for position, entry in enumerate(ordered):
            entry.shape_id = f"shape-{position}"

        if len(ordered) > 1:
            _detect_overlaps(ordered)

        kept = [entry for entry in ordered if entry.has_any_issues] if issues_only else ordered
        if not kept:
            continue

        result[f"slide-{slide_number}"] = {entry.shape_id: entry for entry in kept}

    return result
|
||||
|
||||
|
||||
def _collect_shapes_from_slide(slide):
    """Walk every top-level shape of *slide* and yield what ``_collect_shapes`` yields for it."""
    for top_level in slide.shapes:  # type: ignore
        for found in _collect_shapes(top_level):
            yield found
|
||||
|
||||
|
||||
def save_inventory(inventory: InventoryData, output_path: Path) -> None:
    """Write *inventory* to *output_path* as UTF-8 JSON (2-space indent, non-ASCII kept as-is).

    Each shape entry is converted through its ``to_dict()`` method.
    """
    serializable = {}
    for slide_key, shapes in inventory.items():
        serializable[slide_key] = {
            shape_key: shape.to_dict() for shape_key, shape in shapes.items()
        }

    with open(output_path, "w", encoding="utf-8") as handle:
        json.dump(serializable, handle, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: write the inventory of a .pptx file to a JSON file.

    Exits with status 1 when the input is missing, is not a .pptx file, or
    when extraction or saving raises.
    """
    cli = argparse.ArgumentParser(description="Extract text inventory from a PowerPoint file.")
    cli.add_argument("input", help="Input .pptx file")
    cli.add_argument("output", help="Output .json file")
    cli.add_argument(
        "--issues-only",
        action="store_true",
        help="Include only shapes with overflow/overlap issues",
    )
    options = cli.parse_args()

    source = Path(options.input)
    problem = None
    if not source.exists():
        problem = f"Error: File not found: {options.input}"
    elif source.suffix.lower() != ".pptx":
        problem = "Error: Input must be a .pptx file"
    if problem is not None:
        print(problem)
        sys.exit(1)

    try:
        inventory = extract_text_inventory(source, issues_only=options.issues_only)
        destination = Path(options.output)
        destination.parent.mkdir(parents=True, exist_ok=True)
        save_inventory(inventory, destination)

        shape_count = sum(len(shapes) for shapes in inventory.values())
        label = "shapes with issues" if options.issues_only else "text shapes"
        print(f"Found {shape_count} {label} across {len(inventory)} slides → {options.output}")
    except Exception as exc:
        import traceback

        print(f"Error: {exc}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
2046
skills/ppt/scripts/pdf.py
Executable file
2046
skills/ppt/scripts/pdf.py
Executable file
File diff suppressed because it is too large
Load Diff
144
skills/ppt/scripts/rearrange.py
Executable file
144
skills/ppt/scripts/rearrange.py
Executable file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rearrange PowerPoint slides based on a sequence of indices.
|
||||
|
||||
Usage:
|
||||
python rearrange.py template.pptx output.pptx 0,34,34,50,52
|
||||
|
||||
Slides are 0-indexed. Indices can repeat to duplicate slides.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
from pptx import Presentation
|
||||
from pptx.oxml.ns import qn
|
||||
|
||||
|
||||
def copy_slide(src_prs: Presentation, dst_prs: Presentation, index: int, dst_layouts: dict) -> None:
    """Append a copy of slide[index] from src_prs into dst_prs.

    Args:
        src_prs: Presentation to copy the slide from.
        dst_prs: Presentation that receives the new slide.
        index: 0-based index of the slide in *src_prs*.
        dst_layouts: Layout name -> layout object of *dst_prs*; when the
            source layout's name is absent, the first layout of *dst_prs*
            is used instead.

    The copy consists of the source slide's shape XML, its relationships
    (re-created on the new slide with rIds remapped) and its slide-level
    background, if it has one.
    """
    src_slide = src_prs.slides[index]

    # Match layout by name across all masters; fall back to first available layout
    layout_name = src_slide.slide_layout.name
    dst_layout = dst_layouts.get(layout_name) or dst_prs.slide_layouts[0]

    new_slide = dst_prs.slides.add_slide(dst_layout)

    # Clear auto-added placeholder shapes
    for shape in list(new_slide.shapes):
        sp = shape.element
        sp.getparent().remove(sp)

    # Copy ALL non-layout relationships from source and build old→new rId mapping.
    # This covers images, media, charts, hyperlinks, videos, and any other embedded content.
    # Without this, relationship attributes (r:embed, r:id, r:link) in copied shapes would
    # reference rIds that don't exist in the new slide, causing PowerPoint repair dialogs.
    R_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    SKIP_TYPES = {"slideLayout", "notesSlide", "slide"}  # handled by python-pptx infrastructure
    rId_mapping: dict = {}
    for rel_id, rel in src_slide.part.rels.items():
        rel_short = rel.reltype.split("/")[-1]
        if rel_short in SKIP_TYPES:
            continue
        # External relationships (e.g. hyperlinks) target a URL string, not a
        # package part, so they must be re-created as external. relate_to()
        # handles both kinds and always returns the rId string.
        if rel.is_external:
            new_rId = new_slide.part.relate_to(rel.target_ref, rel.reltype, is_external=True)
        else:
            new_rId = new_slide.part.relate_to(rel.target_part, rel.reltype)
        rId_mapping[rel_id] = new_rId

    # Copy all shape elements
    r_embed = f"{{{R_NS}}}embed"
    r_id = f"{{{R_NS}}}id"
    r_link = f"{{{R_NS}}}link"

    for shape in src_slide.shapes:
        new_el = deepcopy(shape.element)
        new_slide.shapes._spTree.insert_element_before(new_el, "p:extLst")

        # Remap ALL relationship references (images, charts, hyperlinks, video, etc.)
        for el in new_el.iter():
            for attr in (r_embed, r_id, r_link):
                old_rId = el.get(attr)
                if old_rId and old_rId in rId_mapping:
                    el.set(attr, rId_mapping[old_rId])

    # Copy slide-level background if defined.
    # p:bg lives inside p:cSld, not directly under p:sld.
    src_cSld = src_slide.element.find(qn("p:cSld"))
    dst_cSld = new_slide.element.find(qn("p:cSld"))
    if src_cSld is not None and dst_cSld is not None:
        src_bg = src_cSld.find(qn("p:bg"))
        if src_bg is not None:
            existing_bg = dst_cSld.find(qn("p:bg"))
            if existing_bg is not None:
                dst_cSld.remove(existing_bg)
            # The background is inserted as the first child of p:cSld.
            dst_cSld.insert(0, deepcopy(src_bg))
|
||||
|
||||
|
||||
def rearrange_presentation(
    template_path: Path, output_path: Path, slide_sequence: list[int]
) -> None:
    """Write a presentation holding the template's slides in the requested order.

    Args:
        template_path: Source .pptx; loaded twice (as copy source and as the
            emptied destination).
        output_path: Where the result is saved; parent directories are created.
        slide_sequence: 0-based slide indices; repeats duplicate a slide.

    Raises:
        ValueError: If any index falls outside the template's slide range.
    """
    src_prs = Presentation(template_path)
    total = len(src_prs.slides)

    offending = next((i for i in slide_sequence if not 0 <= i < total), None)
    if offending is not None:
        raise ValueError(f"Slide index {offending} out of range (0–{total - 1})")

    # Second load of the same file: keeps dimensions, masters and layouts.
    dst_prs = Presentation(template_path)

    # Empty the destination: drop each slide's relationship, then its list entry.
    slide_id_list = dst_prs.slides._sldIdLst
    for entry in list(slide_id_list):
        rel_id = entry.get(qn("r:id"))  # must use full namespace via qn(), not bare "r:id"
        if rel_id:
            dst_prs.part.drop_rel(rel_id)
        slide_id_list.remove(entry)

    # Layout lookup across every master (templates may have multiple masters);
    # for a name that occurs more than once, the last one seen wins.
    layouts_by_name = {}
    for master in dst_prs.slide_masters:
        for layout in master.slide_layouts:
            layouts_by_name[layout.name] = layout

    # Append slides in requested order (duplicates included)
    for position in slide_sequence:
        copy_slide(src_prs, dst_prs, position, layouts_by_name)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    dst_prs.save(output_path)
    print(f"Saved {len(slide_sequence)} slides → {output_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: parse arguments and rearrange the template's slides.

    Exits with status 1 when the template is missing, the sequence is not a
    comma-separated list of integers, or rearranging raises ValueError.
    """
    cli = argparse.ArgumentParser(
        description="Rearrange PowerPoint slides.",
        epilog="Example: python rearrange.py template.pptx output.pptx 0,34,34,50,52",
    )
    cli.add_argument("template", help="Path to template PPTX")
    cli.add_argument("output", help="Path for output PPTX")
    cli.add_argument("sequence", help="Comma-separated 0-based slide indices")
    options = cli.parse_args()

    template = Path(options.template)
    if not template.exists():
        print(f"Error: Template not found: {options.template}")
        sys.exit(1)

    try:
        indices = [int(piece.strip()) for piece in options.sequence.split(",")]
    except ValueError:
        print("Error: sequence must be comma-separated integers (e.g. 0,34,34,50,52)")
        sys.exit(1)

    try:
        rearrange_presentation(template, Path(options.output), indices)
    except ValueError as exc:
        print(f"Error: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
231
skills/ppt/scripts/replace.py
Executable file
231
skills/ppt/scripts/replace.py
Executable file
@@ -0,0 +1,231 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Apply text replacements to PowerPoint presentation.
|
||||
|
||||
Usage:
|
||||
python replace.py <input.pptx> <replacements.json> <output.pptx>
|
||||
|
||||
The replacements JSON should have the structure output by inventory.py.
|
||||
ALL text shapes identified by inventory.py will have their text cleared
|
||||
unless "paragraphs" is specified in the replacements for that shape.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from inventory import InventoryData, extract_text_inventory
|
||||
from pptx import Presentation
|
||||
from pptx.dml.color import RGBColor
|
||||
from pptx.enum.dml import MSO_THEME_COLOR
|
||||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.oxml.xmlchemy import OxmlElement
|
||||
from pptx.util import Pt
|
||||
|
||||
# Alignment names accepted in the replacement JSON, mapped to the PP_ALIGN
# members that are assigned to paragraph.alignment.
_ALIGN_MAP = {
    "LEFT": PP_ALIGN.LEFT,
    "CENTER": PP_ALIGN.CENTER,
    "RIGHT": PP_ALIGN.RIGHT,
    "JUSTIFY": PP_ALIGN.JUSTIFY,
}

# Bullet indentation constants
# marL = font_size × (1 + level) × 1.6 pts, converted to EMUs (1 pt = 12700 EMU)
_INDENT_FACTOR = 1.6
_EMU_PER_PT = 12700
|
||||
|
||||
|
||||
def _clear_paragraph_bullets(paragraph):
    """Remove the bullet-defining XML elements from a paragraph's pPr and return the pPr.

    Removed children: ``buChar``, ``buNone``, ``buAutoNum``, ``buBlip`` and
    ``buFont``. ``buBlip`` (picture bullet) is included because it belongs to
    the same either/or group as the other three markers; leaving it in place
    would give the paragraph two bullet definitions once the caller adds a
    new one. The pPr element is created if the paragraph has none.
    """
    pPr = paragraph._element.get_or_add_pPr()
    bullet_tags = ("buChar", "buNone", "buAutoNum", "buBlip", "buFont")
    # Iterate over a copy: children are removed while looping.
    for child in list(pPr):
        if child.tag.endswith(bullet_tags):
            pPr.remove(child)
    return pPr
|
||||
|
||||
|
||||
def _apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Write one replacement entry (text plus formatting) onto *paragraph*.

    Args:
        paragraph: The target paragraph.
        para_data: Entry from the replacement JSON. Keys read here are
            ``text``, ``bullet``, ``level``, ``font_size``, ``alignment``,
            ``space_before``, ``space_after`` and ``line_spacing``; font keys
            are handled by ``_apply_font_properties``.

    Existing bullet elements are removed first. A bulleted entry gets a "•"
    bullet with a hanging indent derived from the font size and level (and
    is left-aligned unless an alignment is given); any other entry gets an
    explicit ``buNone`` with zero margins. The text goes into the first run,
    which is created when the paragraph has none.
    """
    text = para_data.get("text", "")
    pPr = _clear_paragraph_bullets(paragraph)

    # pPr children must follow schema order: the bullet element sits after
    # the spacing/bullet-style children and before these three. Inserting
    # relative to them keeps the XML valid whatever pPr already contains.
    bullet_successors = ("a:tabLst", "a:defRPr", "a:extLst")

    if para_data.get("bullet", False):
        # `or 0` also covers an explicit `"level": null` in the JSON.
        level = para_data.get("level") or 0
        paragraph.level = level
        font_size = para_data.get("font_size", 18.0)
        pPr.attrib["marL"] = str(int(font_size * _INDENT_FACTOR * (1 + level) * _EMU_PER_PT))
        # Negative indent = hanging indent, so wrapped lines align with the text.
        pPr.attrib["indent"] = str(int(-font_size * 0.8 * _EMU_PER_PT))
        buChar = OxmlElement("a:buChar")
        buChar.set("char", "•")
        pPr.insert_element_before(buChar, *bullet_successors)
        if "alignment" not in para_data:
            paragraph.alignment = PP_ALIGN.LEFT
    else:
        pPr.attrib["marL"] = "0"
        pPr.attrib["indent"] = "0"
        pPr.insert_element_before(OxmlElement("a:buNone"), *bullet_successors)

    if para_data.get("alignment") in _ALIGN_MAP:
        paragraph.alignment = _ALIGN_MAP[para_data["alignment"]]
    if "space_before" in para_data:
        paragraph.space_before = Pt(para_data["space_before"])
    if "space_after" in para_data:
        paragraph.space_after = Pt(para_data["space_after"])
    if "line_spacing" in para_data:
        paragraph.line_spacing = Pt(para_data["line_spacing"])

    run = paragraph.runs[0] if paragraph.runs else paragraph.add_run()
    run.text = text
    _apply_font_properties(run, para_data)
|
||||
|
||||
|
||||
def _apply_font_properties(run, para_data: Dict[str, Any]):
    """Copy the font-related keys of *para_data* onto ``run.font``.

    Only keys present in *para_data* are applied. ``color`` (six hex digits,
    optional leading ``#``) takes precedence over ``theme_color``; a colour
    string of any other length is ignored. An unknown theme colour name
    prints a warning instead of raising.
    """
    for flag in ("bold", "italic", "underline"):
        if flag in para_data:
            setattr(run.font, flag, para_data[flag])

    if "font_size" in para_data:
        run.font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        run.font.name = para_data["font_name"]

    if "color" in para_data:
        hex_digits = para_data["color"].lstrip("#")
        if len(hex_digits) == 6:
            red, green, blue = (int(hex_digits[i:i + 2], 16) for i in (0, 2, 4))
            run.font.color.rgb = RGBColor(red, green, blue)
        return

    if "theme_color" in para_data:
        try:
            run.font.color.theme_color = getattr(MSO_THEME_COLOR, para_data["theme_color"])
        except AttributeError:
            print(f"  WARNING: Unknown theme color '{para_data['theme_color']}'")
|
||||
|
||||
|
||||
def _check_duplicate_keys(pairs):
    """Build a dict from (key, value) pairs, rejecting repeated keys.

    Intended as a ``json.load`` ``object_pairs_hook``.

    Raises:
        ValueError: If the same key occurs twice within one JSON object.
    """
    collected = {}
    for name, payload in pairs:
        if name in collected:
            raise ValueError(f"Duplicate key in JSON: '{name}'")
        collected[name] = payload
    return collected
|
||||
|
||||
|
||||
def _validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """List every slide or shape key in *replacements* that the inventory does not contain.

    Top-level keys that do not start with ``"slide-"`` are skipped. Returns
    human-readable messages; an empty list means the replacements are valid.
    """
    problems = []
    for slide_key, requested in replacements.items():
        if not slide_key.startswith("slide-"):
            continue
        if slide_key not in inventory:
            problems.append(f"Slide '{slide_key}' not found in inventory")
            continue
        known = inventory[slide_key]
        problems.extend(
            f"Shape '{shape_key}' not found on '{slide_key}'. "
            f"Available: {', '.join(sorted(known.keys()))}"
            for shape_key in requested
            if shape_key not in known
        )
    return problems
|
||||
|
||||
|
||||
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Clear every inventoried text shape and refill those listed in the replacement JSON.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the UTF-8 replacement JSON (inventory structure).
        output_file: Path the modified presentation is saved to.

    Raises:
        ValueError: If the JSON repeats a key or names a slide/shape that is
            not in the inventory.

    Every shape found by the inventory is cleared; shapes with a non-empty
    ``"paragraphs"`` list are then refilled. Paragraphs that do not specify
    ``font_size`` inherit the size of the shape's first original paragraph,
    when that paragraph had one. After replacing, text overflow is estimated
    again and any increase is printed as a warning together with the
    inventory's own warnings; the file is saved regardless.
    """
    prs = Presentation(pptx_file)
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Snapshot original overflow so we can detect if replacements make it worse
    original_overflow: Dict[str, Dict[str, float]] = {
        slide_key: {
            shape_key: sd.frame_overflow_bottom
            for shape_key, sd in shapes.items()
            if sd.frame_overflow_bottom is not None
        }
        for slide_key, shapes in inventory.items()
    }

    # Shapes that stay empty drop out of the re-extracted inventory, which
    # renumbers the remaining "shape-N" keys. Remember each shape's XML id
    # (sd.shape.shape_id, not the inventory key) so results computed after
    # the replacement can be matched back to the original key.
    original_keys: Dict[str, Dict[Any, str]] = {
        slide_key: {sd.shape.shape_id: shape_key for shape_key, sd in shapes.items()}
        for slide_key, shapes in inventory.items()
    }

    # The inventory is written as UTF-8 with non-ASCII characters kept as-is,
    # so read the replacements the same way instead of the locale default.
    with open(json_file, encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=_check_duplicate_keys)

    errors = _validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for e in errors:
            print(f"  - {e}")
        raise ValueError(f"Found {len(errors)} validation error(s)")

    shapes_cleared = shapes_replaced = 0

    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue
        for shape_key, shape_data in shapes_dict.items():
            if not shape_data.shape:
                continue

            para_list = replacements.get(slide_key, {}).get(shape_key, {}).get("paragraphs")

            # Read the original font size BEFORE clearing: `paragraphs` is
            # recomputed from the live shape and is empty once it is cleared.
            orig_font_size = None
            if para_list:
                orig_paras = shape_data.paragraphs
                if orig_paras:
                    orig_font_size = orig_paras[0].font_size

            tf = shape_data.shape.text_frame  # type: ignore
            tf.clear()
            shapes_cleared += 1

            if not para_list:
                continue
            shapes_replaced += 1
            for i, para_data in enumerate(para_list):
                # After clear() the frame has exactly one paragraph to reuse.
                p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
                if orig_font_size is not None and "font_size" not in para_data:
                    para_data = {**para_data, "font_size": orig_font_size}
                _apply_paragraph_properties(p, para_data)

    # Re-check overflow on the updated in-memory presentation.
    # Note: extract_text_inventory may add benign empty <a:solidFill/> elements
    # while reading font colors — these are harmless and ignored by PowerPoint.
    updated_inventory = extract_text_inventory(Path(pptx_file), prs)

    overflow_errors: List[str] = []
    warnings: List[str] = []
    for slide_key, shapes_dict in updated_inventory.items():
        keys_by_xml_id = original_keys.get(slide_key, {})
        for new_key, sd in shapes_dict.items():
            # Report under the key the user wrote in the replacement JSON.
            shape_key = keys_by_xml_id.get(sd.shape.shape_id, new_key)
            for w in sd.warnings:
                warnings.append(f"{slide_key}/{shape_key}: {w}")
            new_ov = sd.frame_overflow_bottom
            if new_ov is not None:
                old_ov = original_overflow.get(slide_key, {}).get(shape_key, 0.0)
                if new_ov > old_ov + 0.01:
                    overflow_errors.append(
                        f'{slide_key}/{shape_key}: overflow increased by {new_ov - old_ov:.2f}" '
                        f'(was {old_ov:.2f}", now {new_ov:.2f}")'
                    )

    if overflow_errors or warnings:
        print("\nWARNING: Issues in replacement output:")
        for e in overflow_errors:
            print(f"  overflow - {e}")
        for w in warnings:
            print(f"  warning  - {w}")

    prs.save(output_file)
    print(f"Saved: {output_file}")
    print(f"  Shapes cleared: {shapes_cleared}, replaced: {shapes_replaced}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate the arguments, then apply replacements.

    Expects exactly three positional arguments: the input deck, the
    replacements JSON file and the output deck. Prints the module docstring
    and exits with status 1 when the argument count is wrong. Exits with
    status 1 when either input file does not exist, or when applying the
    replacements raises (the traceback is printed as well).
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = map(Path, cli_args)

    # Only the two inputs have to exist already; report the first one missing.
    missing = next(
        (candidate for candidate in (input_pptx, replacements_json) if not candidate.exists()),
        None,
    )
    if missing is not None:
        print(f"Error: File not found: {missing}")
        sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as exc:
        import traceback

        print(f"Error: {exc}")
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
BIN
skills/ppt/scripts/tectonic
Executable file
BIN
skills/ppt/scripts/tectonic
Executable file
Binary file not shown.
352
skills/ppt/scripts/thumbnail.py
Executable file
352
skills/ppt/scripts/thumbnail.py
Executable file
@@ -0,0 +1,352 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create thumbnail grids from PowerPoint presentation slides.
|
||||
|
||||
Creates a grid layout of slide thumbnails with configurable columns (max 6).
|
||||
Each grid contains up to cols×(cols+1) images. For presentations with more
|
||||
slides, multiple numbered grid files are created automatically.
|
||||
|
||||
The program outputs the names of all files created.
|
||||
|
||||
Output:
|
||||
- Single grid: {prefix}.jpg (if slides fit in one grid)
|
||||
- Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.
|
||||
|
||||
Grid limits by column count:
|
||||
- 3 cols: max 12 slides per grid (3×4)
|
||||
- 4 cols: max 20 slides per grid (4×5)
|
||||
- 5 cols: max 30 slides per grid (5×6) [default]
|
||||
- 6 cols: max 42 slides per grid (6×7)
|
||||
|
||||
Usage:
|
||||
python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]
|
||||
|
||||
Examples:
|
||||
python thumbnail.py presentation.pptx
|
||||
# Creates: thumbnails.jpg (using default prefix)
|
||||
# Outputs:
|
||||
# Created 1 grid(s):
|
||||
# - thumbnails.jpg
|
||||
|
||||
python thumbnail.py large-deck.pptx grid --cols 4
|
||||
# Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
|
||||
# Outputs:
|
||||
# Created 3 grid(s):
|
||||
# - grid-1.jpg
|
||||
# - grid-2.jpg
|
||||
# - grid-3.jpg
|
||||
|
||||
python thumbnail.py template.pptx analysis --outline-placeholders
|
||||
# Creates thumbnail grids with red outlines around text placeholders
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from inventory import extract_text_inventory
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from pptx import Presentation
|
||||
|
||||
# Constants
THUMBNAIL_WIDTH = 300  # Fixed thumbnail width in pixels
CONVERSION_DPI = 100  # DPI passed to pdftoppm for PDF to image conversion
MAX_COLS = 6  # Maximum number of columns (larger --cols values are clamped)
DEFAULT_COLS = 5  # Default number of columns
JPEG_QUALITY = 95  # JPEG quality used when saving grid files

# Grid layout constants
GRID_PADDING = 20  # Pixels between thumbnails and around the grid edge
BORDER_WIDTH = 2  # Gray border width around thumbnails in pixels (0 disables it)
FONT_SIZE_RATIO = 0.12  # Slide-label font size as fraction of thumbnail width
LABEL_PADDING_RATIO = 0.4  # Padding above and below the label as fraction of font size
|
||||
|
||||
|
||||
def main():
    """Parse command-line arguments and write the thumbnail grid image(s).

    The input must be an existing ``.pptx`` file. The deck is converted to
    per-slide images inside a temporary directory, the images are laid out
    into one or more grids, and the names of the written grid files are
    printed.

    Exits through ``parser.error`` when ``--cols`` is smaller than 1, and
    through ``sys.exit`` with an error message when the input file is
    invalid, when no slide images were produced, or when a conversion step
    raises ``RuntimeError``.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )
    parser.add_argument(
        "--outline-placeholders",
        action="store_true",
        help="Outline text placeholders with a colored border",
    )

    args = parser.parse_args()

    # A zero or negative column count makes the per-grid chunk size
    # non-positive, which crashes or corrupts grid creation; reject it early.
    if args.cols < 1:
        parser.error(f"--cols must be at least 1 (got {args.cols})")

    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")

    input_path = Path(args.input)
    if not input_path.is_file() or input_path.suffix.lower() != ".pptx":
        sys.exit(f"Error: Invalid PowerPoint file: {args.input}")

    output_path = Path(f"{args.output_prefix}.jpg")
    print(f"Processing: {args.input}")

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            placeholder_regions = None
            slide_dimensions = None
            if args.outline_placeholders:
                print("Extracting placeholder regions...")
                placeholder_regions, slide_dimensions = get_placeholder_regions(input_path)
                if placeholder_regions:
                    print(f"Found placeholders on {len(placeholder_regions)} slides")

            prs = Presentation(str(input_path))
            total_slides = len(prs.slides)
            # 1-based numbers of slides whose element carries show="0";
            # these are replaced by placeholder images during conversion.
            hidden_slides = {
                idx + 1
                for idx, slide in enumerate(prs.slides)
                if slide.element.get("show") == "0"
            }

            hidden_info = f" ({len(hidden_slides)} hidden)" if hidden_slides else ""
            print(f"Found {total_slides} slides{hidden_info}")

            slide_images = convert_to_images(input_path, temp_path, CONVERSION_DPI, total_slides, hidden_slides)
            if not slide_images:
                sys.exit("Error: No slides found")

            # The grids must be written before the temporary directory (which
            # holds the per-slide images) is removed.
            grid_files = create_grids(
                slide_images, cols, THUMBNAIL_WIDTH, output_path,
                placeholder_regions, slide_dimensions,
            )

            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  - {grid_file}")

    except RuntimeError as e:
        sys.exit(f"Error: {e}")
|
||||
|
||||
|
||||
def create_hidden_slide_placeholder(size):
    """Return a light-gray image of ``size`` crossed out by its two diagonals.

    ``size`` is a ``(width, height)`` pair in pixels. The stroke width scales
    with the shorter side but is never thinner than 5 pixels.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)
    stroke = max(5, min(size) // 100)
    diagonals = (
        [(0, 0), size],
        [(size[0], 0), (0, size[1])],
    )
    for endpoints in diagonals:
        pen.line(endpoints, fill="#CCCCCC", width=stroke)
    return canvas
|
||||
|
||||
|
||||
def get_placeholder_regions(pptx_path):
    """Collect the bounding box of every shape listed in the text inventory.

    Returns a tuple ``(placeholder_regions, slide_dimensions)``.

    ``placeholder_regions`` maps a slide index (the integer after the first
    ``-`` in the inventory's slide key) to a list of dicts with the keys
    'left', 'top', 'width' and 'height', copied from the inventory's shape
    data (in inches, per the original documentation). Slides without any
    shape in the inventory are omitted.

    ``slide_dimensions`` is ``(width_inches, height_inches)``. When the
    presentation does not report a size, 9144000 x 5143500 EMU
    (10 x 5.625 inches) is assumed.
    """
    emu_per_inch = 914400.0

    presentation = Presentation(str(pptx_path))
    inventory = extract_text_inventory(pptx_path, presentation)

    dimensions = (
        (presentation.slide_width or 9144000) / emu_per_inch,
        (presentation.slide_height or 5143500) / emu_per_inch,
    )

    regions_by_slide = {}
    for slide_key, shapes in inventory.items():
        slide_idx = int(slide_key.split("-")[1])
        boxes = []
        for shape in shapes.values():
            boxes.append(
                {
                    "left": shape.left,
                    "top": shape.top,
                    "width": shape.width,
                    "height": shape.height,
                }
            )
        if boxes:
            regions_by_slide[slide_idx] = boxes

    return regions_by_slide, dimensions
|
||||
|
||||
|
||||
def _pptx_to_pdf(pptx_path, temp_dir):
|
||||
"""Convert PPTX to PDF via LibreOffice. Returns path to the PDF file."""
|
||||
pdf_path = temp_dir / f"{pptx_path.stem}.pdf"
|
||||
result = subprocess.run(
|
||||
["soffice", "--headless", "--convert-to", "pdf", "--outdir", str(temp_dir), str(pptx_path)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0 or not pdf_path.exists():
|
||||
raise RuntimeError("PDF conversion failed")
|
||||
return pdf_path
|
||||
|
||||
|
||||
def _pdf_to_images(pdf_path, temp_dir, dpi):
|
||||
"""Convert PDF pages to JPEG images via pdftoppm. Returns sorted image paths."""
|
||||
result = subprocess.run(
|
||||
["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError("Image conversion failed")
|
||||
return sorted(temp_dir.glob("slide-*.jpg"))
|
||||
|
||||
|
||||
def convert_to_images(pptx_path, temp_dir, dpi, total_slides, hidden_slides):
    """Render the deck to one image path per slide, in slide order.

    The presentation is converted to PDF and the PDF pages to JPEG files in
    ``temp_dir``. Slide numbers run from 1 to ``total_slides``; each number
    found in ``hidden_slides`` gets a generated placeholder image (sized like
    the first rendered page), and every other slide takes the next rendered
    page. If the rendered pages run out, the remaining visible slides are
    skipped. Returns an empty list when nothing was rendered.
    """
    rendered = _pdf_to_images(_pptx_to_pdf(pptx_path, temp_dir), temp_dir, dpi)
    if not rendered:
        return []

    with Image.open(rendered[0]) as first_page:
        placeholder_size = first_page.size

    remaining_pages = iter(rendered)
    ordered = []
    for slide_num in range(1, total_slides + 1):
        if slide_num not in hidden_slides:
            page = next(remaining_pages, None)
            if page is not None:
                ordered.append(page)
            continue

        stand_in = temp_dir / f"hidden-{slide_num:03d}.jpg"
        create_hidden_slide_placeholder(placeholder_size).save(stand_in, "JPEG")
        ordered.append(stand_in)

    return ordered
|
||||
|
||||
|
||||
def create_grids(
    image_paths,
    cols,
    width,
    output_path,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Write the slide images as one or more grid files and return their names.

    Each grid holds at most ``cols * (cols + 1)`` images. When everything fits
    into one grid it is saved to ``output_path``; otherwise the grids are
    saved next to it as ``<stem>-1<suffix>``, ``<stem>-2<suffix>``, and so on.
    Missing parent directories are created. The offset of each chunk within
    ``image_paths`` is passed to ``create_grid`` as the starting slide number.
    """
    per_grid = cols * (cols + 1)
    image_count = len(image_paths)
    fits_single_grid = image_count <= per_grid

    written = []
    for grid_number, offset in enumerate(range(0, image_count, per_grid), start=1):
        batch = image_paths[offset: offset + per_grid]
        sheet = create_grid(batch, cols, width, offset, placeholder_regions, slide_dimensions)

        if fits_single_grid:
            target = output_path
        else:
            target = output_path.parent / f"{output_path.stem}-{grid_number}{output_path.suffix}"

        target.parent.mkdir(parents=True, exist_ok=True)
        sheet.save(str(target), quality=JPEG_QUALITY)
        written.append(str(target))

    return written
|
||||
|
||||
|
||||
def create_grid(
    image_paths,
    cols,
    width,
    start_slide_num=0,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Compose one grid image from slide images, optionally outlining regions.

    Every cell shows a centered label (``start_slide_num`` plus the image's
    position in ``image_paths``) above a thumbnail at most ``width`` pixels
    wide. The cell height follows the aspect ratio of the first image. When
    ``placeholder_regions`` has an entry for a slide number, each of its
    regions is outlined in red on the full-size image before it is scaled
    down; region coordinates are converted to pixels using
    ``slide_dimensions`` or, if that is not given, the image size divided by
    ``CONVERSION_DPI``. Returns the composed image.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    with Image.open(image_paths[0]) as first_image:
        height = int(width * (first_image.height / first_image.width))

    # Vertical pitch of one cell: label band (text plus padding) and thumbnail.
    cell_height = height + font_size + label_padding * 2

    rows = (len(image_paths) + cols - 1) // cols
    grid_size = (
        cols * width + (cols + 1) * GRID_PADDING,
        rows * cell_height + (rows + 1) * GRID_PADDING,
    )

    grid = Image.new("RGB", grid_size, "white")
    draw = ImageDraw.Draw(grid)

    # Fall back to the default-sized font when a size cannot be requested.
    try:
        font = ImageFont.load_default(size=font_size)
    except Exception:
        font = ImageFont.load_default()

    for i, img_path in enumerate(image_paths):
        row, col = divmod(i, cols)
        slide_no = start_slide_num + i
        x = col * width + (col + 1) * GRID_PADDING
        y_base = row * cell_height + (row + 1) * GRID_PADDING

        # Slide number, horizontally centered over the thumbnail.
        label = f"{slide_no}"
        text_left, _, text_right, _ = draw.textbbox((0, 0), label, font=font)
        label_x = x + (width - (text_right - text_left)) // 2
        draw.text((label_x, y_base + label_padding), label, fill="black", font=font)

        y_thumbnail = y_base + label_padding + font_size + label_padding

        with Image.open(img_path) as source:
            frame = source
            orig_w, orig_h = frame.size

            if placeholder_regions and slide_no in placeholder_regions:
                if frame.mode != "RGBA":
                    frame = frame.convert("RGBA")

                if slide_dimensions:
                    slide_w_in, slide_h_in = slide_dimensions
                else:
                    slide_w_in = orig_w / CONVERSION_DPI
                    slide_h_in = orig_h / CONVERSION_DPI

                # Pixels per region unit along each axis.
                x_scale = orig_w / slide_w_in
                y_scale = orig_h / slide_h_in

                overlay = Image.new("RGBA", frame.size, (255, 255, 255, 0))
                overlay_draw = ImageDraw.Draw(overlay)
                stroke_width = max(5, min(orig_w, orig_h) // 150)

                for region in placeholder_regions[slide_no]:
                    px_left = int(region["left"] * x_scale)
                    px_top = int(region["top"] * y_scale)
                    px_right = px_left + int(region["width"] * x_scale)
                    px_bottom = px_top + int(region["height"] * y_scale)
                    overlay_draw.rectangle(
                        [(px_left, px_top), (px_right, px_bottom)],
                        outline=(255, 0, 0, 255),
                        width=stroke_width,
                    )

                frame = Image.alpha_composite(frame, overlay).convert("RGB")

            # Shrink in place, then center the result inside its cell.
            frame.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = frame.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            grid.paste(frame, (tx, ty))

            if BORDER_WIDTH > 0:
                top_left = (tx - BORDER_WIDTH, ty - BORDER_WIDTH)
                bottom_right = (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1)
                draw.rectangle([top_left, bottom_right], outline="gray", width=BORDER_WIDTH)

    return grid
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user