Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/xlsx/scenes/edit-patterns.md
+++ b/skills/xlsx/scenes/edit-patterns.md
@@ -0,0 +1,222 @@
+# Edit Patterns — Reusable Code for Complex Edit Operations
+
+> Load this file ON DEMAND when you encounter grouping, sorting, block detection, or other complex edit patterns.
+> Do NOT load upfront for simple edits.
+
+---
+
+## Pattern: Block Detection
+
+Data is often split into independent blocks separated by blank rows or keyword rows (e.g., TOTAL, Subtotal).
+
+```python
+def detect_blocks(ws, col=1, start_row=1, end_row=None,
+                  separator='blank', keyword='TOTAL'):
+    """
+    Detect data block boundaries.
+    separator: 'blank' (empty row) or 'keyword' (row containing keyword)
+    Returns: list of (start_row, end_row) tuples
+    """
+    if end_row is None:
+        end_row = ws.max_row
+    blocks, block_start = [], None
+    for row in range(start_row, end_row + 1):
+        val = ws.cell(row=row, column=col).value
+        is_blank = val is None or (isinstance(val, str) and val.strip() == '')
+        is_kw = (separator == 'keyword' and
+                 isinstance(val, str) and keyword in str(val).upper())
+        if separator == 'blank':
+            if not is_blank and block_start is None:
+                block_start = row
+            elif is_blank and block_start is not None:
+                blocks.append((block_start, row - 1))
+                block_start = None
+        elif separator == 'keyword':
+            if is_kw:
+                if block_start:
+                    blocks.append((block_start, row))
+                    block_start = None
+            elif not is_blank and block_start is None:
+                block_start = row
+    if block_start:
+        blocks.append((block_start, end_row))
+    return blocks
+```
+
+---
+
+## Pattern: Pre-filter Null Rows
+
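+Before any groupby/aggregation, drop every row in which any key column is empty, so that only rows whose key columns are all non-null remain. The snippet relies on a `normalize_cell_value` helper that is not defined in this file and must be available from elsewhere.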
+
+```python
+def pre_filter_rows(ws, key_cols, start_row, end_row):
+    """Return row numbers where ALL key columns are non-null."""
+    return [row for row in range(start_row, end_row + 1)
+            if all(normalize_cell_value(ws.cell(row=row, column=c).value) is not None
+                   for c in key_cols)]
+```
+
+---
+
+## Pattern: Sort with Formula Rewrite
+
+When sorting rows by swapping data (not using `insert_rows`), formulas must be regenerated with new row numbers.
+
+```python
+def sort_block_with_formulas(ws, block_rows, sort_col, formula_templates,
+                             descending=True):
+    """
+    Sort rows within a block, regenerating formulas.
+    formula_templates: dict {col_index: '=B{row}+C{row}'}
+    """
+    # 1. Read all row data + compute sort key
+    rows_data = []
+    for r in block_rows:
+        vals = {c: ws.cell(row=r, column=c).value for c in range(1, ws.max_column + 1)}
+        rows_data.append(vals)
+    rows_data.sort(key=lambda x: (x.get(sort_col) or 0), reverse=descending)
+
+    # 2. Write back with new row numbers
+    for i, rd in enumerate(rows_data):
+        target = block_rows[i]
+        for col, val in rd.items():
+            if col in formula_templates:
+                ws.cell(row=target, column=col).value = formula_templates[col].format(row=target)
+            else:
+                ws.cell(row=target, column=col).value = val
+```
+
+---
+
+## Pattern: Group-Merge (Aggregate by Key)
+
+Group rows by a key column. Take first-row values for some columns, sum for others.
+
+```python
+from collections import OrderedDict
+
+def group_merge_rows(ws, key_col, start_row, end_row, first_cols, sum_cols):
+    """
+    Group by key_col, merge rows.
+    first_cols: take value from first row in group
+    sum_cols: sum values across group
+    """
+    groups = OrderedDict()
+    for row in range(start_row, end_row + 1):
+        key = normalize_cell_value(ws.cell(row=row, column=key_col).value)
+        if key is None:
+            continue
+        if key not in groups:
+            groups[key] = {
+                'first': {c: ws.cell(row=row, column=c).value for c in first_cols},
+                'sums': {c: 0.0 for c in sum_cols},
+            }
+        for c in sum_cols:
+            v = normalize_cell_value(ws.cell(row=row, column=c).value)
+            if v is not None:
+                try:
+                    groups[key]['sums'][c] += float(v)
+                except (ValueError, TypeError):
+                    pass
+    return groups
+```
+
+---
+
+## Pattern: Group-Max-Keep-Ties
+
+Group by key, find max value per group, keep ALL rows that match the max (not just the first).
+
+```python
+from collections import defaultdict
+
+def group_max_keep_ties(rows, key_func, value_func, filter_null=True):
+    """
+    Keep all rows with the maximum value per group (ties preserved).
+    rows: list of row dicts or tuples
+    key_func: row → group key
+    value_func: row → comparable value (e.g., date)
+    """
+    groups = defaultdict(list)
+    for row in rows:
+        val = value_func(row)
+        if filter_null and val is None:
+            continue
+        groups[key_func(row)].append(row)
+
+    kept = []
+    for key, group in groups.items():
+        max_val = max(value_func(r) for r in group)
+        kept.extend(r for r in group if value_func(r) == max_val)
+    return kept
+```
+
+---
+
+## Pattern: Sequence Fill (Smart Numbering)
+
+Fill blank rows with "parent number + letter suffix" (e.g., 5 → 5a, 5b, ..., 5z, 5aa).
+
+```python
+import re
+
+def get_letter_suffix(n):
+    """0=a, 25=z, 26=aa, 27=ab..."""
+    if n < 26:
+        return chr(ord('a') + n)
+    return chr(ord('a') + (n // 26) - 1) + chr(ord('a') + (n % 26))
+
+def fill_sequential_labels(ws, col, start_row, end_row):
+    last_base, blank_count = None, 0
+    for row in range(start_row, end_row + 1):
+        val = ws.cell(row=row, column=col).value
+        if val is not None:
+            m = re.match(r'^(\d+)', str(val))
+            if m:
+                last_base = m.group(1)
+            blank_count = 0
+        else:
+            if last_base is not None:
+                ws.cell(row=row, column=col).value = f"{last_base}{get_letter_suffix(blank_count)}"
+                blank_count += 1
+```
+
+---
+
+## Pattern: Zero-as-Blank Output
+
+When merged/aggregated values of 0 should display as empty:
+
+```python
+# The two methods are alternatives; pick one.
+# `cell` and `computed_value` are not defined in this snippet; the
+# surrounding code is expected to supply them.
+
+# Method 1: Write None (best for programmatic verification)
+# A computed value equal to 0 is replaced by None; any other value is
+# written unchanged.
+cell.value = computed_value if computed_value != 0 else None
+
+# Method 2: Number format (best for Excel viewing)
+# The value, including 0, is still written to the cell; only the number
+# format's third section ("") affects how a zero is shown.
+cell.value = computed_value
+cell.number_format = '0.00;-0.00;""'  # positive;negative;zero(blank)
+```
+
+---
+
+## Pattern: Side-by-Side Table Detection
+
+Some sheets contain multiple independent tables arranged horizontally (separated by empty columns).
+
+```python
+def detect_side_by_side_tables(ws):
+    """Find column groups separated by all-null columns."""
+    tables = []
+    current_start = None
+    for col in range(1, ws.max_column + 1):
+        has_data = any(ws.cell(row=r, column=col).value is not None
+                       for r in range(1, ws.max_row + 1))
+        if has_data and current_start is None:
+            current_start = col
+        elif not has_data and current_start is not None:
+            tables.append((current_start, col - 1))
+            current_start = None
+    if current_start:
+        tables.append((current_start, ws.max_column))
+    return tables  # [(start_col, end_col), ...]
+```