7.0 KiB
Executable File
Edit Patterns — Reusable Code for Complex Edit Operations
Load this file ON DEMAND when you encounter grouping, sorting, block detection, or other complex edit patterns. Do NOT load upfront for simple edits.
Pattern: Block Detection
Data is often split into independent blocks separated by blank rows or keyword rows (e.g., TOTAL, Subtotal).
def detect_blocks(ws, col=1, start_row=1, end_row=None,
separator='blank', keyword='TOTAL'):
"""
Detect data block boundaries.
separator: 'blank' (empty row) or 'keyword' (row containing keyword)
Returns: list of (start_row, end_row) tuples
"""
if end_row is None:
end_row = ws.max_row
blocks, block_start = [], None
for row in range(start_row, end_row + 1):
val = ws.cell(row=row, column=col).value
is_blank = val is None or (isinstance(val, str) and val.strip() == '')
is_kw = (separator == 'keyword' and
isinstance(val, str) and keyword in str(val).upper())
if separator == 'blank':
if not is_blank and block_start is None:
block_start = row
elif is_blank and block_start is not None:
blocks.append((block_start, row - 1))
block_start = None
elif separator == 'keyword':
if is_kw:
if block_start:
blocks.append((block_start, row))
block_start = None
elif not is_blank and block_start is None:
block_start = row
if block_start:
blocks.append((block_start, end_row))
return blocks
Pattern: Pre-filter Null Rows
Before any groupby/aggregation, filter out rows where key columns are empty.
def pre_filter_rows(ws, key_cols, start_row, end_row):
"""Return row numbers where ALL key columns are non-null."""
return [row for row in range(start_row, end_row + 1)
if all(normalize_cell_value(ws.cell(row=row, column=c).value) is not None
for c in key_cols)]
Pattern: Sort with Formula Rewrite
When sorting rows by swapping data (not using insert_rows), formulas must be regenerated with new row numbers.
def sort_block_with_formulas(ws, block_rows, sort_col, formula_templates,
descending=True):
"""
Sort rows within a block, regenerating formulas.
formula_templates: dict {col_index: '=B{row}+C{row}'}
"""
# 1. Read all row data + compute sort key
rows_data = []
for r in block_rows:
vals = {c: ws.cell(row=r, column=c).value for c in range(1, ws.max_column + 1)}
rows_data.append(vals)
rows_data.sort(key=lambda x: (x.get(sort_col) or 0), reverse=descending)
# 2. Write back with new row numbers
for i, rd in enumerate(rows_data):
target = block_rows[i]
for col, val in rd.items():
if col in formula_templates:
ws.cell(row=target, column=col).value = formula_templates[col].format(row=target)
else:
ws.cell(row=target, column=col).value = val
Pattern: Group-Merge (Aggregate by Key)
Group rows by a key column. Take first-row values for some columns, sum for others.
from collections import OrderedDict
def group_merge_rows(ws, key_col, start_row, end_row, first_cols, sum_cols):
"""
Group by key_col, merge rows.
first_cols: take value from first row in group
sum_cols: sum values across group
"""
groups = OrderedDict()
for row in range(start_row, end_row + 1):
key = normalize_cell_value(ws.cell(row=row, column=key_col).value)
if key is None:
continue
if key not in groups:
groups[key] = {
'first': {c: ws.cell(row=row, column=c).value for c in first_cols},
'sums': {c: 0.0 for c in sum_cols},
}
for c in sum_cols:
v = normalize_cell_value(ws.cell(row=row, column=c).value)
if v is not None:
try:
groups[key]['sums'][c] += float(v)
except (ValueError, TypeError):
pass
return groups
Pattern: Group-Max-Keep-Ties
Group by key, find max value per group, keep ALL rows that match the max (not just the first).
from collections import defaultdict
def group_max_keep_ties(rows, key_func, value_func, filter_null=True):
"""
Keep all rows with the maximum value per group (ties preserved).
rows: list of row dicts or tuples
key_func: row → group key
value_func: row → comparable value (e.g., date)
"""
groups = defaultdict(list)
for row in rows:
val = value_func(row)
if filter_null and val is None:
continue
groups[key_func(row)].append(row)
kept = []
for key, group in groups.items():
max_val = max(value_func(r) for r in group)
kept.extend(r for r in group if value_func(r) == max_val)
return kept
Pattern: Sequence Fill (Smart Numbering)
Fill blank rows with "parent number + letter suffix" (e.g., 5 → 5a, 5b, ..., 5z, 5aa).
import re
def get_letter_suffix(n):
"""0=a, 25=z, 26=aa, 27=ab..."""
if n < 26:
return chr(ord('a') + n)
return chr(ord('a') + (n // 26) - 1) + chr(ord('a') + (n % 26))
def fill_sequential_labels(ws, col, start_row, end_row):
last_base, blank_count = None, 0
for row in range(start_row, end_row + 1):
val = ws.cell(row=row, column=col).value
if val is not None:
m = re.match(r'^(\d+)', str(val))
if m:
last_base = m.group(1)
blank_count = 0
else:
if last_base is not None:
ws.cell(row=row, column=col).value = f"{last_base}{get_letter_suffix(blank_count)}"
blank_count += 1
Pattern: Zero-as-Blank Output
When merged/aggregated values of 0 should display as empty:
# Method 1: Write None (best for programmatic verification)
cell.value = computed_value if computed_value != 0 else None
# Method 2: Number format (best for Excel viewing)
cell.value = computed_value
cell.number_format = '0.00;-0.00;""' # positive;negative;zero(blank)
Pattern: Side-by-Side Table Detection
Some sheets contain multiple independent tables arranged horizontally (separated by empty columns).
def detect_side_by_side_tables(ws):
"""Find column groups separated by all-null columns."""
tables = []
current_start = None
for col in range(1, ws.max_column + 1):
has_data = any(ws.cell(row=r, column=col).value is not None
for r in range(1, ws.max_row + 1))
if has_data and current_start is None:
current_start = col
elif not has_data and current_start is not None:
tables.append((current_start, col - 1))
current_start = None
if current_start:
tables.append((current_start, ws.max_column))
return tables # [(start_col, end_col), ...]