Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/xlsx/scenes/advanced.md
+++ b/skills/xlsx/scenes/advanced.md
@@ -0,0 +1,271 @@
+# Scene: Advanced Operations
+
+## When This Applies
+Batch processing multiple files, handling very large datasets, data validation, conditional formatting, sheet protection, or other power-user features.
+
+---
+
+## Large File Handling (>100K rows)
+
+### Read-Only Mode
+```python
+from openpyxl import load_workbook
+
+# Memory-efficient reading — does NOT load entire file
+wb = load_workbook('huge.xlsx', read_only=True)
+ws = wb.active
+
+for row in ws.iter_rows(min_row=2, values_only=True):
+    process(row)  # Yields rows one at a time
+
+wb.close()  # MUST close read-only workbooks
+```
+
+### Write-Only Mode
+```python
+from openpyxl import Workbook
+
+wb = Workbook(write_only=True)
+ws = wb.create_sheet()
+
+# Write rows sequentially — cannot random-access cells
+for data_row in large_dataset:
+    ws.append(data_row)
+
+wb.save('output.xlsx')
+```
+
+### Chunked Processing with pandas
+```python
+# Read in chunks
+chunks = pd.read_excel('huge.xlsx', chunksize=10000)
+# Note: chunksize only works with read_csv, not read_excel
+
+# For Excel, read specific columns/rows
+df = pd.read_excel('huge.xlsx',
+    usecols=['A', 'C', 'E'],     # Only needed columns
+    nrows=50000,                   # Limit rows
+    dtype={'id': str}              # Prevent type inference overhead
+)
+```
+
+---
+
+## Batch Processing Multiple Files
+
+```python
+import os
+import glob
+import pandas as pd
+
+# Collect all Excel files
+files = glob.glob('data/*.xlsx')
+
+# Method 1: Concatenate into one DataFrame
+all_data = []
+for f in files:
+    df = pd.read_excel(f)
+    df['source_file'] = os.path.basename(f)
+    all_data.append(df)
+
+combined = pd.concat(all_data, ignore_index=True)
+combined.to_excel('combined.xlsx', index=False)
+
+# Method 2: One sheet per file
+wb = Workbook()
+wb.remove(wb.active)  # Remove default sheet
+
+for f in files:
+    df = pd.read_excel(f)
+    ws = wb.create_sheet(title=os.path.splitext(os.path.basename(f))[0][:31])
+    for r in dataframe_to_rows(df, index=False, header=True):
+        ws.append(r)
+
+wb.save('all_files.xlsx')
+```
+
+---
+
+## Data Validation (Dropdown Lists)
+
+```python
+from openpyxl.worksheet.datavalidation import DataValidation
+
+# Dropdown list
+dv = DataValidation(
+    type="list",
+    formula1='"High,Medium,Low"',
+    allow_blank=True,
+    showErrorMessage=True,
+    errorTitle="Invalid",
+    error="Please select High, Medium, or Low"
+)
+ws.add_data_validation(dv)
+dv.add('D5:D100')  # Apply to range
+
+# Number range validation
+dv_num = DataValidation(
+    type="whole",
+    operator="between",
+    formula1=1,
+    formula2=100,
+    errorTitle="Out of range",
+    error="Enter a number between 1 and 100"
+)
+ws.add_data_validation(dv_num)
+dv_num.add('E5:E100')
+
+# Date validation
+dv_date = DataValidation(
+    type="date",
+    operator="greaterThan",
+    formula1="2024-01-01"
+)
+ws.add_data_validation(dv_date)
+dv_date.add('F5:F100')
+```
+
+---
+
+## Conditional Formatting
+
+For full conditional formatting rules, color usage, and code examples → see **`engines/design.md §8`**.
+
+Quick reference for advanced-only patterns (FormulaRule for row-level highlighting):
+
+```python
+from openpyxl.formatting.rule import FormulaRule
+from openpyxl.styles import PatternFill
+
+# Formula-based: highlight entire row if status = "Overdue"
+# The column is anchored ($G) while the row is relative, so each row in B5:H100 tests its own G cell.
+ws.conditional_formatting.add('B5:H100',
+    FormulaRule(formula=['$G5="Overdue"'],
+               fill=PatternFill('solid', fgColor='FFEBEE')))
+
+# Note: icon sets, color scales and data bars are also available
+# (IconSetRule, ColorScaleRule, DataBarRule in openpyxl.formatting.rule)
+```
+
+---
+
+## Sheet Protection
+
+```python
+# Protect sheet (allow select + sort, prevent edits)
+ws.protection.sheet = True
+ws.protection.password = 'mypassword'
+ws.protection.sort = True
+ws.protection.autoFilter = True
+
+# Unlock specific cells for user input
+from openpyxl.styles import Protection
+unlocked = Protection(locked=False)
+for row in range(5, 101):
+    ws.cell(row=row, column=4).protection = unlocked  # Column D is editable
+
+# Protect workbook structure (prevent adding/deleting sheets)
+wb.security.workbookPassword = 'structpass'
+wb.security.lockStructure = True
+```
+
+---
+
+## Named Ranges
+
+```python
+from openpyxl.workbook.defined_name import DefinedName
+
+# Create named range
+ref = f"'Data'!$B$5:$B$100"
+defn = DefinedName('SalesData', attr_text=ref)
+wb.defined_names.add(defn)
+
+# Use in formulas
+ws['H5'] = '=SUM(SalesData)'
+```
+
+---
+
+## Auto-Filter & Sort
+
+```python
+# Apply auto-filter
+ws.auto_filter.ref = 'B4:H100'
+
+# Add filter criteria (for saved state — user can change in Excel)
+ws.auto_filter.add_filter_column(0, ['Active', 'Pending'])
+
+# Sort (openpyxl can set sort state, but actual reordering
+# must be done in Python before writing)
+df = df.sort_values(['Category', 'Revenue'], ascending=[True, False])
+```
+
+---
+
+## Merged Cells
+
+```python
+# Merge cells
+ws.merge_cells('B2:H2')  # Title spanning full width
+
+# Write to merged range (write to top-left cell)
+ws['B2'] = 'Report Title'
+
+# Check existing merges before editing
+for merge_range in ws.merged_cells.ranges:
+    print(f"Merged: {merge_range}")
+
+# Unmerge if needed
+ws.unmerge_cells('B2:H2')
+```
+
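+**Warning**: Never write to cells within a merged range except the top-left cell. Only the top-left cell stores a value; in recent openpyxl versions the other cells are read-only `MergedCell` objects, so assigning to them raises an error instead of writing data.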
+
+---
+
+## Performance Tips
+
+| Technique | When | Impact |
+|-----------|------|--------|
+| `read_only=True` | Reading files >50K rows | ~10x less memory |
+| `write_only=True` | Writing files >50K rows | ~5x faster |
+| `usecols` parameter | Only need specific columns | Faster read |
+| Avoid `ws.cell()` in tight loops | Use `ws.append()` instead | Faster write |
+| Batch style application | Apply to ranges, not cell-by-cell | Faster formatting |
+| `data_only=True` for analysis | Need values not formulas | Returns the cached results (`None` if the file was never calculated) |
+
+---
+
+## VBA Module Inspection
+
+When working with `.xlsm` files, you can read and list VBA modules:
+
+```python
+from openpyxl import load_workbook
+import zipfile
+import os
+
+def list_vba_modules(filepath):
+    """List all VBA modules in an .xlsm file."""
+    if not filepath.endswith(('.xlsm', '.xlsb')):
+        return {"has_vba": False, "modules": []}
+    
+    modules = []
+    try:
+        with zipfile.ZipFile(filepath, 'r') as zf:
+            vba_files = [f for f in zf.namelist() if f.startswith('xl/vbaProject')]
+            if not vba_files:
+                return {"has_vba": False, "modules": []}
+            
+            # Read with keep_vba to access vba_archive
+            wb = load_workbook(filepath, keep_vba=True)
+            if wb.vba_archive:
+                for name in wb.vba_archive.namelist():
+                    modules.append(name)
+            wb.close()
+    except Exception as e:
+        return {"has_vba": False, "error": str(e)}
+    
+    return {"has_vba": True, "modules": modules}
+```
+
+Use this to inspect before editing — know what VBA exists before you touch the file.
--- a/skills/xlsx/scenes/analyze-recipes.md
+++ b/skills/xlsx/scenes/analyze-recipes.md
@@ -0,0 +1,234 @@
+# Analyze Recipes — Code Patterns for Data Analysis
+
+> Load this file ON DEMAND when you need specific code patterns. Do NOT load upfront.
+
+---
+
+## Load & Explore
+
+```python
+import pandas as pd
+
+df = pd.read_excel('input.xlsx')  # or read_csv, read_json
+# Multi-sheet: pd.read_excel('input.xlsx', sheet_name=None) → dict
+
+print(f"Shape: {df.shape}")
+print(f"Columns: {list(df.columns)}")
+print(f"Dtypes:\n{df.dtypes}")
+print(f"Nulls:\n{df.isnull().sum()}")
+print(f"Duplicates: {df.duplicated().sum()}")
+print(f"\nDescribe:\n{df.describe()}")
+```
+
+---
+
+## Aggregation & Grouping
+
+```python
+summary = df.groupby('Category').agg(
+    total=('Revenue', 'sum'),
+    avg=('Revenue', 'mean'),
+    count=('Revenue', 'count'),
+    max_val=('Revenue', 'max')
+).round(2)
+
+pivot = df.pivot_table(
+    values='Amount', index='Category', columns='Quarter',
+    aggfunc='sum', margins=True
+)
+```
+
+---
+
+## Time Series
+
+```python
+# Parse the date column so it can drive the resample below
+df['date'] = pd.to_datetime(df['date'])
+# NOTE: pandas >= 2.2 deprecates the 'M' alias in favour of 'ME' (month end)
+monthly = df.resample('M', on='date').agg({'revenue': 'sum', 'orders': 'count'})
+# Period-over-period change of monthly revenue
+monthly['growth'] = monthly['revenue'].pct_change()
+# 3-period rolling mean; the first two rows are NaN
+monthly['rolling_3m'] = monthly['revenue'].rolling(3).mean()
+```
+
+---
+
+## Comparison / Diff
+
+```python
+df1 = pd.read_excel('this_month.xlsx')
+df2 = pd.read_excel('last_month.xlsx')
+merged = df1.merge(df2, on='ID', suffixes=('_new', '_old'))
+merged['change'] = merged['value_new'] - merged['value_old']
+merged['change_pct'] = (merged['change'] / merged['value_old'] * 100).round(1)
+```
+
+---
+
+## Statistical Analysis
+
+```python
+stats = df.describe().T
+stats['median'] = df.median()
+stats['skew'] = df.skew()
+corr = df.select_dtypes(include='number').corr().round(3)
+top_10 = df.nlargest(10, 'Revenue')
+bottom_10 = df.nsmallest(10, 'Revenue')
+```
+
+---
+
+## Data Cleaning
+
+```python
+df = df.drop_duplicates()
+df['amount'] = df['amount'].fillna(0)
+df['name'] = df['name'].fillna('Unknown')
+df['date'] = pd.to_datetime(df['date'], errors='coerce')
+df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
+
+# Remove outliers (IQR)
+Q1, Q3 = df['value'].quantile([0.25, 0.75])
+IQR = Q3 - Q1
+df = df[(df['value'] >= Q1 - 1.5*IQR) & (df['value'] <= Q3 + 1.5*IQR)]
+```
+
+---
+
+## Bridge Pattern: pandas → openpyxl
+
+```python
+from openpyxl import Workbook
+from openpyxl.utils.dataframe import dataframe_to_rows
+
+wb = Workbook()
+ws = wb.active
+ws.title = "Analysis"
+
+for r_idx, row in enumerate(dataframe_to_rows(summary, index=True, header=True), 1):
+    for c_idx, value in enumerate(row, 1):
+        ws.cell(row=r_idx + 3, column=c_idx + 1, value=value)
+```
+
+---
+
+## KPI Summary Card
+
+```python
+kpis = [
+    ('Total Revenue', total_revenue, '$#,##0'),
+    ('Avg Order Value', avg_order, '$#,##0.00'),
+    ('Growth Rate', growth_rate, '0.0%'),
+    ('Total Orders', total_orders, '#,##0'),
+]
+col = 2
+for label, value, fmt in kpis:
+    ws.cell(row=3, column=col, value=label)
+    ws.cell(row=4, column=col, value=value)
+    ws.cell(row=4, column=col).number_format = fmt
+    col += 3
+```
+
+---
+
+## Cross-Validation Review Sheet
+
+```python
+review_ws = wb.create_sheet("Review")
+review_ws.sheet_properties.tabColor = "FFC000"
+
+checks = [
+    ["Check", "Expected", "Actual", "Status"],
+    ["Total Revenue", "=SUM(Data!B2:B100)", "=Summary!B10", '=IF(B2=C2,"✓ PASS","✗ FAIL")'],
+    ["Row Count", "=COUNTA(Data!A:A)-1", "=Summary!B3", '=IF(B3=C3,"✓ PASS","✗ FAIL")'],
+]
+for i, row in enumerate(checks, 1):
+    for j, val in enumerate(row, 1):
+        review_ws.cell(row=i, column=j, value=val)
+```
+
+---
+
+## xlsx.py Pivot Workflow
+
+```bash
+python3 "$XLSX_SKILL_DIR/xlsx.py" inspect data.xlsx --pretty
+python3 "$XLSX_SKILL_DIR/xlsx.py" pivot data.xlsx output.xlsx \
+    --source "Data!A1:F500" \
+    --rows "Product,Region" \
+    --values "Revenue:sum,Units:count" \
+    --location "Summary!A3" \
+    --style "finance" \
+    --chart "bar"
+python3 "$XLSX_SKILL_DIR/xlsx.py" validate output.xlsx
+```
+
+### PivotTable Best Practices
+- Source data: first row must have unique, non-blank headers
+- No merged cells or blank rows in source range
+- Place pivot on a dedicated sheet, position at A3 or B2
+- Row axis: primary grouping; Column axis: ≤10 distinct values
+- Values: numeric measures only
+
+### PivotTable Troubleshooting
+| Symptom | Remedy |
+|---------|--------|
+| "Field not found" | Check header spelling via `inspect` |
+| PivotTable empty | Ensure `--source` covers all data rows |
+| `validate` reports pivot errors | Critical — must fix |
+| `validate` reports `pass_with_warnings` | Safe to deliver |
+
+---
+
+## Alternating Column Structure (Key-Value Pairs)
+
+When odd columns contain identifiers and even columns contain corresponding values (e.g., O=PartNo, P=Qty, Q=PartNo, R=Qty, ...):
+
+**Detection heuristic**:
+- Odd columns have repeated values or category codes
+- Even columns are numeric
+- Headers alternate between descriptive and quantitative names
+
+**Solution**: Use SUMIF across the combined key/value ranges:
+
+```python
+# Excel formula: =SUMIF(O2:W2, A2, P2:X2)
+# SUMIF matches position-by-position across multi-column ranges
+formula = f'=SUMIF(O{row}:W{row},A{row},P{row}:X{row})'
+```
+
+---
+
+## FIFO Allocation Formula (Cumulative Deduction)
+
+Scenario: Allocate limited inventory to order lines in sequence — each row gets what's left after previous rows consumed their share.
+
+**Formula template** (row N):
+```
+=MAX(0, MIN(OrderQty_N,
+    TotalInventory_for_key - SUM_of_already_allocated_above))
+```
+
+**Example** (H column = allocated qty):
+```python
+# Row 2 (first row): allocate up to available inventory
+f'=MIN(G2, SUMIFS(Sheet2!D:D, Sheet2!A:A, A2, Sheet2!B:B, D2))'
+
+# Row 3+ (subsequent): subtract already-allocated from rows above
+f'=MAX(0, MIN(G{r}, SUMIFS(Sheet2!D:D, Sheet2!A:A, A{r}, Sheet2!B:B, D{r})'
+f'  - SUMIFS(H$1:H{r-1}, A$1:A{r-1}, A{r}, D$1:D{r-1}, D{r})))'
+```
+
+**Key**: `SUMIFS(H$1:H{r-1}, ...)` creates a running total of already-allocated amounts, achieving row-by-row deduction.
+
+⚠️ This is a self-referencing formula pattern — openpyxl cannot verify it. Must open in Excel to confirm calculation.
+
+### Data Provenance Implementation
+
+```python
+src_ws = wb.create_sheet("Sources")
+src_ws.sheet_properties.tabColor = PRIMARY
+headers = ["Data Description", "Source Name", "Source URL", "Access Date"]
+for col, h in enumerate(headers, 1):
+    cell = src_ws.cell(row=1, column=col, value=h)
+    cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD, color="FFFFFF")
+    cell.fill = PatternFill(start_color=PRIMARY, end_color=PRIMARY, fill_type="solid")
+```
--- a/skills/xlsx/scenes/analyze.md
+++ b/skills/xlsx/scenes/analyze.md
@@ -0,0 +1,95 @@
+# Scene: Data Analysis → Excel Output
+
+## When This Applies
+User wants to analyze data (statistics, trends, comparisons, pivots, aggregation) and receive results as an Excel file — possibly with charts, summary tables, or dashboards.
+
+This scene bridges **pandas analysis** with **openpyxl output**. The deliverable is always an .xlsx file.
+
+## Workflow
+
+```
+1. LOAD       → Read input data (CSV/XLSX/JSON/DB)
+2. EXPLORE    → Understand structure, quality, distributions
+3. ANALYZE    → Compute metrics, aggregations, statistical tests
+4. DESIGN     → Plan Excel output (sheets, charts, KPIs)
+5. BUILD      → Write analysis results to .xlsx with formatting
+6. CHART      → Add charts (Excel-native or embedded matplotlib)
+7. QA         → recalc → audit → scan → chart-verify
+8. PIVOT      → If needed, run xlsx.py pivot as final step
+9. VALIDATE   → validate → deliver
+```
+
+## Analysis Framework
+
+### Phase A: Problem Framing
+- What question is the user trying to answer?
+- Who will consume this output? (executive summary vs. detailed analysis)
+- What decisions will be made based on this data?
+
+### Phase B: Data Quality Assessment
+- Missing values: count, pattern (random vs. systematic)
+- Outliers: statistical detection (IQR, z-score)
+- Data types: numeric vs. categorical, date parsing
+- Duplicates: exact and fuzzy
+
+### Phase C: Exploratory Analysis
+- Distributions: histograms, box plots for key variables
+- Correlations: pairwise for numeric columns
+- Segmentation: group-by analysis on categorical dimensions
+- Time patterns: trends, seasonality if time-series data
+
+### Phase D: Insight Extraction
+- Rank findings by business impact, not statistical significance
+- Each insight must be actionable — "so what?" test
+- Cross-validate: check the same insight from a different angle
+
+### Phase E: Cross-Validation
+- Sanity check totals against known benchmarks
+- Verify computed metrics with alternative formulas
+- Document any assumptions or limitations in the output
+
+**Industry-specific frameworks:**
+- **Finance**: Variance analysis → trend decomposition → ratio analysis → peer comparison
+- **Marketing**: Funnel analysis → cohort analysis → attribution → ROI calculation
+- **Operations**: Throughput analysis → bottleneck identification → utilization rates → SLA compliance
+
+---
+
+## Multi-Sheet Report Layout
+
+```
+Sheet 1: "Dashboard"     — KPI cards + summary chart
+Sheet 2: "Detail"        — Full analysis table with formatting
+Sheet 3: "Charts"        — Additional visualizations
+Sheet 4: "Raw Data"      — Original data for reference (tab color: gray)
+```
+
+### KPI Summary Card Pattern
+
+Place 4-6 KPI metrics at the top of Dashboard sheet (row 3-4), each spaced 3 columns apart. Include label (small, gray) and value (large, bold, themed) with appropriate number format.
+
+---
+
+## PivotTable Decision
+
+| Situation | Use |
+|-----------|-----|
+| Need interactive PivotTable in Excel | `"$XLSX_SKILL_DIR/xlsx.py" pivot` |
+| Just need a summary table (static) | pandas `pivot_table` → openpyxl |
+| Simple aggregation (1 dimension) | pandas `groupby` → openpyxl |
+
+**Trigger phrases**: summarize, aggregate, group by, categorize, breakdown, distribution, tally, totals per, cross-tab, 汇总, 透视, 分类统计, 交叉分析
+
+---
+
+## Data Provenance
+
+When analysis uses external data, create a **"Sources" sheet** (tab color: `PRIMARY`) with columns: Data Description | Source Name | Source URL | Access Date.
+
+Skip when user provides all data directly.
+
+---
+
+## Code Recipes
+
+For specific code patterns (aggregation, time series, comparison, cleaning, bridge pattern), load `scenes/analyze-recipes.md` on demand.
--- a/skills/xlsx/scenes/convert.md
+++ b/skills/xlsx/scenes/convert.md
@@ -0,0 +1,133 @@
+# Scene: Format Conversion
+
+## When This Applies
+User wants to convert between tabular file formats: CSV↔XLSX, JSON→XLSX, TSV→XLSX, PDF table→XLSX, or XLSX→CSV/JSON.
+
+## Conversion Matrix
+
+| From | To | Method |
+|------|-----|--------|
+| CSV/TSV → XLSX | pandas read → openpyxl write with formatting | Most common |
+| JSON → XLSX | pandas json_normalize → openpyxl | Flatten nested structures |
+| XLSX → CSV | pandas read_excel → to_csv | Simple export |
+| XLSX → JSON | pandas read_excel → to_json | With orient parameter |
+| PDF table → XLSX | pdfplumber/tabula extract → openpyxl | Needs table detection |
+| Image table → XLSX | OCR → pandas → openpyxl | Last resort, error-prone |
+
+## CSV/TSV → XLSX
+
+```python
+import pandas as pd
+from openpyxl import Workbook
+from openpyxl.utils.dataframe import dataframe_to_rows
+
+# Read with encoding detection
+df = pd.read_csv('input.csv', encoding='utf-8')  
+# Common encodings: utf-8, gbk, gb2312, latin-1, shift_jis
+
+# Handle messy CSVs
+df = pd.read_csv('input.csv',
+    encoding='utf-8',
+    sep=',',              # or '\t', ';', '|'
+    skiprows=2,           # skip junk header rows
+    na_values=['N/A', '-', ''],
+    dtype=str,            # read everything as string first, convert later
+    on_bad_lines='skip'   # skip malformed rows
+)
+
+# Convert types after reading
+df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
+df['date'] = pd.to_datetime(df['date'], errors='coerce')
+
+# Write to Excel with formatting
+wb = Workbook()
+ws = wb.active
+
+# Write data starting at B4 (with theme formatting)
+for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), 4):
+    for c_idx, value in enumerate(row, 2):
+        ws.cell(row=r_idx, column=c_idx, value=value)
+
+# Apply design tokens from engines/design.md
+# ...
+
+wb.save('output.xlsx')
+```
+
+## JSON → XLSX
+
+```python
+import pandas as pd
+import json
+
+# Flat JSON
+df = pd.read_json('input.json')
+
+# Nested JSON — flatten
+with open('input.json') as f:
+    data = json.load(f)
+
+# If it's a list of objects
+df = pd.json_normalize(data, max_level=2)
+
+# If nested with specific record path
+df = pd.json_normalize(data, record_path='items', meta=['id', 'name'])
+
+# Write to Excel...
+```
+
+## XLSX → CSV/JSON
+
+```python
+# To CSV
+df = pd.read_excel('input.xlsx', sheet_name='Data')
+df.to_csv('output.csv', index=False, encoding='utf-8-sig')  # utf-8-sig for Excel compatibility
+
+# To JSON
+df.to_json('output.json', orient='records', force_ascii=False, indent=2)
+
+# Multiple sheets → multiple CSVs
+sheets = pd.read_excel('input.xlsx', sheet_name=None)
+for name, df in sheets.items():
+    df.to_csv(f'output_{name}.csv', index=False, encoding='utf-8-sig')
+```
+
+## PDF Table → XLSX
+
+```python
+# Method 1: pdfplumber (preferred for most PDFs)
+import pdfplumber
+
+tables = []
+with pdfplumber.open('input.pdf') as pdf:
+    for page in pdf.pages:
+        page_tables = page.extract_tables()
+        for table in page_tables:
+            tables.extend(table)
+
+# Clean and convert to DataFrame
+df = pd.DataFrame(tables[1:], columns=tables[0])
+
+# Method 2: tabula-py (Java-based, good for complex tables)
+# import tabula
+# dfs = tabula.read_pdf('input.pdf', pages='all', multiple_tables=True)
+```
+
+## Encoding Gotchas
+
+| Scenario | Encoding | Tip |
+|----------|----------|-----|
+| Chinese data from Windows | `gbk` or `gb2312` | Try gbk first |
+| Japanese data | `shift_jis` or `cp932` | |
+| European data | `latin-1` or `cp1252` | |
+| Excel-generated CSV | `utf-8-sig` (has BOM) | pandas handles automatically |
+| Output CSV for Excel | Write with `utf-8-sig` | Prevents garbled Chinese in Excel |
+
+## Quality Checks After Conversion
+
+- [ ] Row count matches source
+- [ ] No garbled characters (encoding correct)
+- [ ] Numeric columns are numbers, not strings
+- [ ] Dates are date objects, not text
+- [ ] No blank rows/columns from source artifacts
+- [ ] Headers are in the correct row
--- a/skills/xlsx/scenes/create.md
+++ b/skills/xlsx/scenes/create.md
@@ -0,0 +1,105 @@
+# Scene: Create New Spreadsheet
+
+## When This Applies
+User wants to create a new Excel file from scratch — a table, template, schedule, report, or any structured data output.
+
+For financial models, also load `scenes/finance.md`.
+
+## Workflow
+
+```
+1. PLAN     → Identify all sheets, their structure, formulas, cross-references
+2. STYLE    → Load engines/design.md, apply default palette
+3. BUILD    → Create workbook, write data/formulas/formatting per sheet
+4. QA       → recalc → audit → scan → chart-verify (if charts)
+5. PIVOT    → If needed, run pivot command LAST
+6. VALIDATE → validate → exit 0 = deliver
+```
+
+## Layout & Styling
+
+All layout rules (Canvas Origin B2, column widths, row heights, margins) and styling (title/header/data/totals) are defined in **`engines/design.md`** — the single source of truth. Do not duplicate here.
+
+Quick reference for sheet structure:
+```
+Row 1:  [top margin]
+Row 2:  Title (B2)
+Row 3:  [spacer]
+Row 4:  Column headers
+Row 5+: Data rows
+Last+1: Totals row
+Last+3: Notes/sources
+```
+
+## Multi-Sheet Workbooks
+
+### Cross-Sheet References
+```python
+# Reference another sheet
+sheet['C5'] = "=Data!B10"
+
+# Sheet names with spaces need quotes
+sheet['C5'] = "='Sales Data'!B10"
+
+# Green font for cross-sheet links (Finance theme)
+sheet['C5'].font = Font(color="008000")
+```
+
+### Common Multi-Sheet Patterns
+- **Data + Summary**: Raw data on Sheet1, formulas/charts on Summary
+- **Monthly tabs**: Jan, Feb, Mar... + Annual Summary
+- **Input + Output**: Assumptions sheet + Calculations sheet + Dashboard
+
+## Template Patterns
+
+### Simple Data Table
+```python
+wb = Workbook()
+ws = wb.active
+ws.title = "Data"
+
+# Title + Headers + Data + Totals styling → see engines/design.md §11 Code Templates
+# Only show formula logic here:
+
+# Headers at B4
+headers = ['Product', 'Q1', 'Q2', 'Q3', 'Q4', 'Total']
+for col, h in enumerate(headers, 2):
+    cell = ws.cell(row=4, column=col, value=h)
+
+# Data rows starting at row 5
+# ...
+
+# Totals row
+total_row = last_data_row + 1
+ws.cell(row=total_row, column=2, value='Total')
+for col in range(3, 7):  # Q1-Q4
+    letter = get_column_letter(col)
+    ws.cell(row=total_row, column=col).value = f'=SUM({letter}5:{letter}{last_data_row})'
+
+# Grand total
+ws.cell(row=total_row, column=7).value = f'=SUM(C{total_row}:F{total_row})'
+```
+
+### Schedule / Calendar
+- Use merged cells for day headers
+- Conditional formatting for weekends (light gray fill)
+- Freeze panes: `ws.freeze_panes = 'C5'` (freeze header + left labels)
+
+### Checklist / Tracker
+- Checkbox column using data validation (`TRUE`/`FALSE`)
+- Status column with conditional formatting (green/amber/red)
+- Progress bar using data bar conditional formatting
+
+## Freeze Panes & Print
+
+```python
+# Freeze headers (row 4) and label column (col B)
+ws.freeze_panes = 'C5'  # Rows 1-4 and cols A-B stay visible
+
+# Print setup
+ws.page_setup.orientation = 'landscape'
+ws.page_setup.fitToWidth = 1
+ws.page_setup.fitToHeight = 0
+ws.print_area = 'B2:H50'
+ws.print_title_rows = '4:4'  # Repeat header on each page
+```
--- a/skills/xlsx/scenes/edit-patterns.md
+++ b/skills/xlsx/scenes/edit-patterns.md
@@ -0,0 +1,222 @@
+# Edit Patterns — Reusable Code for Complex Edit Operations
+
+> Load this file ON DEMAND when you encounter grouping, sorting, block detection, or other complex edit patterns.
+> Do NOT load upfront for simple edits.
+
+---
+
+## Pattern: Block Detection
+
+Data is often split into independent blocks separated by blank rows or keyword rows (e.g., TOTAL, Subtotal).
+
+```python
+def detect_blocks(ws, col=1, start_row=1, end_row=None,
+                  separator='blank', keyword='TOTAL'):
+    """
+    Detect data block boundaries.
+    separator: 'blank' (empty row) or 'keyword' (row containing keyword)
+    Returns: list of (start_row, end_row) tuples
+    """
+    if end_row is None:
+        end_row = ws.max_row
+    blocks, block_start = [], None
+    for row in range(start_row, end_row + 1):
+        val = ws.cell(row=row, column=col).value
+        is_blank = val is None or (isinstance(val, str) and val.strip() == '')
+        is_kw = (separator == 'keyword' and
+                 isinstance(val, str) and keyword in str(val).upper())
+        if separator == 'blank':
+            if not is_blank and block_start is None:
+                block_start = row
+            elif is_blank and block_start is not None:
+                blocks.append((block_start, row - 1))
+                block_start = None
+        elif separator == 'keyword':
+            if is_kw:
+                if block_start:
+                    blocks.append((block_start, row))
+                    block_start = None
+            elif not is_blank and block_start is None:
+                block_start = row
+    if block_start:
+        blocks.append((block_start, end_row))
+    return blocks
+```
+
+---
+
+## Pattern: Pre-filter Null Rows
+
+Before any groupby/aggregation, filter out rows where key columns are empty.
+
+```python
+def pre_filter_rows(ws, key_cols, start_row, end_row):
+    """Return row numbers where ALL key columns are non-null."""
+    return [row for row in range(start_row, end_row + 1)
+            if all(normalize_cell_value(ws.cell(row=row, column=c).value) is not None
+                   for c in key_cols)]
+```
+
+---
+
+## Pattern: Sort with Formula Rewrite
+
+When sorting rows by swapping data (not using `insert_rows`), formulas must be regenerated with new row numbers.
+
+```python
+def sort_block_with_formulas(ws, block_rows, sort_col, formula_templates,
+                             descending=True):
+    """
+    Sort rows within a block, regenerating formulas.
+    formula_templates: dict {col_index: '=B{row}+C{row}'}
+    """
+    # 1. Read all row data + compute sort key
+    rows_data = []
+    for r in block_rows:
+        vals = {c: ws.cell(row=r, column=c).value for c in range(1, ws.max_column + 1)}
+        rows_data.append(vals)
+    rows_data.sort(key=lambda x: (x.get(sort_col) or 0), reverse=descending)
+
+    # 2. Write back with new row numbers
+    for i, rd in enumerate(rows_data):
+        target = block_rows[i]
+        for col, val in rd.items():
+            if col in formula_templates:
+                ws.cell(row=target, column=col).value = formula_templates[col].format(row=target)
+            else:
+                ws.cell(row=target, column=col).value = val
+```
+
+---
+
+## Pattern: Group-Merge (Aggregate by Key)
+
+Group rows by a key column. Take first-row values for some columns, sum for others.
+
+```python
+from collections import OrderedDict
+
+def group_merge_rows(ws, key_col, start_row, end_row, first_cols, sum_cols):
+    """
+    Group by key_col, merge rows.
+    first_cols: take value from first row in group
+    sum_cols: sum values across group
+    """
+    groups = OrderedDict()
+    for row in range(start_row, end_row + 1):
+        key = normalize_cell_value(ws.cell(row=row, column=key_col).value)
+        if key is None:
+            continue
+        if key not in groups:
+            groups[key] = {
+                'first': {c: ws.cell(row=row, column=c).value for c in first_cols},
+                'sums': {c: 0.0 for c in sum_cols},
+            }
+        for c in sum_cols:
+            v = normalize_cell_value(ws.cell(row=row, column=c).value)
+            if v is not None:
+                try:
+                    groups[key]['sums'][c] += float(v)
+                except (ValueError, TypeError):
+                    pass
+    return groups
+```
+
+---
+
+## Pattern: Group-Max-Keep-Ties
+
+Group by key, find max value per group, keep ALL rows that match the max (not just the first).
+
+```python
+from collections import defaultdict
+
+def group_max_keep_ties(rows, key_func, value_func, filter_null=True):
+    """
+    Keep all rows with the maximum value per group (ties preserved).
+    rows: list of row dicts or tuples
+    key_func: row → group key
+    value_func: row → comparable value (e.g., date)
+    """
+    groups = defaultdict(list)
+    for row in rows:
+        val = value_func(row)
+        if filter_null and val is None:
+            continue
+        groups[key_func(row)].append(row)
+
+    kept = []
+    for key, group in groups.items():
+        max_val = max(value_func(r) for r in group)
+        kept.extend(r for r in group if value_func(r) == max_val)
+    return kept
+```
+
+---
+
+## Pattern: Sequence Fill (Smart Numbering)
+
+Fill blank rows with "parent number + letter suffix" (e.g., 5 → 5a, 5b, ..., 5z, 5aa).
+
+```python
+import re
+
+def get_letter_suffix(n):
+    """0=a, 25=z, 26=aa, 27=ab..."""
+    if n < 26:
+        return chr(ord('a') + n)
+    return chr(ord('a') + (n // 26) - 1) + chr(ord('a') + (n % 26))
+
+def fill_sequential_labels(ws, col, start_row, end_row):
+    last_base, blank_count = None, 0
+    for row in range(start_row, end_row + 1):
+        val = ws.cell(row=row, column=col).value
+        if val is not None:
+            m = re.match(r'^(\d+)', str(val))
+            if m:
+                last_base = m.group(1)
+            blank_count = 0
+        else:
+            if last_base is not None:
+                ws.cell(row=row, column=col).value = f"{last_base}{get_letter_suffix(blank_count)}"
+                blank_count += 1
+```
+
+---
+
+## Pattern: Zero-as-Blank Output
+
+When merged/aggregated values of 0 should display as empty:
+
+```python
+# Method 1: Write None (best for programmatic verification)
+cell.value = computed_value if computed_value != 0 else None
+
+# Method 2: Number format (best for Excel viewing)
+cell.value = computed_value
+cell.number_format = '0.00;-0.00;""'  # positive;negative;zero(blank)
+```
+
+---
+
+## Pattern: Side-by-Side Table Detection
+
+Some sheets contain multiple independent tables arranged horizontally (separated by empty columns).
+
+```python
+def detect_side_by_side_tables(ws):
+    """Find column groups separated by all-null columns."""
+    tables = []
+    current_start = None
+    for col in range(1, ws.max_column + 1):
+        has_data = any(ws.cell(row=r, column=col).value is not None
+                       for r in range(1, ws.max_row + 1))
+        if has_data and current_start is None:
+            current_start = col
+        elif not has_data and current_start is not None:
+            tables.append((current_start, col - 1))
+            current_start = None
+    if current_start:
+        tables.append((current_start, ws.max_column))
+    return tables  # [(start_col, end_col), ...]
+```
--- a/skills/xlsx/scenes/edit.md
+++ b/skills/xlsx/scenes/edit.md
@@ -0,0 +1,195 @@
+# Scene: Edit Existing Spreadsheet
+
+## When This Applies
+User provides an existing .xlsx/.xlsm file and wants to modify it — fill data, fix formulas, beautify layout, add sheets, restructure.
+
+## Core Principle: Preserve First
+
+**Study the existing file before making ANY changes.** The original format, style, and conventions take absolute priority over default guidelines.
+
+### VBA Preservation Rule
+When opening `.xlsm` files, **always** use `keep_vba=True`:
+```python
+wb = load_workbook('file.xlsm', keep_vba=True)
+# Edit data/formatting as usual
+wb.save('output.xlsm')  # VBA modules preserved
+```
+**Never** save a `.xlsm` as `.xlsx` unless the user explicitly requests macro removal. This silently destroys all VBA code.
+
+## Workflow
+
+```
+1. INSPECT   → Read the file, understand structure
+2. PLAN      → Identify what to change vs what to preserve
+3. BACKUP    → If destructive changes, suggest user keeps original
+4. MODIFY    → Make targeted changes
+5. QA        → recalc → audit → scan
+6. VALIDATE  → validate → deliver
+```
+
+## Step 1: Inspect the File
+
+### 1a. Structure Survey
+
+```python
+from openpyxl import load_workbook
+
+# Read with formulas preserved
+wb = load_workbook('input.xlsx')
+
+# Survey structure
+for name in wb.sheetnames:
+    ws = wb[name]
+    print(f"Sheet: {name}, Dimensions: {ws.dimensions}, "
+          f"Rows: {ws.max_row}, Cols: {ws.max_column}")
+
+# Check for existing styles
+sample = ws['B4']
+print(f"Font: {sample.font.name}, Size: {sample.font.size}, "
+      f"Bold: {sample.font.bold}, Fill: {sample.fill.fgColor}")
+```
+
+Also run `python3 "$XLSX_SKILL_DIR/xlsx.py" inspect input.xlsx --pretty` for structured overview.
+
+### 1b. Semantic Data Sampling (MANDATORY for merge/copy/aggregate operations)
+
+**Don't just print headers — print actual data rows to understand column semantics:**
+
+```python
+# Sample the first 5 rows of each sheet (requires: from openpyxl.utils import get_column_letter)
+for name in wb.sheetnames:
+    ws = wb[name]
+    print(f"\n=== {name} ===")
+    for row in range(1, min(6, ws.max_row + 1)):
+        vals = []
+        for col in range(1, ws.max_column + 1):
+            v = ws.cell(row=row, column=col).value
+            if v is not None:
+                vals.append(f"{get_column_letter(col)}={v}")
+        if vals:
+            print(f"  Row {row}: {vals}")
+```
+
+### 1c. Cross-Sheet Column Semantic Mapping (MANDATORY before any merge/copy)
+
+**⚠️ NEVER copy columns by position index alone when merging sheets.**
+
+When two sheets have similar headers (e.g., both have columns A-V), the same column position may hold completely different data. Always:
+
+1. Print sample data (not just headers) from both source and target sheets
+2. For each column, identify the data type and value domain
+3. Create an explicit column mapping dict before writing any data
+
+```python
+# Example: source sheet E column = amount, target sheet E column = type code
+# → Do NOT copy source.E → target.E. Build semantic mapping first.
+column_mapping = {
+    'src_I': 'dst_E',   # amount → amount (different positions!)
+    'src_E': 'dst_I',   # type → type
+}
+```
+
+### 1d. Cell Value Normalization
+
+Canonical implementation lives in **`templates/base.py → normalize_cell_value()`**.
+Referenced by `edit-patterns.md` and `quality/pipeline.md`.
+
+```python
+from base import normalize_cell_value
+# normalize_cell_value(value) → None for blank/NBSP/ZWSP, otherwise original value
+```
+
+**Always use this when checking for empty cells** — `\xa0` (NBSP) looks blank but fails `is None`.
+
+## Step 2: Match Existing Styles
+
+When adding new cells/rows to a styled file, use **`copy_style()` from `templates/base.py`**:
+
+```python
+from base import copy_style
+
+# copy_style(source_cell, target_cell)
+# → copies font, fill, border, alignment, number_format
+```
+
+## Common Edit Operations
+
+### Fill / Complete Data
+```python
+# Add data to empty cells while preserving existing formatting
+for row in range(start, end + 1):
+    cell = ws.cell(row=row, column=col)
+    if cell.value is None:
+        cell.value = new_value
+        # Copy style from the cell above
+        copy_style(ws.cell(row=row-1, column=col), cell)
+```
+
+### Insert Rows / Columns
+```python
+# Insert 3 rows at position 10
+ws.insert_rows(10, amount=3)
+# Note: openpyxl does NOT rewrite formulas — references to rows at/below 10 must be updated manually
+
+# Insert column at position D
+ws.insert_cols(4)
+```
+
+**Warning**: Inserting/deleting rows can break chart references and named ranges. Verify after insertion.
+
+### Restructure Data
+```python
+# Move data from one layout to another
+# Read all data first, then rewrite
+data = []
+for row in ws.iter_rows(min_row=2, values_only=True):
+    data.append(row)
+
+# Clear and rewrite in new structure
+# ...
+```
+
+### Fix Formulas
+```python
+# Find cells with errors (after recalc)
+wb_data = load_workbook('input.xlsx', data_only=True)
+ws_data = wb_data.active
+
+wb_formula = load_workbook('input.xlsx')
+ws_formula = wb_formula.active
+
+for row in ws_data.iter_rows():
+    for cell in row:
+        if isinstance(cell.value, str) and cell.value.startswith('#'):
+            formula_cell = ws_formula[cell.coordinate]
+            print(f"Error at {cell.coordinate}: {cell.value}, Formula: {formula_cell.value}")
+```
+
+## Format Beautification
+
+When the user asks to "make it look better" or "format nicely":
+
+→ **Load `engines/design.md`** and apply its complete styling system (tokens, fonts, layout, colors).
+
+**But**: if the file already has a consistent style, enhance it rather than replacing it. Add what's missing (alignment, column widths, alternating fills) without changing existing colors or fonts. Use `copy_style()` (above) to match adjacent cells.
+
+## ⚠️ Dangerous Operations
+
+| Operation | Risk | Mitigation |
+|-----------|------|-----------|
+| `load_workbook(data_only=True)` then save | Formulas permanently lost | Never save after data_only read |
+| Delete rows/cols with formula dependencies | #REF! errors | Run audit after deletion |
+| Modify pivot table output with openpyxl | Corrupt pivotCache | Never — regenerate via xlsx.py pivot |
+| Overwrite merged cells | Layout breaks | Check `ws.merged_cells.ranges` first |
+| Manual row sort (swap row data) | Formulas still reference old row numbers | **Regenerate formula strings with target row number** (see Common Patterns → Sort with Formula Rewrite) |
+| Write SUM formula → verify with data_only | Get `None` — formula not evaluated | Compute value in Python for verification; write computed value or use recalc |
+
+---
+
+## Common Patterns
+
+For complex edit operations (grouping, sorting, block detection, merging, sequence fill, etc.):
+
+→ **Load `scenes/edit-patterns.md`** on demand.
+
+Available patterns: Block Detection, Pre-filter Null, Sort with Formula Rewrite, Group-Merge, Group-Max-Keep-Ties, Sequence Fill, Zero-as-Blank, Side-by-Side Table Detection.
--- a/skills/xlsx/scenes/finance.md
+++ b/skills/xlsx/scenes/finance.md
@@ -0,0 +1,318 @@
+# Financial Model Specialist Guide
+
+Load this reference when the task involves: financial statements, budgets, forecasts, DCF models, LBO, valuation, P&L, balance sheets, cash flow, or any investment banking deliverable.
+
+Also load `engines/design.md` → use **Finance** scene overrides (IB text color rules, section dividers).
+
+---
+
+## Financial Model Architecture
+
+### Standard Sheet Structure
+```
+Assumptions Sheet:
+  - All inputs, growth rates, margins, multiples
+  - Blue font for every changeable number
+  - Yellow background for key assumptions
+  - Source citations in adjacent cells or comments
+
+Income Statement / P&L:
+  - Revenue → COGS → Gross Profit → OpEx → EBIT → Interest → Tax → Net Income
+  - All values are formulas referencing Assumptions
+
+Balance Sheet:
+  - Assets = Liabilities + Equity (must balance!)
+  - Include balance check row: =Assets-Liabilities-Equity (should be 0)
+
+Cash Flow Statement:
+  - Operating → Investing → Financing → Net Change
+  - Ending Cash = Beginning Cash + Net Change
+
+Valuation / Output:
+  - DCF, comparables, or whatever model the user needs
+  - Green font for values pulled from other sheets
+```
+
+### Formula Construction Rules
+
+```python
+# ✅ CORRECT: Reference assumptions
+sheet['C10'] = '=C9*(1+Assumptions!$B$5)'  # Growth rate from assumptions
+
+# ❌ WRONG: Hardcoded magic number
+sheet['C10'] = '=C9*1.05'
+
+# ✅ CORRECT: Protected division
+sheet['D15'] = '=IF(C15=0,"-",B15/C15)'
+
+# ✅ CORRECT: Consistent formula across periods
+# If D10 = '=D9*(1+Assumptions!$B$5)' then E10 must follow the same pattern
+```
+
+### Assumptions Sheet Layout
+```
+B4: "Key Assumptions"           (section header, bold)
+B6: "Revenue Growth Rate"       C6: 0.05    (blue font, yellow bg)
+B7: "Gross Margin"              C7: 0.65    (blue font, yellow bg)
+B8: "OpEx as % Revenue"         C8: 0.30    (blue font, yellow bg)
+B9: "Tax Rate"                  C9: 0.21    (blue font, yellow bg)
+B10: "Discount Rate (WACC)"     C10: 0.10   (blue font, yellow bg)
+B11: "Terminal Growth Rate"     C11: 0.02   (blue font, yellow bg)
+```
+
+### Source Documentation for Hardcodes
+
+Every hardcoded input MUST have a source citation:
+
+```python
+# In cell comment
+ws['C6'].comment = Comment(
+    "Source: Company 10-K, FY2024, Page 45, Revenue Growth",
+    "Z.ai"
+)
+
+# Or in adjacent cell (if end of table)
+ws['D6'] = "Source: Management guidance, Q3 2024 earnings call"
+ws['D6'].font = Font(size=8, italic=True, color="808080")
+```
+
+---
+
+## Number Formatting (CRITICAL)
+
+> Finance-specific formats below. For general number formats, see `engines/design.md §10`.
+> Finance formats take priority when both apply.
+
+```python
+FINANCE_FORMATS = {
+    # Currency — zeros as dash, negatives in parentheses
+    'currency': '$#,##0;($#,##0);"-"',
+    'currency_k': '$#,##0,"K";($#,##0,"K");"-"',
+    'currency_mm': '$#,##0.0,,"M";($#,##0.0,,"M");"-"',
+
+    # Percentages — one decimal
+    'pct': '0.0%;(0.0%);"-"',
+
+    # Multiples — for EV/EBITDA, P/E etc.
+    'multiple': '0.0"x";(0.0"x");"-"',
+
+    # Years — MUST be text, not number (avoids "2,024")
+    'year': '@',
+
+    # Integer with thousands separator
+    'integer': '#,##0;(#,##0);"-"',
+
+    # Two decimal places
+    'decimal': '#,##0.00;(#,##0.00);"-"',
+
+    # Shares (millions)
+    'shares': '#,##0.0,,"M"',
+}
+
+# Apply
+cell.number_format = FINANCE_FORMATS['currency_mm']
+```
+
+**Always specify units in column headers**: "Revenue ($mm)", "Shares (M)", "Growth (%)"
+
+---
+
+## IB Model Layout Rules
+
+> All colors below use **design tokens from `engines/design.md`**. Do not hardcode hex values.
+> Finance-specific overrides (IB text color rules, section dividers) are in `design.md §2.4`.
+
+### Section Headers
+```python
+# Dark background, white bold text, merged across data width
+# Uses PRIMARY from design.md (or Finance palette PRIMARY from design.md)
+ws.merge_cells('B10:H10')
+ws['B10'] = 'Income Statement'
+ws['B10'].fill = PatternFill('solid', fgColor=PRIMARY)
+ws['B10'].font = Font(name=FONT_NAME, size=12, bold=HEADER_BOLD, color='FFFFFF')
+```
+
+### Data Alignment
+- Column labels (years, quarters): **right-aligned**
+- Row labels (line items): **left-aligned**
+- Submetrics: **indented** (add 2-3 spaces prefix)
+
+```python
+# Parent line item
+ws['B12'] = 'Revenue'
+ws['B12'].font = Font(name=FONT_NAME, bold=HEADER_BOLD)
+
+# Sub line item (indented)
+ws['B13'] = '   Product Revenue'
+ws['B14'] = '   Service Revenue'
+```
+
+### Totals Formatting
+```python
+# Uses design tokens — see engines/design.md §6.3
+total_border = Border(top=Side(style='thin', color=PRIMARY))
+for col in range(3, 9):  # C through H
+    cell = ws.cell(row=total_row, column=col)
+    cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD)
+    cell.border = total_border
+```
+
+### Grid Lines
+```python
+ws.sheet_view.showGridLines = False  # Standard — defined in design.md §7.3
+```
+
+---
+
+## Balance Check Pattern
+
+For any financial model with a balance sheet:
+
+```python
+# Balance check row (should always be 0)
+check_row = bs_end + 2
+ws.cell(row=check_row, column=2, value='Balance Check')
+for col in range(3, last_col + 1):
+    letter = get_column_letter(col)
+    ws.cell(row=check_row, column=col).value = \
+        f'={letter}{assets_total_row}-{letter}{liab_total_row}-{letter}{equity_total_row}'
+    # Conditional: red if not zero
+    ws.conditional_formatting.add(
+        f'{letter}{check_row}',
+        CellIsRule(operator='notEqual', formula=['0'],
+                   font=Font(color='FF0000', bold=True))
+    )
+```
+
+---
+
+## Sensitivity / Scenario Tables
+
+```python
+# Two-way data table: vary growth rate (rows) × discount rate (cols)
+# Row headers: growth rates
+growth_rates = [0.02, 0.03, 0.04, 0.05, 0.06]
+# Col headers: discount rates
+discount_rates = [0.08, 0.09, 0.10, 0.11, 0.12]
+
+# Write headers
+for i, g in enumerate(growth_rates):
+    ws.cell(row=start_row + i + 1, column=start_col, value=g)
+    ws.cell(row=start_row + i + 1, column=start_col).number_format = '0.0%'
+    ws.cell(row=start_row + i + 1, column=start_col).font = Font(color='0000FF')
+
+for j, d in enumerate(discount_rates):
+    ws.cell(row=start_row, column=start_col + j + 1, value=d)
+    ws.cell(row=start_row, column=start_col + j + 1).number_format = '0.0%'
+    ws.cell(row=start_row, column=start_col + j + 1).font = Font(color='0000FF')
+
+# Fill formulas for each combination
+# Yellow background for the cell matching base case assumptions
+```
+
+---
+
+## Projection Period Patterns
+
+```python
+# Historical + Projected columns
+years = ['FY2022', 'FY2023', 'FY2024', 'FY2025E', 'FY2026E', 'FY2027E']
+
+for i, year in enumerate(years):
+    col = start_col + i
+    cell = ws.cell(row=header_row, column=col, value=year)
+    cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD)
+    cell.alignment = Alignment(horizontal='center')
+
+    # Visual separator between historical and projected
+    if year.endswith('E') and not years[i-1].endswith('E'):
+        # Add left border to mark transition
+        for row in range(header_row, last_row + 1):
+            ws.cell(row=row, column=col).border = Border(
+                left=Side(style='medium', color=PRIMARY))
+```
+
+---
+
+## Additional Model Templates
+
+### Template: P&L (Profit & Loss) Statement
+
+```
+Sheet: "P&L"
+  Row 1: Company Name + Period
+  Row 3: Headers (Month/Quarter columns)
+  
+  Revenue Section:
+    Product Revenue     =Assumptions!B5 * (1+Assumptions!C5)
+    Service Revenue     =Assumptions!B6 * (1+Assumptions!C6)
+    Total Revenue       =SUM(above)
+  
+  COGS Section:
+    Direct Costs        =Total_Revenue * (1 - Assumptions!gross_margin)
+    Gross Profit        =Total_Revenue - Direct_Costs
+    Gross Margin %      =IFERROR(Gross_Profit/Total_Revenue, 0)
+  
+  OpEx Section:
+    S&M, R&D, G&A       (each from Assumptions)
+    Total OpEx          =SUM(S&M:G&A)
+    EBITDA              =Gross_Profit - Total_OpEx
+    EBITDA Margin %     =IFERROR(EBITDA/Total_Revenue, 0)
+  
+  Below the Line:
+    D&A, Interest, Tax
+    Net Income          =EBITDA - D&A - Interest - Tax
+```
+
+### Template: Budget vs Actual
+
+```
+Sheet: "Budget vs Actual"
+  Columns: Category | Budget | Actual | Variance | Var %
+  
+  Key formulas:
+    Variance     =Actual - Budget
+    Var %        =IFERROR(Variance/Budget, 0)
+  
+  Conditional formatting:
+    Var % > 0    → Green font (favorable)
+    Var % < -10% → Red font + red fill (unfavorable)
+    Var % -10~0  → Orange font (watch)
+  
+  Summary section:
+    Total Budget    =SUM(Budget range)
+    Total Actual    =SUM(Actual range)
+    Overall Var %   =IFERROR((Total_Actual-Total_Budget)/Total_Budget, 0)
+```
+
+### Template: SaaS Metrics Dashboard
+
+```
+Sheet: "SaaS Metrics"
+  KPIs (each with formula, not hardcoded):
+    MRR              =SUMPRODUCT(Users * ARPU)
+    ARR              =MRR * 12
+    Net Revenue Retention =IFERROR((Starting_MRR + Expansion - Contraction - Churn) / Starting_MRR, 0)
+    CAC              =IFERROR(Total_S&M / New_Customers, 0)
+    LTV              =IFERROR(ARPU * Gross_Margin / Monthly_Churn_Rate, 0)
+    LTV:CAC Ratio    =IFERROR(LTV / CAC, 0)
+    Payback Months   =IFERROR(CAC / (ARPU * Gross_Margin), 0)
+    
+  Chart: MRR waterfall (starting → new → expansion → contraction → churn → ending)
+  Chart: LTV:CAC trend line
+```
+
+### Template: Project Budget Tracker
+
+```
+Sheet: "Project Budget"
+  Columns: Phase | Task | Planned Cost | Actual Cost | Remaining | % Spent | Status
+  
+  Key formulas:
+    Remaining   =Planned - Actual
+    % Spent     =IFERROR(Actual/Planned, 0)
+    Status      =IF(% Spent>1, "Over Budget", IF(% Spent>0.9, "At Risk", "On Track"))
+    
+  Phase subtotals with SUBTOTAL function
+  Grand total row with project-level health indicator
+```
--- a/skills/xlsx/scenes/finance_lite.md
+++ b/skills/xlsx/scenes/finance_lite.md
@@ -0,0 +1,192 @@
+# Finance Lite — Simple Budget & Expense Guide
+
+Load this reference for: simple budgets, expense reports, fee tracking, cost summaries, revenue/expense comparison, personal finance, project cost tracking — any financial table that does **NOT** need DCF, LBO, three-statement linkage, sensitivity analysis, or IB-grade formatting.
+
+For complex financial models → use `scenes/finance.md` instead.
+
+Also load `engines/design.md` for styling (use **standard** design tokens, NOT IB overrides).
+
+---
+
+## When to Use finance_lite vs finance
+
+| Signal | finance_lite ✅ | finance.md ❌ |
+|--------|----------------|--------------|
+| 预算表 / budget | ✅ | |
+| 费用报表 / expense report | ✅ | |
+| 项目成本追踪 / project cost tracking | ✅ | |
+| 收支对比 / revenue vs cost | ✅ | |
+| 个人记账 / personal finance | ✅ | |
+| 简单 ROI 计算 / simple ROI calculation | ✅ | |
+| DCF / LBO / 估值模型 (valuation model) | | ✅ |
+| 三表联动 (P&L + BS + CF) | | ✅ |
+| 敏感性分析 / scenario table | | ✅ |
+| IB pitch book level formatting | | ✅ |
+
+---
+
+## Standard Sheet Structure
+
+```
+Sheet: "Budget" (or user-specified name)
+  Row 1: margin (whitespace)
+  Row 2: Title (merged, styled via setup_sheet())
+  Row 3: spacer
+  Row 4: Headers
+  Row 5+: Data rows
+  Last row: Totals (if applicable)
+```
+
+### Typical Column Patterns
+
+**Budget Table:**
+```
+Category (类别) | Budget Amount (预算金额) | Actual Amount (实际金额) | Variance (差异) | Variance Rate (差异率) | Notes (备注)
+```
+
+**Expense Report:**
+```
+Date (日期) | Category (类别) | Description (说明) | Amount (金额) | Claimant (报销人) | Status (状态)
+```
+
+**Revenue vs Cost:**
+```
+Month (月份) | Revenue (收入) | Cost (成本) | Gross Profit (毛利) | Gross Margin (毛利率)
+```
+
+**Project Cost:**
+```
+Phase (阶段) | Task (任务) | Budget (预算) | Used (已用) | Remaining (剩余) | Usage Rate (使用率) | Status (状态)
+```
+
+---
+
+## Formula Patterns
+
+```python
+# Variance
+cell.value = '=C{r}-B{r}'  # Actual - Budget
+
+# Variance percentage (safe division)
+cell.value = '=IFERROR((C{r}-B{r})/B{r},0)'
+
+# Running total
+cell.value = '=SUM(D$5:D{r})'
+
+# Gross margin
+cell.value = '=IFERROR((B{r}-C{r})/B{r},0)'
+
+# Status formula (simple threshold)
+cell.value = '=IF(F{r}>1,"Over Budget",IF(F{r}>0.9,"At Risk","On Track"))'
+
+# Subtotal
+cell.value = '=SUBTOTAL(9,D{start}:D{end})'
+
+# Grand total
+cell.value = '=SUM(D5:D{last_data_row})'
+```
+
+---
+
+## Number Formats
+
+Use standard formats from `templates/base.py`:
+
+```python
+from templates.base import FORMATS
+
+cell.number_format = FORMATS['currency_cny']  # ¥#,##0.00
+cell.number_format = FORMATS['percentage']     # 0.0%
+cell.number_format = FORMATS['integer']        # #,##0
+cell.number_format = FORMATS['date']           # YYYY-MM-DD
+```
+
+For budget-specific formatting (negatives in parentheses):
+```python
+BUDGET_FORMATS = {
+    'currency':    '¥#,##0.00;(¥#,##0.00);"-"',
+    'variance':    '#,##0.00;(#,##0.00);"-"',
+    'var_pct':     '0.0%;(0.0%);"-"',
+}
+```
+
+---
+
+## Styling
+
+Use **standard** design tokens (NOT IB overrides):
+
+```python
+from templates.base import (
+    setup_sheet, style_header_row, style_data_row, style_total_row,
+    FONT_NAME, HEADER_BOLD, PRIMARY, ACCENT_POSITIVE, ACCENT_NEGATIVE, ACCENT_WARNING,
+    font_body, font_header, fill_header,
+)
+
+# Setup
+setup_sheet(ws, title="2026年部门预算", last_col=7)
+
+# Headers at row 4
+style_header_row(ws, row_num=4, col_start=2, col_end=7)
+
+# Data rows
+for i, row_num in enumerate(range(5, last_row + 1)):
+    style_data_row(ws, row_num=row_num, col_start=2, col_end=7, row_index=i)
+
+# Totals
+style_total_row(ws, row_num=last_row + 1, col_start=2, col_end=7)
+```
+
+---
+
+## Conditional Formatting (Simple)
+
+```python
+from openpyxl.formatting.rule import CellIsRule
+from templates.base import CF_POSITIVE_FONT, CF_POSITIVE_FILL, CF_NEGATIVE_FONT, CF_NEGATIVE_FILL
+
+# Highlight positive variance (green)
+ws.conditional_formatting.add(
+    f'D5:D{last_row}',
+    CellIsRule(operator='greaterThan', formula=['0'],
+               font=CF_POSITIVE_FONT, fill=CF_POSITIVE_FILL)
+)
+
+# Highlight negative variance (red)
+ws.conditional_formatting.add(
+    f'D5:D{last_row}',
+    CellIsRule(operator='lessThan', formula=['0'],
+               font=CF_NEGATIVE_FONT, fill=CF_NEGATIVE_FILL)
+)
+```
+
+---
+
+## Quick Templates
+
+### Template: Monthly Budget
+
+```python
+headers = ["类别", "预算金额", "实际金额", "差异", "差异率", "状态"]
+# Variance = Actual - Budget
+# Var% = IFERROR((Actual-Budget)/Budget, 0)
+# Status = IF(Var%>0.1,"超支"(Over Budget),IF(Var%>0,"注意"(Watch),"正常"(Normal)))
+```
+
+### Template: Expense Report
+
+```python
+headers = ["日期", "类别", "说明", "金额", "报销人", "状态"]
+# Date format: YYYY-MM-DD
+# Amount: currency_cny
+# Status: dropdown validation ["待审批"(Pending),"已审批"(Approved),"已报销"(Reimbursed),"已拒绝"(Rejected)]
+```
+
+### Template: Project Cost Tracker
+
+```python
+headers = ["阶段", "任务", "预算", "已用", "剩余", "使用率", "状态"]
+# Remaining = Budget - Used
+# Usage% = IFERROR(Used/Budget, 0)
+# Status = IF(Usage%>1,"超支"(Over Budget),IF(Usage%>0.9,"预警"(Warning),"正常"(Normal)))
+```
--- a/skills/xlsx/scenes/vba.md
+++ b/skills/xlsx/scenes/vba.md
@@ -0,0 +1,298 @@
+# VBA — Macro Generation & Management Guide
+
+Load this reference when the task involves: creating Excel macros, writing VBA code, automating Excel workflows, adding buttons/forms, modifying existing macros, or any `.xlsm` deliverable that needs programmatic automation.
+
+Also load `engines/vba-templates.md` for ready-to-use code templates.
+
+---
+
+## Core Principles
+
+### 1. Safety First
+- **Never** generate VBA that deletes files, accesses filesystem outside the workbook, or sends data to external URLs without explicit user request
+- **Always** include error handling (`On Error GoTo`)
+- **Always** add `Application.ScreenUpdating` toggle for performance
+- Generated macros must be **read-audit-friendly**: clear naming, comments, structured layout
+
+### 2. openpyxl VBA Workflow
+openpyxl can read/preserve/inject VBA but **cannot execute** it. The workflow:
+
+```python
+# READ existing VBA
+from openpyxl import load_workbook
+wb = load_workbook('file.xlsm', keep_vba=True)
+# wb.vba_archive contains all VBA modules
+
+# CREATE new .xlsm with VBA
+from openpyxl import Workbook
+wb = Workbook()
+# ... build sheets ...
+# Inject VBA via vbaProject.bin (see Injection section)
+wb.save('output.xlsm')
+```
+
+### 3. File Format Rules
+| Need | Format | Extension |
+|------|--------|-----------|
+| Data only, no macros | OpenXML | `.xlsx` |
+| Contains VBA macros | Macro-Enabled | `.xlsm` |
+| Binary with macros | Binary | `.xlsb` |
+
+**Critical**: If user gives `.xlsx` but wants macros → output must be `.xlsm`. Always warn about format change.
+
+---
+
+## VBA Code Structure Standard
+
+Every generated VBA module must follow this structure:
+
+```vba
+Option Explicit
+
+' ============================================================
+' Module: [ModuleName]
+' Purpose: [One-line description]
+' Author: Z.ai
+' Date: [YYYY-MM-DD]
+' ============================================================
+
+' --- Constants ---
+Private Const MODULE_NAME As String = "[ModuleName]"
+
+' --- Main Entry Point ---
+Public Sub Main()
+    On Error GoTo ErrHandler
+    Application.ScreenUpdating = False
+    Application.Calculation = xlCalculationManual
+    
+    ' [Main logic here]
+    
+CleanUp:
+    Application.ScreenUpdating = True
+    Application.Calculation = xlCalculationAutomatic
+    Exit Sub
+    
+ErrHandler:
+    MsgBox "Error in " & MODULE_NAME & ": " & Err.Description, _
+           vbCritical, "Error"
+    Resume CleanUp
+End Sub
+```
+
+### Naming Conventions
+| Element | Convention | Example |
+|---------|-----------|---------|
+| Sub/Function | PascalCase | `GenerateMonthlyReport` |
+| Variable | camelCase | `lastRow`, `wsData` |
+| Constant | UPPER_SNAKE | `MAX_ROWS`, `REPORT_TITLE` |
+| Module | PascalCase | `ModReport`, `ModUtils` |
+| Worksheet variable | ws + Name | `wsData`, `wsSummary` |
+| Range variable | rng + Desc | `rngData`, `rngHeaders` |
+
+### Variable Declaration Rules
+```vba
+' Always use explicit types
+Dim lastRow As Long          ' Not Integer (row limit)
+Dim ws As Worksheet
+Dim rng As Range
+Dim cell As Range
+Dim i As Long
+Dim strValue As String
+Dim dblAmount As Double
+```
+
+---
+
+## Common Patterns
+
+### Find Last Row/Column (Robust)
+```vba
+' Last row with data in column A
+Dim lastRow As Long
+lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
+
+' Last column with data in row 1
+Dim lastCol As Long
+lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
+
+' Used range (less reliable but useful)
+Dim usedRows As Long
+usedRows = ws.UsedRange.Rows.Count
+```
+
+### Loop Through Data
+```vba
+' Row loop
+Dim i As Long
+For i = 2 To lastRow  ' Skip header
+    If ws.Cells(i, 1).Value <> "" Then
+        ' Process row
+    End If
+Next i
+
+' For Each (range)
+Dim cell As Range
+For Each cell In ws.Range("A2:A" & lastRow)
+    If Not IsEmpty(cell) Then
+        ' Process cell
+    End If
+Next cell
+```
+
+### Sheet Operations
+```vba
+' Reference sheet safely
+Dim ws As Worksheet
+On Error Resume Next
+Set ws = ThisWorkbook.Sheets("Data")
+On Error GoTo 0
+If ws Is Nothing Then
+    MsgBox "Sheet 'Data' not found!", vbExclamation
+    Exit Sub
+End If
+
+' Create sheet if not exists (wsNew ends up referencing "Summary" either way)
+Dim wsNew As Worksheet
+Dim sheetExists As Boolean
+For Each wsNew In ThisWorkbook.Worksheets  ' Worksheets, not Sheets: a chart sheet would raise Type Mismatch
+    If wsNew.Name = "Summary" Then sheetExists = True: Exit For  ' Exit For keeps wsNew bound to the match
+Next wsNew
+If Not sheetExists Then
+    Set wsNew = ThisWorkbook.Sheets.Add(After:=ThisWorkbook.Sheets(ThisWorkbook.Sheets.Count))
+    wsNew.Name = "Summary"
+End If
+```
+
+### User Interaction
+```vba
+' Simple input
+Dim userInput As String
+userInput = InputBox("Enter report month (YYYY-MM):", "Month Selection")
+If userInput = "" Then Exit Sub
+
+' Confirmation
+If MsgBox("Generate report for " & userInput & "?", _
+          vbYesNo + vbQuestion, "Confirm") = vbNo Then Exit Sub
+
+' File picker
+Dim filePath As Variant
+filePath = Application.GetOpenFilename( _
+    FileFilter:="Excel Files (*.xlsx;*.xlsm),*.xlsx;*.xlsm", _
+    Title:="Select Source File")
+If filePath = False Then Exit Sub
+```
+
+---
+
+## VBA Injection via openpyxl
+
+### Method 1: Preserve Existing VBA
+```python
+# Open with VBA preserved
+wb = load_workbook('source.xlsm', keep_vba=True)
+# Edit data/formatting as usual
+wb.save('output.xlsm')  # VBA modules intact
+```
+
+### Method 2: Copy VBA from Template
+```python
+# Use a template .xlsm that already has the VBA you need
+import shutil
+shutil.copy('template_with_macros.xlsm', 'output.xlsm')
+wb = load_workbook('output.xlsm', keep_vba=True)
+# Modify data
+wb.save('output.xlsm')
+```
+
+### Method 3: Manual vbaProject.bin Injection
+```python
+# For advanced use: inject raw vbaProject.bin
+# 1. Create your VBA in Excel, save as .xlsm
+# 2. Extract vbaProject.bin from the .xlsm (it's a ZIP)
+# 3. Inject into new workbook
+
+import zipfile
+import shutil
+
+# Create the workbook first
+wb = Workbook()
+# ... add data ...
+wb.save('temp.xlsx')
+
+# Convert to .xlsm by injecting VBA
+shutil.copy('temp.xlsx', 'output.xlsm')
+with zipfile.ZipFile('output.xlsm', 'a') as zf:
+    zf.write('vbaProject.bin', 'xl/vbaProject.bin')
+    
+# Update [Content_Types].xml to register VBA
+# (This is fragile — Method 1 or 2 preferred)
+```
+
+**Recommendation**: Method 1 (preserve) or Method 2 (template) are robust. Method 3 is fragile and should be last resort.
+
+---
+
+## Security Checklist
+
+Before delivering any VBA-enabled file:
+
+- [ ] No filesystem access outside workbook (no `Kill`, `FileCopy`, `MkDir` unless requested)
+- [ ] No network calls (`XMLHTTP`, `WinHttpRequest`) unless requested
+- [ ] No shell execution (`Shell`, `WScript.Shell`) unless requested
+- [ ] No registry access (`CreateObject("WScript.Shell").RegWrite`)
+- [ ] No auto-execution (`Auto_Open`, `Workbook_Open`) unless explicitly requested
+- [ ] Error handling in every Sub/Function
+- [ ] `ScreenUpdating` restored in cleanup
+- [ ] All variables explicitly declared (`Option Explicit`)
+- [ ] Module purpose documented in header comment
+
+---
+
+## Performance Guidelines
+
+```vba
+' ALWAYS bracket bulk operations
+Application.ScreenUpdating = False
+Application.Calculation = xlCalculationManual
+Application.EnableEvents = False
+
+' [Bulk operations here]
+
+Application.EnableEvents = True
+Application.Calculation = xlCalculationAutomatic
+Application.ScreenUpdating = True
+```
+
+### Array-Based Processing (for large data)
+```vba
+' Read range into array — much faster than cell-by-cell
+Dim data As Variant
+data = ws.Range("A1:Z" & lastRow).Value  ' 2D array
+
+' Process in memory
+Dim i As Long
+For i = LBound(data, 1) To UBound(data, 1)
+    data(i, 3) = data(i, 1) * data(i, 2)  ' Column C = A * B
+Next i
+
+' Write back in one shot
+ws.Range("A1:Z" & lastRow).Value = data
+```
+
+---
+
+## Debugging Support
+
+When user reports VBA errors, include diagnostic code:
+
+```vba
+' Debug logging to Immediate Window
+Debug.Print "Processing row " & i & ": " & ws.Cells(i, 1).Value
+
+' Verbose error info
+ErrHandler:
+    Debug.Print "ERROR in " & MODULE_NAME
+    Debug.Print "  Number: " & Err.Number
+    Debug.Print "  Description: " & Err.Description
+    Debug.Print "  Source: " & Err.Source
+```