Initial commit

This commit is contained in:
Z User
2026-06-06 05:21:10 +00:00
Unverified
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions

271
skills/xlsx/scenes/advanced.md Executable file
View File

@@ -0,0 +1,271 @@
# Scene: Advanced Operations
## When This Applies
Batch processing multiple files, handling very large datasets, data validation, conditional formatting, sheet protection, or other power-user features.
---
## Large File Handling (>100K rows)
### Read-Only Mode
```python
from openpyxl import load_workbook
# Memory-efficient reading — does NOT load entire file
wb = load_workbook('huge.xlsx', read_only=True)
ws = wb.active
for row in ws.iter_rows(min_row=2, values_only=True):
process(row) # Yields rows one at a time
wb.close() # MUST close read-only workbooks
```
### Write-Only Mode
```python
from openpyxl import Workbook
wb = Workbook(write_only=True)
ws = wb.create_sheet()
# Write rows sequentially — cannot random-access cells
for data_row in large_dataset:
ws.append(data_row)
wb.save('output.xlsx')
```
### Chunked Processing with pandas
```python
import pandas as pd

# NOTE: pd.read_excel() has no `chunksize` parameter — only read_csv does,
# and passing it to read_excel raises TypeError. For CSV input use:
#   for chunk in pd.read_csv('huge.csv', chunksize=10000): ...
# For Excel, restrict what gets loaded instead:
df = pd.read_excel(
    'huge.xlsx',
    usecols='A,C,E',    # Column letters must be ONE comma-separated string;
                        # a list of strings is interpreted as header names
    nrows=50000,        # Limit rows
    dtype={'id': str},  # Read the 'id' column as text instead of inferring a type
)
# To read a later window, combine skiprows with nrows (row 0 is the header):
#   pd.read_excel('huge.xlsx', skiprows=range(1, 50001), nrows=50000)
```
---
## Batch Processing Multiple Files
```python
import glob
import os

import pandas as pd
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# Collect all Excel files (sorted, because glob returns them in arbitrary order)
files = sorted(glob.glob('data/*.xlsx'))

# Method 1: Concatenate into one DataFrame
all_data = []
for f in files:
    df = pd.read_excel(f)
    df['source_file'] = os.path.basename(f)  # Remember where each row came from
    all_data.append(df)
if all_data:  # pd.concat raises ValueError when given an empty list
    combined = pd.concat(all_data, ignore_index=True)
    combined.to_excel('combined.xlsx', index=False)

# Method 2: One sheet per file
if files:  # a workbook with no sheets cannot be saved
    wb = Workbook()
    wb.remove(wb.active)  # Remove default sheet
    for f in files:
        df = pd.read_excel(f)
        # Sheet titles are limited to 31 characters
        title = os.path.splitext(os.path.basename(f))[0][:31]
        ws = wb.create_sheet(title=title)
        for r in dataframe_to_rows(df, index=False, header=True):
            ws.append(r)
    wb.save('all_files.xlsx')
```
---
## Data Validation (Dropdown Lists)
```python
from openpyxl.worksheet.datavalidation import DataValidation
# Dropdown list
dv = DataValidation(
type="list",
formula1='"High,Medium,Low"',
allow_blank=True,
showErrorMessage=True,
errorTitle="Invalid",
error="Please select High, Medium, or Low"
)
ws.add_data_validation(dv)
dv.add('D5:D100') # Apply to range
# Number range validation
dv_num = DataValidation(
    type="whole",
    operator="between",
    formula1="1",            # validation formulas are stored as text
    formula2="100",
    showErrorMessage=True,   # explicit, matching the dropdown example
    errorTitle="Out of range",
    error="Enter a number between 1 and 100"
)
ws.add_data_validation(dv_num)
dv_num.add('E5:E100')
# Date validation
dv_date = DataValidation(
    type="date",
    operator="greaterThan",
    # Use a DATE() formula (or a date serial number): the bare text
    # 2024-01-01 is evaluated by Excel as the arithmetic 2024-1-1.
    formula1="DATE(2024,1,1)",
    showErrorMessage=True,
    errorTitle="Invalid date",
    error="Enter a date after 2024-01-01"
)
ws.add_data_validation(dv_date)
dv_date.add('F5:F100')
```
---
## Conditional Formatting
For full conditional formatting rules, color usage, and code examples → see **`engines/design.md §8`**.
Quick reference for advanced-only patterns (FormulaRule for row-level highlighting):
```python
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import PatternFill
# Formula-based: highlight entire row if status = "Overdue"
ws.conditional_formatting.add('B5:H100',
FormulaRule(formula=['$G5="Overdue"'],
fill=PatternFill('solid', fgColor='FFEBEE')))
# Note: openpyxl does support the built-in icon sets (IconSetRule), but color fills are the simpler default for status highlighting
```
---
## Sheet Protection
```python
from openpyxl.styles import Protection
from openpyxl.workbook.protection import WorkbookProtection

# Protect sheet (allow select + sort + filter, prevent edits)
ws.protection.sheet = True
ws.protection.password = 'mypassword'
# SheetProtection flags mean "locked": True BLOCKS the action while the sheet
# is protected, so sort/autoFilter must be False to remain available.
ws.protection.sort = False
ws.protection.autoFilter = False

# Unlock specific cells for user input
unlocked = Protection(locked=False)
for row in range(5, 101):
    ws.cell(row=row, column=4).protection = unlocked  # Column D is editable

# Protect workbook structure (prevent adding/deleting sheets).
# Assign a WorkbookProtection object instead of mutating wb.security in place
# (NOTE(review): wb.security may be unset on some workbooks — confirm).
wb.security = WorkbookProtection(workbookPassword='structpass',
                                 lockStructure=True)
```
---
## Named Ranges
```python
from openpyxl.workbook.defined_name import DefinedName
# Create named range
ref = f"'Data'!$B$5:$B$100"
defn = DefinedName('SalesData', attr_text=ref)
wb.defined_names.add(defn)
# Use in formulas
ws['H5'] = '=SUM(SalesData)'
```
---
## Auto-Filter & Sort
```python
# Apply auto-filter
ws.auto_filter.ref = 'B4:H100'
# Add filter criteria (for saved state — user can change in Excel)
ws.auto_filter.add_filter_column(0, ['Active', 'Pending'])
# Sort (openpyxl can set sort state, but actual reordering
# must be done in Python before writing)
df = df.sort_values(['Category', 'Revenue'], ascending=[True, False])
```
---
## Merged Cells
```python
# Merge cells
ws.merge_cells('B2:H2') # Title spanning full width
# Write to merged range (write to top-left cell)
ws['B2'] = 'Report Title'
# Check existing merges before editing
for merge_range in ws.merged_cells.ranges:
print(f"Merged: {merge_range}")
# Unmerge if needed
ws.unmerge_cells('B2:H2')
```
**Warning**: Never write to cells within a merged range except the top-left cell. This causes corruption.
---
## Performance Tips
| Technique | When | Impact |
|-----------|------|--------|
| `read_only=True` | Reading files >50K rows | ~10x less memory |
| `write_only=True` | Writing files >50K rows | ~5x faster |
| `usecols` parameter | Only need specific columns | Faster read |
| Avoid `ws.cell()` in tight loops | Use `ws.append()` instead | Faster write |
| Batch style application | Apply to ranges, not cell-by-cell | Faster formatting |
| `data_only=True` for analysis | Need values not formulas | Faster read |
---
## VBA Module Inspection
When working with `.xlsm` files, you can read and list VBA modules:
```python
from openpyxl import load_workbook
import zipfile
import os
def list_vba_modules(filepath):
    """List the VBA project parts stored in a macro-enabled workbook.

    The file is inspected as a plain ZIP archive; the workbook itself is
    never loaded, so this is cheap and cannot leave a workbook open.

    Args:
        filepath: Path (str or os.PathLike) to the workbook.

    Returns:
        dict with keys:
            "has_vba": True when at least one ``xl/vbaProject*`` part exists.
            "modules": names of those archive parts (e.g.
                ``xl/vbaProject.bin``). Individual module names live inside
                that binary and are not decoded here.
            "error": only present when the file could not be read as a ZIP.
    """
    # Extension check is case-insensitive so "BOOK.XLSM" is recognised too.
    if not os.fspath(filepath).lower().endswith(('.xlsm', '.xlsb')):
        return {"has_vba": False, "modules": []}
    try:
        with zipfile.ZipFile(filepath, 'r') as zf:
            vba_parts = [name for name in zf.namelist()
                         if name.startswith('xl/vbaProject')]
    except (OSError, zipfile.BadZipFile) as e:
        # Missing, unreadable or non-ZIP file: report it instead of raising,
        # keeping the same keys as the success case.
        return {"has_vba": False, "modules": [], "error": str(e)}
    return {"has_vba": bool(vba_parts), "modules": vba_parts}
```
Use this to inspect before editing — know what VBA exists before you touch the file.

View File

@@ -0,0 +1,234 @@
# Analyze Recipes — Code Patterns for Data Analysis
> Load this file ON DEMAND when you need specific code patterns. Do NOT load upfront.
---
## Load & Explore
```python
import pandas as pd
df = pd.read_excel('input.xlsx') # or read_csv, read_json
# Multi-sheet: pd.read_excel('input.xlsx', sheet_name=None) → dict
print(f"Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print(f"Dtypes:\n{df.dtypes}")
print(f"Nulls:\n{df.isnull().sum()}")
print(f"Duplicates: {df.duplicated().sum()}")
print(f"\nDescribe:\n{df.describe()}")
```
---
## Aggregation & Grouping
```python
summary = df.groupby('Category').agg(
total=('Revenue', 'sum'),
avg=('Revenue', 'mean'),
count=('Revenue', 'count'),
max_val=('Revenue', 'max')
).round(2)
pivot = df.pivot_table(
values='Amount', index='Category', columns='Quarter',
aggfunc='sum', margins=True
)
```
---
## Time Series
```python
df['date'] = pd.to_datetime(df['date'])
monthly = df.resample('M', on='date').agg({'revenue': 'sum', 'orders': 'count'})
monthly['growth'] = monthly['revenue'].pct_change()
monthly['rolling_3m'] = monthly['revenue'].rolling(3).mean()
```
---
## Comparison / Diff
```python
df1 = pd.read_excel('this_month.xlsx')
df2 = pd.read_excel('last_month.xlsx')
merged = df1.merge(df2, on='ID', suffixes=('_new', '_old'))
merged['change'] = merged['value_new'] - merged['value_old']
merged['change_pct'] = (merged['change'] / merged['value_old'] * 100).round(1)
```
---
## Statistical Analysis
```python
# Restrict to numeric columns: median()/skew() raise TypeError on text or
# mixed-dtype columns in pandas >= 2.0.
numeric = df.select_dtypes(include='number')
stats = numeric.describe().T
stats['median'] = numeric.median()
stats['skew'] = numeric.skew()
corr = numeric.corr().round(3)
top_10 = df.nlargest(10, 'Revenue')
bottom_10 = df.nsmallest(10, 'Revenue')
```
---
## Data Cleaning
```python
df = df.drop_duplicates()
df['amount'] = df['amount'].fillna(0)
df['name'] = df['name'].fillna('Unknown')
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
# Remove outliers (IQR)
Q1, Q3 = df['value'].quantile([0.25, 0.75])
IQR = Q3 - Q1
df = df[(df['value'] >= Q1 - 1.5*IQR) & (df['value'] <= Q3 + 1.5*IQR)]
```
---
## Bridge Pattern: pandas → openpyxl
```python
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active
ws.title = "Analysis"
for r_idx, row in enumerate(dataframe_to_rows(summary, index=True, header=True), 1):
for c_idx, value in enumerate(row, 1):
ws.cell(row=r_idx + 3, column=c_idx + 1, value=value)
```
---
## KPI Summary Card
```python
kpis = [
('Total Revenue', total_revenue, '$#,##0'),
('Avg Order Value', avg_order, '$#,##0.00'),
('Growth Rate', growth_rate, '0.0%'),
('Total Orders', total_orders, '#,##0'),
]
col = 2
for label, value, fmt in kpis:
ws.cell(row=3, column=col, value=label)
ws.cell(row=4, column=col, value=value)
ws.cell(row=4, column=col).number_format = fmt
col += 3
```
---
## Cross-Validation Review Sheet
```python
review_ws = wb.create_sheet("Review")
review_ws.sheet_properties.tabColor = "FFC000"
checks = [
["Check", "Expected", "Actual", "Status"],
["Total Revenue", "=SUM(Data!B2:B100)", "=Summary!B10", '=IF(B2=C2,"✓ PASS","✗ FAIL")'],
["Row Count", "=COUNTA(Data!A:A)-1", "=Summary!B3", '=IF(B3=C3,"✓ PASS","✗ FAIL")'],
]
for i, row in enumerate(checks, 1):
for j, val in enumerate(row, 1):
review_ws.cell(row=i, column=j, value=val)
```
---
## xlsx.py Pivot Workflow
```bash
python3 "$XLSX_SKILL_DIR/xlsx.py" inspect data.xlsx --pretty
python3 "$XLSX_SKILL_DIR/xlsx.py" pivot data.xlsx output.xlsx \
--source "Data!A1:F500" \
--rows "Product,Region" \
--values "Revenue:sum,Units:count" \
--location "Summary!A3" \
--style "finance" \
--chart "bar"
python3 "$XLSX_SKILL_DIR/xlsx.py" validate output.xlsx
```
### PivotTable Best Practices
- Source data: first row must have unique, non-blank headers
- No merged cells or blank rows in source range
- Place pivot on a dedicated sheet, position at A3 or B2
- Row axis: primary grouping; Column axis: ≤10 distinct values
- Values: numeric measures only
### PivotTable Troubleshooting
| Symptom | Remedy |
|---------|--------|
| "Field not found" | Check header spelling via `inspect` |
| PivotTable empty | Ensure `--source` covers all data rows |
| `validate` reports pivot errors | Critical — must fix |
| `validate` reports `pass_with_warnings` | Safe to deliver |
---
## Alternating Column Structure (Key-Value Pairs)
When odd columns contain identifiers and even columns contain corresponding values (e.g., O=PartNo, P=Qty, Q=PartNo, R=Qty, ...):
**Detection heuristic**:
- Odd columns have repeated values or category codes
- Even columns are numeric
- Headers alternate between descriptive and quantitative names
**Solution**: Use SUMIF across the combined key/value ranges:
```python
# Excel formula: =SUMIF(O2:W2, A2, P2:X2)
# SUMIF matches position-by-position across multi-column ranges
formula = f'=SUMIF(O{row}:W{row},A{row},P{row}:X{row})'
```
---
## FIFO Allocation Formula (Cumulative Deduction)
Scenario: Allocate limited inventory to order lines in sequence — each row gets what's left after previous rows consumed their share.
**Formula template** (row N):
```
=MAX(0, MIN(OrderQty_N,
TotalInventory_for_key - SUM_of_already_allocated_above))
```
**Example** (H column = allocated qty):
```python
# Row 2 (first row): allocate up to available inventory
f'=MIN(G2, SUMIFS(Sheet2!D:D, Sheet2!A:A, A2, Sheet2!B:B, D2))'
# Row 3+ (subsequent): subtract already-allocated from rows above
f'=MAX(0, MIN(G{r}, SUMIFS(Sheet2!D:D, Sheet2!A:A, A{r}, Sheet2!B:B, D{r})'
f' - SUMIFS(H$1:H{r-1}, A$1:A{r-1}, A{r}, D$1:D{r-1}, D{r})))'
```
**Key**: `SUMIFS(H$1:H{r-1}, ...)` creates a running total of already-allocated amounts, achieving row-by-row deduction.
⚠️ This is a self-referencing formula pattern — openpyxl cannot verify it. Must open in Excel to confirm calculation.
### Data Provenance Implementation
```python
src_ws = wb.create_sheet("Sources")
src_ws.sheet_properties.tabColor = PRIMARY
headers = ["Data Description", "Source Name", "Source URL", "Access Date"]
for col, h in enumerate(headers, 1):
cell = src_ws.cell(row=1, column=col, value=h)
cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD, color="FFFFFF")
cell.fill = PatternFill(start_color=PRIMARY, end_color=PRIMARY, fill_type="solid")
```

95
skills/xlsx/scenes/analyze.md Executable file
View File

@@ -0,0 +1,95 @@
# Scene: Data Analysis → Excel Output
## When This Applies
User wants to analyze data (statistics, trends, comparisons, pivots, aggregation) and receive results as an Excel file — possibly with charts, summary tables, or dashboards.
This scene bridges **pandas analysis** with **openpyxl output**. The deliverable is always an .xlsx file.
## Workflow
```
1. LOAD → Read input data (CSV/XLSX/JSON/DB)
2. EXPLORE → Understand structure, quality, distributions
3. ANALYZE → Compute metrics, aggregations, statistical tests
4. DESIGN → Plan Excel output (sheets, charts, KPIs)
5. BUILD → Write analysis results to .xlsx with formatting
6. CHART → Add charts (Excel-native or embedded matplotlib)
7. QA → recalc → audit → scan → chart-verify
8. PIVOT → If needed, run xlsx.py pivot as final step
9. VALIDATE → validate → deliver
```
## Analysis Framework
### Phase A: Problem Framing
- What question is the user trying to answer?
- Who will consume this output? (executive summary vs. detailed analysis)
- What decisions will be made based on this data?
### Phase B: Data Quality Assessment
- Missing values: count, pattern (random vs. systematic)
- Outliers: statistical detection (IQR, z-score)
- Data types: numeric vs. categorical, date parsing
- Duplicates: exact and fuzzy
### Phase C: Exploratory Analysis
- Distributions: histograms, box plots for key variables
- Correlations: pairwise for numeric columns
- Segmentation: group-by analysis on categorical dimensions
- Time patterns: trends, seasonality if time-series data
### Phase D: Insight Extraction
- Rank findings by business impact, not statistical significance
- Each insight must be actionable — "so what?" test
- Cross-validate: check the same insight from a different angle
### Phase E: Cross-Validation
- Sanity check totals against known benchmarks
- Verify computed metrics with alternative formulas
- Document any assumptions or limitations in the output
**Industry-specific frameworks:**
- **Finance**: Variance analysis → trend decomposition → ratio analysis → peer comparison
- **Marketing**: Funnel analysis → cohort analysis → attribution → ROI calculation
- **Operations**: Throughput analysis → bottleneck identification → utilization rates → SLA compliance
---
## Multi-Sheet Report Layout
```
Sheet 1: "Dashboard" — KPI cards + summary chart
Sheet 2: "Detail" — Full analysis table with formatting
Sheet 3: "Charts" — Additional visualizations
Sheet 4: "Raw Data" — Original data for reference (tab color: gray)
```
### KPI Summary Card Pattern
Place 4-6 KPI metrics at the top of Dashboard sheet (row 3-4), each spaced 3 columns apart. Include label (small, gray) and value (large, bold, themed) with appropriate number format.
---
## PivotTable Decision
| Situation | Use |
|-----------|-----|
| Need interactive PivotTable in Excel | `"$XLSX_SKILL_DIR/xlsx.py" pivot` |
| Just need a summary table (static) | pandas `pivot_table` → openpyxl |
| Simple aggregation (1 dimension) | pandas `groupby` → openpyxl |
**Trigger phrases**: summarize, aggregate, group by, categorize, breakdown, distribution, tally, totals per, cross-tab, 汇总, 透视, 分类统计, 交叉分析
---
## Data Provenance
When analysis uses external data, create a **"Sources" sheet** (tab color: `PRIMARY`) with columns: Data Description | Source Name | Source URL | Access Date.
Skip when user provides all data directly.
---
## Code Recipes
For specific code patterns (aggregation, time series, comparison, cleaning, bridge pattern), load `scenes/analyze-recipes.md` on demand.

133
skills/xlsx/scenes/convert.md Executable file
View File

@@ -0,0 +1,133 @@
# Scene: Format Conversion
## When This Applies
User wants to convert between tabular file formats: CSV↔XLSX, JSON→XLSX, TSV→XLSX, PDF table→XLSX, or XLSX→CSV/JSON.
## Conversion Matrix
| From | To | Method |
|------|-----|--------|
| CSV/TSV → XLSX | pandas read → openpyxl write with formatting | Most common |
| JSON → XLSX | pandas json_normalize → openpyxl | Flatten nested structures |
| XLSX → CSV | pandas read_excel → to_csv | Simple export |
| XLSX → JSON | pandas read_excel → to_json | With orient parameter |
| PDF table → XLSX | pdfplumber/tabula extract → openpyxl | Needs table detection |
| Image table → XLSX | OCR → pandas → openpyxl | Last resort, error-prone |
## CSV/TSV → XLSX
```python
import pandas as pd
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
# Read with encoding detection
df = pd.read_csv('input.csv', encoding='utf-8')
# Common encodings: utf-8, gbk, gb2312, latin-1, shift_jis
# Handle messy CSVs
df = pd.read_csv('input.csv',
encoding='utf-8',
sep=',', # or '\t', ';', '|'
skiprows=2, # skip junk header rows
na_values=['N/A', '-', ''],
dtype=str, # read everything as string first, convert later
on_bad_lines='skip' # skip malformed rows
)
# Convert types after reading
df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
df['date'] = pd.to_datetime(df['date'], errors='coerce')
# Write to Excel with formatting
wb = Workbook()
ws = wb.active
# Write data starting at B4 (with theme formatting)
for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), 4):
for c_idx, value in enumerate(row, 2):
ws.cell(row=r_idx, column=c_idx, value=value)
# Apply design tokens from engines/design.md
# ...
wb.save('output.xlsx')
```
## JSON → XLSX
```python
import pandas as pd
import json
# Flat JSON
df = pd.read_json('input.json')
# Nested JSON — flatten
with open('input.json') as f:
data = json.load(f)
# If it's a list of objects
df = pd.json_normalize(data, max_level=2)
# If nested with specific record path
df = pd.json_normalize(data, record_path='items', meta=['id', 'name'])
# Write to Excel...
```
## XLSX → CSV/JSON
```python
# To CSV
df = pd.read_excel('input.xlsx', sheet_name='Data')
df.to_csv('output.csv', index=False, encoding='utf-8-sig') # utf-8-sig for Excel compatibility
# To JSON
df.to_json('output.json', orient='records', force_ascii=False, indent=2)
# Multiple sheets → multiple CSVs
sheets = pd.read_excel('input.xlsx', sheet_name=None)
for name, df in sheets.items():
df.to_csv(f'output_{name}.csv', index=False, encoding='utf-8-sig')
```
## PDF Table → XLSX
```python
# Method 1: pdfplumber (preferred for most PDFs)
import pdfplumber
tables = []
with pdfplumber.open('input.pdf') as pdf:
for page in pdf.pages:
page_tables = page.extract_tables()
for table in page_tables:
tables.extend(table)
# Clean and convert to DataFrame
df = pd.DataFrame(tables[1:], columns=tables[0])
# Method 2: tabula-py (Java-based, good for complex tables)
# import tabula
# dfs = tabula.read_pdf('input.pdf', pages='all', multiple_tables=True)
```
## Encoding Gotchas
| Scenario | Encoding | Tip |
|----------|----------|-----|
| Chinese data from Windows | `gbk` or `gb2312` | Try gbk first |
| Japanese data | `shift_jis` or `cp932` | |
| European data | `latin-1` or `cp1252` | |
| Excel-generated CSV | `utf-8-sig` (has BOM) | pandas handles automatically |
| Output CSV for Excel | Write with `utf-8-sig` | Prevents garbled Chinese in Excel |
## Quality Checks After Conversion
- [ ] Row count matches source
- [ ] No garbled characters (encoding correct)
- [ ] Numeric columns are numbers, not strings
- [ ] Dates are date objects, not text
- [ ] No blank rows/columns from source artifacts
- [ ] Headers are in the correct row

105
skills/xlsx/scenes/create.md Executable file
View File

@@ -0,0 +1,105 @@
# Scene: Create New Spreadsheet
## When This Applies
User wants to create a new Excel file from scratch — a table, template, schedule, report, or any structured data output.
For financial models, also load `scenes/finance.md`.
## Workflow
```
1. PLAN → Identify all sheets, their structure, formulas, cross-references
2. STYLE → Load engines/design.md, apply default palette
3. BUILD → Create workbook, write data/formulas/formatting per sheet
4. QA → recalc → audit → scan → chart-verify (if charts)
5. PIVOT → If needed, run pivot command LAST
6. VALIDATE → validate → exit 0 = deliver
```
## Layout & Styling
All layout rules (Canvas Origin B2, column widths, row heights, margins) and styling (title/header/data/totals) are defined in **`engines/design.md`** — the single source of truth. Do not duplicate here.
Quick reference for sheet structure:
```
Row 1: [top margin]
Row 2: Title (B2)
Row 3: [spacer]
Row 4: Column headers
Row 5+: Data rows
Last+1: Totals row
Last+3: Notes/sources
```
## Multi-Sheet Workbooks
### Cross-Sheet References
```python
# Reference another sheet
sheet['C5'] = "=Data!B10"
# Sheet names with spaces need quotes
sheet['C5'] = "='Sales Data'!B10"
# Green font for cross-sheet links (Finance theme)
sheet['C5'].font = Font(color="008000")
```
### Common Multi-Sheet Patterns
- **Data + Summary**: Raw data on Sheet1, formulas/charts on Summary
- **Monthly tabs**: Jan, Feb, Mar... + Annual Summary
- **Input + Output**: Assumptions sheet + Calculations sheet + Dashboard
## Template Patterns
### Simple Data Table
```python
wb = Workbook()
ws = wb.active
ws.title = "Data"
# Title + Headers + Data + Totals styling → see engines/design.md §11 Code Templates
# Only show formula logic here:
# Headers at B4
headers = ['Product', 'Q1', 'Q2', 'Q3', 'Q4', 'Total']
for col, h in enumerate(headers, 2):
cell = ws.cell(row=4, column=col, value=h)
# Data rows starting at row 5
# ...
# Totals row
total_row = last_data_row + 1
ws.cell(row=total_row, column=2, value='Total')
for col in range(3, 7): # Q1-Q4
letter = get_column_letter(col)
ws.cell(row=total_row, column=col).value = f'=SUM({letter}5:{letter}{last_data_row})'
# Grand total
ws.cell(row=total_row, column=7).value = f'=SUM(C{total_row}:F{total_row})'
```
### Schedule / Calendar
- Use merged cells for day headers
- Conditional formatting for weekends (light gray fill)
- Freeze panes: `ws.freeze_panes = 'C5'` (freeze header + left labels)
### Checklist / Tracker
- Checkbox column using data validation (`TRUE`/`FALSE`)
- Status column with conditional formatting (green/amber/red)
- Progress bar using data bar conditional formatting
## Freeze Panes & Print
```python
# Freeze headers (row 4) and label column (col B)
ws.freeze_panes = 'C5' # Rows 1-4 and cols A-B stay visible
# Print setup
ws.page_setup.orientation = 'landscape'
ws.page_setup.fitToWidth = 1
ws.page_setup.fitToHeight = 0
ws.print_area = 'B2:H50'
ws.print_title_rows = '4:4' # Repeat header on each page
```

View File

@@ -0,0 +1,222 @@
# Edit Patterns — Reusable Code for Complex Edit Operations
> Load this file ON DEMAND when you encounter grouping, sorting, block detection, or other complex edit patterns.
> Do NOT load upfront for simple edits.
---
## Pattern: Block Detection
Data is often split into independent blocks separated by blank rows or keyword rows (e.g., TOTAL, Subtotal).
```python
def detect_blocks(ws, col=1, start_row=1, end_row=None,
                  separator='blank', keyword='TOTAL'):
    """
    Detect data block boundaries by scanning one column.

    ws: worksheet exposing ``cell(row=, column=).value`` and ``max_row``
    col: column whose values decide where blocks start and end
    start_row / end_row: inclusive scan range (end_row defaults to ws.max_row)
    separator:
        'blank'   - a block ends on the row before an empty cell
        'keyword' - a block ends on (and includes) the first row whose text
                    contains ``keyword``; blank rows inside such a block do
                    not end it, and keyword rows outside a block are ignored
    keyword: text to look for in 'keyword' mode (matched case-insensitively)
    Returns: list of (start_row, end_row) tuples; a block still open at the
    end of the scan is closed at ``end_row``.
    """
    if end_row is None:
        end_row = ws.max_row
    # Upper-case both sides so keyword='Subtotal' matches 'Subtotal' cells.
    needle = str(keyword).upper()
    blocks, block_start = [], None
    for row in range(start_row, end_row + 1):
        val = ws.cell(row=row, column=col).value
        is_blank = val is None or (isinstance(val, str) and val.strip() == '')
        if separator == 'blank':
            if is_blank:
                if block_start is not None:
                    blocks.append((block_start, row - 1))
                    block_start = None
            elif block_start is None:
                block_start = row
        elif separator == 'keyword':
            if isinstance(val, str) and needle in val.upper():
                if block_start is not None:
                    blocks.append((block_start, row))
                    block_start = None
            elif not is_blank and block_start is None:
                block_start = row
    if block_start is not None:
        blocks.append((block_start, end_row))
    return blocks
```
---
## Pattern: Pre-filter Null Rows
Before any groupby/aggregation, filter out rows where key columns are empty.
```python
def pre_filter_rows(ws, key_cols, start_row, end_row):
    """Collect the row numbers in [start_row, end_row] whose key cells are all non-null.

    A cell counts as null when normalize_cell_value() returns None for it.
    """
    kept = []
    for row in range(start_row, end_row + 1):
        for c in key_cols:
            if normalize_cell_value(ws.cell(row=row, column=c).value) is None:
                break  # one empty key column disqualifies the row
        else:
            kept.append(row)
    return kept
```
---
## Pattern: Sort with Formula Rewrite
When sorting rows by swapping data (not using `insert_rows`), formulas must be regenerated with new row numbers.
```python
def sort_block_with_formulas(ws, block_rows, sort_col, formula_templates,
                             descending=True):
    """
    Reorder the rows of a block in place by the value in sort_col.

    block_rows: row numbers making up the block, in sheet order
    sort_col: column index used as the sort key (empty/falsy values sort as 0)
    formula_templates: dict {col_index: '=B{row}+C{row}'}; these columns are
        rebuilt from the template for the destination row, not copied
    descending: largest key first when True
    """
    # Phase 1: snapshot every row of the block as {column: value}.
    column_ids = range(1, ws.max_column + 1)
    snapshots = []
    for source_row in block_rows:
        snapshot = {}
        for column in column_ids:
            snapshot[column] = ws.cell(row=source_row, column=column).value
        snapshots.append(snapshot)

    def sort_key(snapshot):
        return snapshot.get(sort_col) or 0

    snapshots.sort(key=sort_key, reverse=descending)

    # Phase 2: write the snapshots back onto the block's rows in sorted order.
    for target, snapshot in zip(block_rows, snapshots):
        for column, value in snapshot.items():
            cell = ws.cell(row=target, column=column)
            if column in formula_templates:
                cell.value = formula_templates[column].format(row=target)
            else:
                cell.value = value
```
---
## Pattern: Group-Merge (Aggregate by Key)
Group rows by a key column. Take first-row values for some columns, sum for others.
```python
from collections import OrderedDict
def group_merge_rows(ws, key_col, start_row, end_row, first_cols, sum_cols):
    """
    Aggregate rows that share the same key into one entry per key.

    key_col: column holding the grouping key (rows with a null key are skipped)
    first_cols: columns whose value is taken from the first row of each group
    sum_cols: columns accumulated as floats across the group; values that
        cannot be converted to float are ignored
    Returns: OrderedDict {key: {'first': {col: value}, 'sums': {col: total}}}
        in order of first appearance
    """
    merged = OrderedDict()
    for row in range(start_row, end_row + 1):
        key = normalize_cell_value(ws.cell(row=row, column=key_col).value)
        if key is None:
            continue
        if key not in merged:
            first_values = {}
            for c in first_cols:
                first_values[c] = ws.cell(row=row, column=c).value
            merged[key] = {
                'first': first_values,
                'sums': dict.fromkeys(sum_cols, 0.0),
            }
        totals = merged[key]['sums']
        for c in sum_cols:
            raw = normalize_cell_value(ws.cell(row=row, column=c).value)
            if raw is None:
                continue
            try:
                totals[c] += float(raw)
            except (ValueError, TypeError):
                # Non-numeric content is deliberately left out of the total.
                pass
    return merged
```
---
## Pattern: Group-Max-Keep-Ties
Group by key, find max value per group, keep ALL rows that match the max (not just the first).
```python
from collections import defaultdict
def group_max_keep_ties(rows, key_func, value_func, filter_null=True):
    """
    For each group, return every row whose value equals the group's maximum.

    rows: iterable of row dicts or tuples
    key_func: row → group key
    value_func: row → comparable value (e.g., date)
    filter_null: when True, rows whose value is None are dropped up front
    Returns: list of surviving rows, grouped in order of first key appearance
    """
    buckets = defaultdict(list)
    for record in rows:
        value = value_func(record)
        if value is None and filter_null:
            continue
        buckets[key_func(record)].append(record)

    survivors = []
    for members in buckets.values():
        best = max(map(value_func, members))
        for record in members:
            if value_func(record) == best:
                survivors.append(record)
    return survivors
```
---
## Pattern: Sequence Fill (Smart Numbering)
Fill blank rows with "parent number + letter suffix" (e.g., 5 → 5a, 5b, ..., 5z, 5aa).
```python
import re
def get_letter_suffix(n):
    """Return the spreadsheet-style lowercase suffix for a zero-based index.

    0=a, 25=z, 26=aa, 27=ab, ..., 701=zz, 702=aaa, ...

    Raises:
        ValueError: if n is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    letters = []
    remaining = n + 1  # bijective base-26 has no zero digit, so work 1-based
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, 26)
        letters.append(chr(ord('a') + digit))
    # Digits were produced least-significant first.
    return ''.join(reversed(letters))
def fill_sequential_labels(ws, col, start_row, end_row):
    """Label empty cells in a column as '<preceding number><letter suffix>'.

    Scans rows start_row..end_row of ``col``. A cell whose text starts with
    digits sets the current base number and restarts the suffix counter; each
    following None cell is filled with that base plus the next suffix from
    get_letter_suffix(). Empty cells before any numbered cell are left alone.
    """
    last_base = None
    blank_count = 0
    for row in range(start_row, end_row + 1):
        cell = ws.cell(row=row, column=col)
        current = cell.value
        if current is None:
            if last_base is None:
                continue  # nothing to derive a label from yet
            cell.value = f"{last_base}{get_letter_suffix(blank_count)}"
            blank_count += 1
            continue
        found = re.match(r'^(\d+)', str(current))
        if found:
            last_base, blank_count = found.group(1), 0
```
---
## Pattern: Zero-as-Blank Output
When merged/aggregated values of 0 should display as empty:
```python
# Method 1: Write None (best for programmatic verification)
cell.value = computed_value if computed_value != 0 else None
# Method 2: Number format (best for Excel viewing)
cell.value = computed_value
cell.number_format = '0.00;-0.00;""' # positive;negative;zero(blank)
```
---
## Pattern: Side-by-Side Table Detection
Some sheets contain multiple independent tables arranged horizontally (separated by empty columns).
```python
def detect_side_by_side_tables(ws):
    """Return the column spans of tables laid out next to each other.

    A column is a separator when every cell in rows 1..ws.max_row is None.
    Each run of consecutive non-separator columns becomes one
    (start_col, end_col) tuple.
    """
    spans = []
    span_start = None
    for col in range(1, ws.max_column + 1):
        column_is_empty = all(ws.cell(row=r, column=col).value is None
                              for r in range(1, ws.max_row + 1))
        if column_is_empty:
            if span_start is not None:
                # Separator column closes the span that was open.
                spans.append((span_start, col - 1))
                span_start = None
        elif span_start is None:
            span_start = col
    if span_start:
        # Last table runs up to the sheet's final column.
        spans.append((span_start, ws.max_column))
    return spans  # [(start_col, end_col), ...]
```

195
skills/xlsx/scenes/edit.md Executable file
View File

@@ -0,0 +1,195 @@
# Scene: Edit Existing Spreadsheet
## When This Applies
User provides an existing .xlsx/.xlsm file and wants to modify it — fill data, fix formulas, beautify layout, add sheets, restructure.
## Core Principle: Preserve First
**Study the existing file before making ANY changes.** The original format, style, and conventions take absolute priority over default guidelines.
### VBA Preservation Rule
When opening `.xlsm` files, **always** use `keep_vba=True`:
```python
wb = load_workbook('file.xlsm', keep_vba=True)
# Edit data/formatting as usual
wb.save('output.xlsm') # VBA modules preserved
```
**Never** save a `.xlsm` as `.xlsx` unless the user explicitly requests macro removal. This silently destroys all VBA code.
## Workflow
```
1. INSPECT → Read the file, understand structure
2. PLAN → Identify what to change vs what to preserve
3. BACKUP → If destructive changes, suggest user keeps original
4. MODIFY → Make targeted changes
5. QA → recalc → audit → scan
6. VALIDATE → validate → deliver
```
## Step 1: Inspect the File
### 1a. Structure Survey
```python
from openpyxl import load_workbook
# Read with formulas preserved
wb = load_workbook('input.xlsx')
# Survey structure
for name in wb.sheetnames:
ws = wb[name]
print(f"Sheet: {name}, Dimensions: {ws.dimensions}, "
f"Rows: {ws.max_row}, Cols: {ws.max_column}")
# Check for existing styles
sample = ws['B4']
print(f"Font: {sample.font.name}, Size: {sample.font.size}, "
f"Bold: {sample.font.bold}, Fill: {sample.fill.fgColor}")
```
Also run `python3 "$XLSX_SKILL_DIR/xlsx.py" inspect input.xlsx --pretty` for structured overview.
### 1b. Semantic Data Sampling (MANDATORY for merge/copy/aggregate operations)
**Don't just print headers — print actual data rows to understand column semantics:**
```python
# Sample first 5 data rows from each sheet
for name in wb.sheetnames:
ws = wb[name]
print(f"\n=== {name} ===")
for row in range(1, min(6, ws.max_row + 1)):
vals = []
for col in range(1, ws.max_column + 1):
v = ws.cell(row=row, column=col).value
if v is not None:
vals.append(f"{get_column_letter(col)}={v}")
if vals:
print(f" Row {row}: {vals}")
```
### 1c. Cross-Sheet Column Semantic Mapping (MANDATORY before any merge/copy)
**⚠️ NEVER copy columns by position index alone when merging sheets.**
When two sheets have similar headers (e.g., both have columns A-V), the same column position may hold completely different data. Always:
1. Print sample data (not just headers) from both source and target sheets
2. For each column, identify the data type and value domain
3. Create an explicit column mapping dict before writing any data
```python
# Example: source sheet E column = amount, target sheet E column = type code
# → Do NOT copy source.E → target.E. Build semantic mapping first.
column_mapping = {
'src_I': 'dst_E', # amount → amount (different positions!)
'src_E': 'dst_I', # type → type
}
```
### 1d. Cell Value Normalization
Canonical implementation lives in **`templates/base.py → normalize_cell_value()`**.
Referenced by `edit-patterns.md` and `quality/pipeline.md`.
```python
from base import normalize_cell_value
# normalize_cell_value(value) → None for blank/NBSP/ZWSP, otherwise original value
```
**Always use this when checking for empty cells** — `\xa0` (NBSP) looks blank but is not `None`, so a plain `is None` check misses it.
## Step 2: Match Existing Styles
When adding new cells/rows to a styled file, use **`copy_style()` from `templates/base.py`**:
```python
from base import copy_style
# copy_style(source_cell, target_cell)
# → copies font, fill, border, alignment, number_format
```
## Common Edit Operations
### Fill / Complete Data
```python
# Add data to empty cells while preserving existing formatting
for row in range(start, end + 1):
    cell = ws.cell(row=row, column=col)
    if cell.value is None:
        cell.value = new_value
        # Copy style from the cell above. Row 1 has no cell above it, and
        # openpyxl raises ValueError for row=0, so skip the copy there.
        if row > 1:
            copy_style(ws.cell(row=row - 1, column=col), cell)
```
### Insert Rows / Columns
```python
# Insert 3 rows at position 10
ws.insert_rows(10, amount=3)
# Note: openpyxl only shifts the cells. It does NOT rewrite formulas, charts,
# tables or named ranges that reference the moved rows — update them yourself.
# Insert column at position D
ws.insert_cols(4)
```
**Warning**: Inserting/deleting rows can break chart references and named ranges. Verify after insertion.
### Restructure Data
```python
# Move data from one layout to another
# Read all data first, then rewrite
data = []
for row in ws.iter_rows(min_row=2, values_only=True):
data.append(row)
# Clear and rewrite in new structure
# ...
```
### Fix Formulas
```python
# Find cells with errors (after recalc)
wb_data = load_workbook('input.xlsx', data_only=True)
ws_data = wb_data.active
wb_formula = load_workbook('input.xlsx')
ws_formula = wb_formula.active
for row in ws_data.iter_rows():
for cell in row:
if isinstance(cell.value, str) and cell.value.startswith('#'):
formula_cell = ws_formula[cell.coordinate]
print(f"Error at {cell.coordinate}: {cell.value}, Formula: {formula_cell.value}")
```
## Format Beautification
When the user asks to "make it look better" or "format nicely":
**Load `engines/design.md`** and apply its complete styling system (tokens, fonts, layout, colors).
**But**: if the file already has a consistent style, enhance it rather than replacing it. Add what's missing (alignment, column widths, alternating fills) without changing existing colors or fonts. Use `copy_style()` (above) to match adjacent cells.
## ⚠️ Dangerous Operations
| Operation | Risk | Mitigation |
|-----------|------|-----------|
| `load_workbook(data_only=True)` then save | Formulas permanently lost | Never save after data_only read |
| Delete rows/cols with formula dependencies | #REF! errors | Run audit after deletion |
| Modify pivot table output with openpyxl | Corrupt pivotCache | Never — regenerate via xlsx.py pivot |
| Overwrite merged cells | Layout breaks | Check `ws.merged_cells.ranges` first |
| Manual row sort (swap row data) | Formulas still reference old row numbers | **Regenerate formula strings with target row number** (see Common Patterns → Sort with Formula Rewrite) |
| Write SUM formula → verify with data_only | Get `None` — formula not evaluated | Compute value in Python for verification; write computed value or use recalc |
---
## Common Patterns
For complex edit operations (grouping, sorting, block detection, merging, sequence fill, etc.):
**Load `scenes/edit-patterns.md`** on demand.
Available patterns: Block Detection, Pre-filter Null, Sort with Formula Rewrite, Group-Merge, Group-Max-Keep-Ties, Sequence Fill, Zero-as-Blank, Side-by-Side Table Detection.

318
skills/xlsx/scenes/finance.md Executable file
View File

@@ -0,0 +1,318 @@
# Financial Model Specialist Guide
Load this reference when the task involves: financial statements, budgets, forecasts, DCF models, LBO, valuation, P&L, balance sheets, cash flow, or any investment banking deliverable.
Also load `engines/design.md` → use **Finance** scene overrides (IB text color rules, section dividers).
---
## Financial Model Architecture
### Standard Sheet Structure
```
Assumptions Sheet:
- All inputs, growth rates, margins, multiples
- Blue font for every changeable number
- Yellow background for key assumptions
- Source citations in adjacent cells or comments
Income Statement / P&L:
- Revenue → COGS → Gross Profit → OpEx → EBIT → Interest → Tax → Net Income
- All values are formulas referencing Assumptions
Balance Sheet:
- Assets = Liabilities + Equity (must balance!)
- Include balance check row: =Assets-Liabilities-Equity (should be 0)
Cash Flow Statement:
- Operating → Investing → Financing → Net Change
- Ending Cash = Beginning Cash + Net Change
Valuation / Output:
- DCF, comparables, or whatever model the user needs
- Green font for values pulled from other sheets
```
### Formula Construction Rules
```python
# ✅ CORRECT: Reference assumptions
# Revenue Growth Rate sits in Assumptions!C6 in the Assumptions Sheet Layout below
sheet['C10'] = '=C9*(1+Assumptions!$C$6)'  # Growth rate from assumptions
# ❌ WRONG: Hardcoded magic number
sheet['C10'] = '=C9*1.05'
# ✅ CORRECT: Protected division
sheet['D15'] = '=IF(C15=0,"-",B15/C15)'
# ✅ CORRECT: Consistent formula across periods
# If D10 = '=D9*(1+Assumptions!$C$6)' then E10 must follow the same pattern
```
### Assumptions Sheet Layout
```
B4: "Key Assumptions" (section header, bold)
B6: "Revenue Growth Rate" C6: 0.05 (blue font, yellow bg)
B7: "Gross Margin" C7: 0.65 (blue font, yellow bg)
B8: "OpEx as % Revenue" C8: 0.30 (blue font, yellow bg)
B9: "Tax Rate" C9: 0.21 (blue font, yellow bg)
B10: "Discount Rate (WACC)" C10: 0.10 (blue font, yellow bg)
B11: "Terminal Growth Rate" C11: 0.02 (blue font, yellow bg)
```
### Source Documentation for Hardcodes
Every hardcoded input MUST have a source citation:
```python
# In cell comment
ws['C6'].comment = Comment(
"Source: Company 10-K, FY2024, Page 45, Revenue Growth",
"Z.ai"
)
# Or in adjacent cell (if end of table)
ws['D6'] = "Source: Management guidance, Q3 2024 earnings call"
ws['D6'].font = Font(size=8, italic=True, color="808080")
```
---
## Number Formatting (CRITICAL)
> Finance-specific formats below. For general number formats, see `engines/design.md §10`.
> Finance formats take priority when both apply.
```python
FINANCE_FORMATS = {
# Currency — zeros as dash, negatives in parentheses
'currency': '$#,##0;($#,##0);"-"',
'currency_k': '$#,##0,"K";($#,##0,"K");"-"',
'currency_mm': '$#,##0.0,,"M";($#,##0.0,,"M");"-"',
# Percentages — one decimal
'pct': '0.0%;(0.0%);"-"',
# Multiples — for EV/EBITDA, P/E etc.
'multiple': '0.0"x";(0.0"x");"-"',
# Years — MUST be text, not number (avoids "2,024")
'year': '@',
# Integer with thousands separator
'integer': '#,##0;(#,##0);"-"',
# Two decimal places
'decimal': '#,##0.00;(#,##0.00);"-"',
# Shares (millions)
'shares': '#,##0.0,,"M"',
}
# Apply
cell.number_format = FINANCE_FORMATS['currency_mm']
```
**Always specify units in column headers**: "Revenue ($mm)", "Shares (M)", "Growth (%)"
---
## IB Model Layout Rules
> All colors below use **design tokens from `engines/design.md`**. Do not hardcode hex values.
> Finance-specific overrides (IB text color rules, section dividers) are in `design.md §2.4`.
### Section Headers
```python
# Dark background, white bold text, merged across data width
# Uses PRIMARY from design.md (or Finance palette PRIMARY from design.md)
ws.merge_cells('B10:H10')
ws['B10'] = 'Income Statement'
ws['B10'].fill = PatternFill('solid', fgColor=PRIMARY)
ws['B10'].font = Font(name=FONT_NAME, size=12, bold=HEADER_BOLD, color='FFFFFF')
```
### Data Alignment
- Column labels (years, quarters): **right-aligned**
- Row labels (line items): **left-aligned**
- Submetrics: **indented** (add 2-3 spaces prefix)
```python
# Parent line item
ws['B12'] = 'Revenue'
ws['B12'].font = Font(name=FONT_NAME, bold=HEADER_BOLD)
# Sub line item (indented)
ws['B13'] = ' Product Revenue'
ws['B14'] = ' Service Revenue'
```
### Totals Formatting
```python
# Uses design tokens — see engines/design.md §6.3
total_border = Border(top=Side(style='thin', color=PRIMARY))
for col in range(3, 9): # C through H
cell = ws.cell(row=total_row, column=col)
cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD)
cell.border = total_border
```
### Grid Lines
```python
ws.sheet_view.showGridLines = False # Standard — defined in design.md §7.3
```
---
## Balance Check Pattern
For any financial model with a balance sheet:
```python
# Balance check row (should always be 0)
check_row = bs_end + 2
ws.cell(row=check_row, column=2, value='Balance Check')
for col in range(3, last_col + 1):
letter = get_column_letter(col)
ws.cell(row=check_row, column=col).value = \
f'={letter}{assets_total_row}-{letter}{liab_total_row}-{letter}{equity_total_row}'
# Conditional: red if not zero
ws.conditional_formatting.add(
f'{letter}{check_row}',
CellIsRule(operator='notEqual', formula=['0'],
font=Font(color='FF0000', bold=True))
)
```
---
## Sensitivity / Scenario Tables
```python
# Two-way data table: vary growth rate (rows) × discount rate (cols)
# Row headers: growth rates
growth_rates = [0.02, 0.03, 0.04, 0.05, 0.06]
# Col headers: discount rates
discount_rates = [0.08, 0.09, 0.10, 0.11, 0.12]
# Write headers
for i, g in enumerate(growth_rates):
ws.cell(row=start_row + i + 1, column=start_col, value=g)
ws.cell(row=start_row + i + 1, column=start_col).number_format = '0.0%'
ws.cell(row=start_row + i + 1, column=start_col).font = Font(color='0000FF')
for j, d in enumerate(discount_rates):
ws.cell(row=start_row, column=start_col + j + 1, value=d)
ws.cell(row=start_row, column=start_col + j + 1).number_format = '0.0%'
ws.cell(row=start_row, column=start_col + j + 1).font = Font(color='0000FF')
# Fill formulas for each combination
# Yellow background for the cell matching base case assumptions
```
---
## Projection Period Patterns
```python
# Historical + Projected columns
years = ['FY2022', 'FY2023', 'FY2024', 'FY2025E', 'FY2026E', 'FY2027E']
for i, year in enumerate(years):
    col = start_col + i
    cell = ws.cell(row=header_row, column=col, value=year)
    cell.font = Font(name=FONT_NAME, bold=HEADER_BOLD)
    cell.alignment = Alignment(horizontal='center')
    # Visual separator between historical and projected.
    # The i > 0 guard matters: at i == 0, years[i - 1] is years[-1] (the LAST
    # year), so the first column would be compared against the wrong period.
    if i > 0 and year.endswith('E') and not years[i - 1].endswith('E'):
        # Add left border to mark transition
        for row in range(header_row, last_row + 1):
            ws.cell(row=row, column=col).border = Border(
                left=Side(style='medium', color=PRIMARY))
```
---
## Additional Model Templates
### Template: P&L (Profit & Loss) Statement
```
Sheet: "P&L"
Row 1: Company Name + Period
Row 3: Headers (Month/Quarter columns)
Revenue Section:
Product Revenue =Assumptions!B5 * (1+Assumptions!C5)
Service Revenue =Assumptions!B6 * (1+Assumptions!C6)
Total Revenue =SUM(above)
COGS Section:
Direct Costs =Total_Revenue * (1 - Assumptions!gross_margin)
Gross Profit =Total_Revenue - Direct_Costs
Gross Margin % =IFERROR(Gross_Profit/Total_Revenue, 0)
OpEx Section:
S&M, R&D, G&A (each from Assumptions)
Total OpEx =SUM(S&M:G&A)
EBITDA =Gross_Profit - Total_OpEx
EBITDA Margin % =IFERROR(EBITDA/Total_Revenue, 0)
Below the Line:
D&A, Interest, Tax
Net Income =EBITDA - D&A - Interest - Tax
```
### Template: Budget vs Actual
```
Sheet: "Budget vs Actual"
Columns: Category | Budget | Actual | Variance | Var %
Key formulas:
Variance = =Actual - Budget
Var % = =IFERROR(Variance/Budget, 0)
Conditional formatting:
Var % > 0 → Green font (favorable)
Var % < -10% → Red font + red fill (unfavorable)
Var % -10~0 → Orange font (watch)
Summary section:
Total Budget =SUM(Budget range)
Total Actual =SUM(Actual range)
Overall Var % =IFERROR((Total_Actual-Total_Budget)/Total_Budget, 0)
```
### Template: SaaS Metrics Dashboard
```
Sheet: "SaaS Metrics"
KPIs (each with formula, not hardcoded):
MRR =SUMPRODUCT(Users * ARPU)
ARR =MRR * 12
Net Revenue Retention = =IFERROR((Starting_MRR + Expansion - Contraction - Churn) / Starting_MRR, 0)
CAC =IFERROR(Total_S&M / New_Customers, 0)
LTV =IFERROR(ARPU * Gross_Margin / Monthly_Churn_Rate, 0)
LTV:CAC Ratio =IFERROR(LTV / CAC, 0)
Payback Months =IFERROR(CAC / (ARPU * Gross_Margin), 0)
Chart: MRR waterfall (starting → new → expansion → contraction → churn → ending)
Chart: LTV:CAC trend line
```
### Template: Project Budget Tracker
```
Sheet: "Project Budget"
Columns: Phase | Task | Planned Cost | Actual Cost | Remaining | % Spent | Status
Key formulas:
Remaining = =Planned - Actual
% Spent = =IFERROR(Actual/Planned, 0)
Status = =IF(% Spent>1, "Over Budget", IF(% Spent>0.9, "At Risk", "On Track"))
Phase subtotals with SUBTOTAL function
Grand total row with project-level health indicator
```

View File

@@ -0,0 +1,192 @@
# Finance Lite — Simple Budget & Expense Guide
Load this reference for: simple budgets, expense reports, fee tracking, cost summaries, revenue/expense comparison, personal finance, project cost tracking — any financial table that does **NOT** need DCF, LBO, three-statement linkage, sensitivity analysis, or IB-grade formatting.
For complex financial models → use `scenes/finance.md` instead.
Also load `engines/design.md` for styling (use **standard** design tokens, NOT IB overrides).
---
## When to Use finance_lite vs finance
| Signal | finance_lite ✅ | finance.md ❌ |
|--------|----------------|--------------|
| 预算表 / budget | ✅ | |
| 费用报表 / expense report | ✅ | |
| 项目成本追踪 / project cost tracking | ✅ | |
| 收支对比 / revenue vs cost | ✅ | |
| 个人记账 / personal finance | ✅ | |
| 简单 ROI 计算 / simple ROI calculation | ✅ | |
| DCF / LBO / 估值模型 (valuation model) | | ✅ |
| 三表联动 (P&L + BS + CF) | | ✅ |
| 敏感性分析 / scenario table | | ✅ |
| IB pitch book level formatting | | ✅ |
---
## Standard Sheet Structure
```
Sheet: "Budget" (or user-specified name)
Row 1: margin (whitespace)
Row 2: Title (merged, styled via setup_sheet())
Row 3: spacer
Row 4: Headers
Row 5+: Data rows
Last row: Totals (if applicable)
```
### Typical Column Patterns
**Budget Table:**
```
Category (类别) | Budget Amount (预算金额) | Actual Amount (实际金额) | Variance (差异) | Variance Rate (差异率) | Notes (备注)
```
**Expense Report:**
```
Date (日期) | Category (类别) | Description (说明) | Amount (金额) | Claimant (报销人) | Status (状态)
```
**Revenue vs Cost:**
```
Month (月份) | Revenue (收入) | Cost (成本) | Gross Profit (毛利) | Gross Margin (毛利率)
```
**Project Cost:**
```
Phase (阶段) | Task (任务) | Budget (预算) | Used (已用) | Remaining (剩余) | Usage Rate (使用率) | Status (状态)
```
---
## Formula Patterns
```python
# All patterns are f-strings: {r}, {start}, {end} and {last_data_row} must be
# substituted with real row numbers before the formula is written. A plain
# string would put the literal text "{r}" into the cell.
# Variance
cell.value = f'=C{r}-B{r}'  # Actual - Budget
# Variance percentage (safe division)
cell.value = f'=IFERROR((C{r}-B{r})/B{r},0)'
# Running total
cell.value = f'=SUM(D$5:D{r})'
# Gross margin
cell.value = f'=IFERROR((B{r}-C{r})/B{r},0)'
# Status formula (simple threshold)
cell.value = f'=IF(F{r}>1,"Over Budget",IF(F{r}>0.9,"At Risk","On Track"))'
# Subtotal
cell.value = f'=SUBTOTAL(9,D{start}:D{end})'
# Grand total
cell.value = f'=SUM(D5:D{last_data_row})'
```
---
## Number Formats
Use standard formats from `templates/base.py`:
```python
from templates.base import FORMATS
cell.number_format = FORMATS['currency_cny'] # ¥#,##0.00
cell.number_format = FORMATS['percentage'] # 0.0%
cell.number_format = FORMATS['integer'] # #,##0
cell.number_format = FORMATS['date'] # YYYY-MM-DD
```
For budget-specific formatting (negatives in parentheses):
```python
BUDGET_FORMATS = {
'currency': '¥#,##0.00;(¥#,##0.00);"-"',
'variance': '#,##0.00;(#,##0.00);"-"',
'var_pct': '0.0%;(0.0%);"-"',
}
```
---
## Styling
Use **standard** design tokens (NOT IB overrides):
```python
from templates.base import (
setup_sheet, style_header_row, style_data_row, style_total_row,
FONT_NAME, HEADER_BOLD, PRIMARY, ACCENT_POSITIVE, ACCENT_NEGATIVE, ACCENT_WARNING,
font_body, font_header, fill_header,
)
# Setup
setup_sheet(ws, title="2026年部门预算", last_col=7)
# Headers at row 4
style_header_row(ws, row_num=4, col_start=2, col_end=7)
# Data rows
for i, row_num in enumerate(range(5, last_row + 1)):
style_data_row(ws, row_num=row_num, col_start=2, col_end=7, row_index=i)
# Totals
style_total_row(ws, row_num=last_row + 1, col_start=2, col_end=7)
```
---
## Conditional Formatting (Simple)
```python
from openpyxl.formatting.rule import CellIsRule
from templates.base import CF_POSITIVE_FONT, CF_POSITIVE_FILL, CF_NEGATIVE_FONT, CF_NEGATIVE_FILL
# Highlight positive variance (green)
ws.conditional_formatting.add(
f'D5:D{last_row}',
CellIsRule(operator='greaterThan', formula=['0'],
font=CF_POSITIVE_FONT, fill=CF_POSITIVE_FILL)
)
# Highlight negative variance (red)
ws.conditional_formatting.add(
f'D5:D{last_row}',
CellIsRule(operator='lessThan', formula=['0'],
font=CF_NEGATIVE_FONT, fill=CF_NEGATIVE_FILL)
)
```
---
## Quick Templates
### Template: Monthly Budget
```python
headers = ["类别", "预算金额", "实际金额", "差异", "差异率", "状态"]
# Variance = Actual - Budget
# Var% = IFERROR((Actual-Budget)/Budget, 0)
# Status = IF(Var%>0.1,"超支"(Over Budget),IF(Var%>0,"注意"(Watch),"正常"(Normal)))
```
### Template: Expense Report
```python
headers = ["日期", "类别", "说明", "金额", "报销人", "状态"]
# Date format: YYYY-MM-DD
# Amount: currency_cny
# Status: dropdown validation ["待审批"(Pending),"已审批"(Approved),"已报销"(Reimbursed),"已拒绝"(Rejected)]
```
### Template: Project Cost Tracker
```python
headers = ["阶段", "任务", "预算", "已用", "剩余", "使用率", "状态"]
# Remaining = Budget - Used
# Usage% = IFERROR(Used/Budget, 0)
# Status = IF(Usage%>1,"超支"(Over Budget),IF(Usage%>0.9,"预警"(Warning),"正常"(Normal)))
```

298
skills/xlsx/scenes/vba.md Executable file
View File

@@ -0,0 +1,298 @@
# VBA — Macro Generation & Management Guide
Load this reference when the task involves: creating Excel macros, writing VBA code, automating Excel workflows, adding buttons/forms, modifying existing macros, or any `.xlsm` deliverable that needs programmatic automation.
Also load `engines/vba-templates.md` for ready-to-use code templates.
---
## Core Principles
### 1. Safety First
- **Never** generate VBA that deletes files, accesses filesystem outside the workbook, or sends data to external URLs without explicit user request
- **Always** include error handling (`On Error GoTo`)
- **Always** add `Application.ScreenUpdating` toggle for performance
- Generated macros must be **read-audit-friendly**: clear naming, comments, structured layout
### 2. openpyxl VBA Workflow
openpyxl can read/preserve/inject VBA but **cannot execute** it. The workflow:
```python
# READ existing VBA
from openpyxl import load_workbook
wb = load_workbook('file.xlsm', keep_vba=True)
# wb.vba_archive contains all VBA modules
# CREATE new .xlsm with VBA
from openpyxl import Workbook
wb = Workbook()
# ... build sheets ...
# Inject VBA via vbaProject.bin (see Injection section)
wb.save('output.xlsm')
```
### 3. File Format Rules
| Need | Format | Extension |
|------|--------|-----------|
| Data only, no macros | OpenXML | `.xlsx` |
| Contains VBA macros | Macro-Enabled | `.xlsm` |
| Binary with macros | Binary | `.xlsb` |
**Critical**: If user gives `.xlsx` but wants macros → output must be `.xlsm`. Always warn about format change.
---
## VBA Code Structure Standard
Every generated VBA module must follow this structure:
```vba
Option Explicit
' ============================================================
' Module: [ModuleName]
' Purpose: [One-line description]
' Author: Z.ai
' Date: [YYYY-MM-DD]
' ============================================================
' --- Constants ---
Private Const MODULE_NAME As String = "[ModuleName]"
' --- Main Entry Point ---
Public Sub Main()
On Error GoTo ErrHandler
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
' [Main logic here]
CleanUp:
Application.ScreenUpdating = True
Application.Calculation = xlCalculationAutomatic
Exit Sub
ErrHandler:
MsgBox "Error in " & MODULE_NAME & ": " & Err.Description, _
vbCritical, "Error"
Resume CleanUp
End Sub
```
### Naming Conventions
| Element | Convention | Example |
|---------|-----------|---------|
| Sub/Function | PascalCase | `GenerateMonthlyReport` |
| Variable | camelCase | `lastRow`, `wsData` |
| Constant | UPPER_SNAKE | `MAX_ROWS`, `REPORT_TITLE` |
| Module | PascalCase | `ModReport`, `ModUtils` |
| Worksheet variable | ws + Name | `wsData`, `wsSummary` |
| Range variable | rng + Desc | `rngData`, `rngHeaders` |
### Variable Declaration Rules
```vba
' Always use explicit types
Dim lastRow As Long ' Not Integer (row limit)
Dim ws As Worksheet
Dim rng As Range
Dim cell As Range
Dim i As Long
Dim strValue As String
Dim dblAmount As Double
```
---
## Common Patterns
### Find Last Row/Column (Robust)
```vba
' Last row with data in column A
Dim lastRow As Long
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
' Last column with data in row 1
Dim lastCol As Long
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
' Used range (less reliable but useful)
Dim usedRows As Long
usedRows = ws.UsedRange.Rows.Count
```
### Loop Through Data
```vba
' Row loop
Dim i As Long
For i = 2 To lastRow ' Skip header
If ws.Cells(i, 1).Value <> "" Then
' Process row
End If
Next i
' For Each (range)
Dim cell As Range
For Each cell In ws.Range("A2:A" & lastRow)
If Not IsEmpty(cell) Then
' Process cell
End If
Next cell
```
### Sheet Operations
```vba
' Reference sheet safely
Dim ws As Worksheet
On Error Resume Next
Set ws = ThisWorkbook.Sheets("Data")
On Error GoTo 0
If ws Is Nothing Then
MsgBox "Sheet 'Data' not found!", vbExclamation
Exit Sub
End If
' Create sheet if not exists
' After this block wsNew always refers to the "Summary" worksheet,
' whether it already existed or was just created.
Dim wsNew As Worksheet
Dim wsItem As Worksheet
Dim sheetExists As Boolean
' Iterate Worksheets, not Sheets: Sheets also contains chart sheets, which
' cannot be assigned to a Worksheet variable (Type mismatch).
For Each wsItem In ThisWorkbook.Worksheets
    ' Sheet names are case-insensitive in Excel, so compare as text
    If StrComp(wsItem.Name, "Summary", vbTextCompare) = 0 Then
        Set wsNew = wsItem
        sheetExists = True
        Exit For
    End If
Next wsItem
If Not sheetExists Then
    Set wsNew = ThisWorkbook.Worksheets.Add(After:=ThisWorkbook.Sheets(ThisWorkbook.Sheets.Count))
    wsNew.Name = "Summary"
End If
```
### User Interaction
```vba
' Simple input
Dim userInput As String
userInput = InputBox("Enter report month (YYYY-MM):", "Month Selection")
If userInput = "" Then Exit Sub
' Confirmation
If MsgBox("Generate report for " & userInput & "?", _
vbYesNo + vbQuestion, "Confirm") = vbNo Then Exit Sub
' File picker
Dim filePath As Variant
filePath = Application.GetOpenFilename( _
FileFilter:="Excel Files (*.xlsx;*.xlsm),*.xlsx;*.xlsm", _
Title:="Select Source File")
If filePath = False Then Exit Sub
```
---
## VBA Injection via openpyxl
### Method 1: Preserve Existing VBA
```python
# Open with VBA preserved
wb = load_workbook('source.xlsm', keep_vba=True)
# Edit data/formatting as usual
wb.save('output.xlsm') # VBA modules intact
```
### Method 2: Copy VBA from Template
```python
# Use a template .xlsm that already has the VBA you need
import shutil
shutil.copy('template_with_macros.xlsm', 'output.xlsm')
wb = load_workbook('output.xlsm', keep_vba=True)
# Modify data
wb.save('output.xlsm')
```
### Method 3: Manual vbaProject.bin Injection
```python
# For advanced use: inject raw vbaProject.bin
# 1. Create your VBA in Excel, save as .xlsm
# 2. Extract vbaProject.bin from the .xlsm (it's a ZIP)
# 3. Inject into new workbook
import zipfile
import shutil
# Create the workbook first
wb = Workbook()
# ... add data ...
wb.save('temp.xlsx')
# Convert to .xlsm by injecting VBA
shutil.copy('temp.xlsx', 'output.xlsm')
with zipfile.ZipFile('output.xlsm', 'a') as zf:
zf.write('vbaProject.bin', 'xl/vbaProject.bin')
# Update [Content_Types].xml to register VBA
# (This is fragile — Method 1 or 2 preferred)
```
**Recommendation**: Method 1 (preserve) or Method 2 (template) are robust. Method 3 is fragile and should be last resort.
---
## Security Checklist
Before delivering any VBA-enabled file:
- [ ] No filesystem access outside workbook (no `Kill`, `FileCopy`, `MkDir` unless requested)
- [ ] No network calls (`XMLHTTP`, `WinHttpRequest`) unless requested
- [ ] No shell execution (`Shell`, `WScript.Shell`) unless requested
- [ ] No registry access (`CreateObject("WScript.Shell").RegWrite`)
- [ ] No auto-execution (`Auto_Open`, `Workbook_Open`) unless explicitly requested
- [ ] Error handling in every Sub/Function
- [ ] `ScreenUpdating` restored in cleanup
- [ ] All variables explicitly declared (`Option Explicit`)
- [ ] Module purpose documented in header comment
---
## Performance Guidelines
```vba
' ALWAYS bracket bulk operations
Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.EnableEvents = False
' [Bulk operations here]
Application.EnableEvents = True
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True
```
### Array-Based Processing (for large data)
```vba
' Read range into array — much faster than cell-by-cell
' Starts at row 2 (assumes row 1 is a header, as in the row loops above):
' multiplying header text would raise "Type mismatch".
Dim data As Variant
Dim i As Long
' Guard: with no data rows, "A2:Z1" would silently resolve to rows 1-2
If lastRow >= 2 Then
    data = ws.Range("A2:Z" & lastRow).Value ' 2D array, 1-based
    ' Process in memory
    For i = LBound(data, 1) To UBound(data, 1)
        data(i, 3) = data(i, 1) * data(i, 2) ' Column C = A * B
    Next i
    ' Write back in one shot
    ' NOTE: this overwrites any formulas in A:Z with their current values
    ws.Range("A2:Z" & lastRow).Value = data
End If
```
---
## Debugging Support
When user reports VBA errors, include diagnostic code:
```vba
' Debug logging to Immediate Window
Debug.Print "Processing row " & i & ": " & ws.Cells(i, 1).Value
' Verbose error info
ErrHandler:
Debug.Print "ERROR in " & MODULE_NAME
Debug.Print " Number: " & Err.Number
Debug.Print " Description: " & Err.Description
Debug.Print " Source: " & Err.Source
```