Initial commit

This commit is contained in:
Z User
2026-06-06 05:21:10 +00:00
Unverified
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions

View File

@@ -0,0 +1,386 @@
# Chart Templates — matplotlib Template Library
## Design Philosophy
GLM uses **matplotlib as the primary chart engine**. Advantages:
- High chart quality, print-ready
- Full style control, consistent with document palette
- Supports complex chart types (heatmap, radar, box plot, etc.)
- Reliable CJK rendering (with SimHei font configured)
**When to use native Word charts?**
Only when the user explicitly requests "editable charts." Default is always matplotlib PNG embedding.
## Base Configuration
```python
import os

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
# ── CJK Font ──
# Candidate CJK font files, probed in order; the first one that exists wins.
_FONT_PATHS = [
    "/System/Library/Fonts/Supplemental/SimHei.ttf",  # macOS
    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",  # Linux
    "/usr/share/fonts/truetype/chinese/SimHei.ttf",  # custom install
    "./SimHei.ttf",  # current dir
]
# FontProperties(fname=...) only records the path; it does not open the file,
# so a try/except around the constructor never skips a missing font. Check the
# file explicitly instead. ZH_FONT stays None when no candidate exists, in
# which case matplotlib falls back to its default font.
ZH_FONT = None
for _fp in _FONT_PATHS:
    if os.path.isfile(_fp):
        ZH_FONT = FontProperties(fname=_fp)
        break
# Draw minus signs as ASCII hyphen-minus rather than U+2212.
plt.rcParams["axes.unicode_minus"] = False
# ── Palette Adapter ──
def make_chart_palette(accent: str, surface: str = "#F2F4F6") -> dict:
    """Build the chart color palette from the document's accent color.

    Args:
        accent: Hex color used as ``primary`` and as the base color for the
            six ``series`` colors.
        surface: Hex color stored unchanged under ``surface``.

    Returns:
        Dict with the keys ``primary``, ``series``, ``grid``, ``bg``,
        ``text`` and ``surface``.
    """
    series_colors = _generate_series_colors(accent, 6)
    chart_palette = dict(primary=accent, series=series_colors)
    chart_palette.update(grid="#E0E0E0", bg="white", text="#333333",
                         surface=surface)
    return chart_palette
def _generate_series_colors(base_hex: str, count: int) -> list:
"""Generate series colors via hue rotation from base color"""
import colorsys
base = tuple(int(base_hex.lstrip("#")[i:i+2], 16) / 255.0 for i in (0, 2, 4))
h, s, v = colorsys.rgb_to_hsv(*base)
colors = []
for i in range(count):
hi = (h + i * (1.0 / count)) % 1.0
r, g, b = colorsys.hsv_to_rgb(hi, min(s * 0.9, 1.0), min(v * 1.05, 1.0))
colors.append(f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}")
return colors
# ── Universal Export ──
def save_chart(fig, path: str, dpi: int = 200):
    """Save ``fig`` to ``path`` on a white background, then close the figure.

    Near-square figures (width and height within 0.1 inch of each other,
    e.g. pie/radar) get 0.3 inch of padding; all other figures get 0.1 inch.
    Because ``bbox_inches="tight"`` crops to the drawn content, the saved
    image's aspect ratio can differ from the figure size; code that embeds
    the image should read its actual dimensions.

    Args:
        fig: Matplotlib figure to export.
        path: Destination file path, passed to ``fig.savefig``.
        dpi: Output resolution.

    Returns:
        ``path``, unchanged.
    """
    width, height = fig.get_size_inches()
    pad = 0.3 if abs(width - height) < 0.1 else 0.1
    try:
        fig.savefig(path, dpi=dpi, bbox_inches="tight", pad_inches=pad,
                    facecolor="white", edgecolor="none")
    finally:
        # Close the figure even if saving fails; otherwise pyplot keeps it
        # registered and repeated failures accumulate open figures.
        plt.close(fig)
    return path
```
## Template 1: Bar Chart
```python
def bar_chart(categories: list, values: list, title: str = "",
              ylabel: str = "", palette: dict = None, output: str = "bar.png"):
    """Basic bar chart with a value label on every bar.

    Args:
        categories: Bar labels, e.g. ``["Q1", "Q2", "Q3", "Q4"]``.
        values: Bar heights, e.g. ``[120, 150, 180, 200]``.
        title: Optional chart title.
        ylabel: Optional y-axis label.
        palette: Chart palette dict (uses the ``primary``, ``text`` and
            ``grid`` keys); defaults to ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    p = palette or make_chart_palette("#5B8DB8")
    fig, ax = plt.subplots(figsize=(10, 6))
    # Plot on explicit integer positions rather than on the categories
    # themselves: numeric categories (e.g. years) would otherwise be placed
    # on a numeric scale, and duplicate category names would collapse onto a
    # single bar.
    positions = np.arange(len(categories))
    bars = ax.bar(positions, values, color=p["primary"], width=0.6,
                  edgecolor="white")
    # Data labels: gap is 2% of the largest magnitude (0 for empty input).
    label_gap = max((abs(v) for v in values), default=0) * 0.02
    for bar, val in zip(bars, values):
        below = val < 0
        # Negative bars grow downward, so their label goes under the bar end.
        y = bar.get_height() - label_gap if below else bar.get_height() + label_gap
        ax.text(bar.get_x() + bar.get_width() / 2, y,
                str(val), ha="center", va="top" if below else "bottom",
                fontsize=10, fontproperties=ZH_FONT, color=p["text"])
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=15,
                     color=p["text"])
    if ylabel:
        ax.set_ylabel(ylabel, fontproperties=ZH_FONT, fontsize=11,
                      color=p["text"])
    # Fix the tick locations first so every label maps to exactly one bar.
    ax.set_xticks(positions)
    # Rotate labels on this axes when there are many of them, instead of
    # going through pyplot's "current axes" state.
    tick_style = {"rotation": 45, "ha": "right"} if len(categories) > 6 else {}
    ax.set_xticklabels(categories, fontproperties=ZH_FONT, fontsize=10,
                       **tick_style)
    ax.spines[["top", "right"]].set_visible(False)
    ax.grid(axis="y", alpha=0.3, color=p["grid"])
    return save_chart(fig, output)
```
### Grouped Bar Chart
```python
def grouped_bar(categories: list, groups: dict, title: str = "",
                ylabel: str = "", palette: dict = None, output: str = "grouped_bar.png"):
    """Grouped (side-by-side) bar chart.

    Args:
        categories: X-axis category labels.
        groups: Mapping of series name to one value per category, e.g.
            ``{"Product A": [10, 20, 30], "Product B": [15, 25, 35]}``.
        title: Optional chart title.
        ylabel: Optional y-axis label.
        palette: Chart palette dict (uses the ``series`` key); defaults to
            ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    p = palette or make_chart_palette("#5B8DB8")
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(categories))
    n = len(groups)
    # Each category slot is 0.8 wide and shared by the n series; max(n, 1)
    # keeps an empty ``groups`` from dividing by zero.
    width = 0.8 / max(n, 1)
    for i, (name, vals) in enumerate(groups.items()):
        # Center the cluster of n bars on the category tick.
        offset = (i - n / 2 + 0.5) * width
        ax.bar(x + offset, vals, width, label=name,
               color=p["series"][i % len(p["series"])])
    ax.set_xticks(x)
    ax.set_xticklabels(categories, fontproperties=ZH_FONT, fontsize=10)
    if groups:
        # Without any series there is nothing to put in a legend.
        ax.legend(prop=ZH_FONT, frameon=False)
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=15)
    if ylabel:
        # The ylabel argument was previously accepted but never drawn.
        ax.set_ylabel(ylabel, fontproperties=ZH_FONT, fontsize=11)
    ax.spines[["top", "right"]].set_visible(False)
    ax.grid(axis="y", alpha=0.3)
    return save_chart(fig, output)
```
## Template 2: Line Chart
```python
def line_chart(x_data: list, series: dict, title: str = "",
               xlabel: str = "", ylabel: str = "", palette: dict = None,
               output: str = "line.png"):
    """Multi-series line chart with round point markers.

    Args:
        x_data: Shared x values for every series.
        series: Mapping of series name to y values, e.g.
            ``{"Revenue": [100, 120, 150, 180], "Cost": [80, 90, 100, 110]}``.
        title: Optional chart title.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.
        palette: Chart palette dict (uses the ``series`` key); defaults to
            ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    p = palette or make_chart_palette("#5B8DB8")
    fig, ax = plt.subplots(figsize=(10, 6))
    for idx, (label, points) in enumerate(series.items()):
        # Wrap around the palette when there are more series than colors.
        line_color = p["series"][idx % len(p["series"])]
        ax.plot(x_data, points, marker="o", markersize=5, linewidth=2,
                label=label, color=line_color)
    # Title and axis labels are all optional and share the CJK font.
    optional_texts = (
        (title, ax.set_title, {"fontsize": 14, "pad": 15}),
        (xlabel, ax.set_xlabel, {"fontsize": 11}),
        (ylabel, ax.set_ylabel, {"fontsize": 11}),
    )
    for text, apply_text, extra in optional_texts:
        if text:
            apply_text(text, fontproperties=ZH_FONT, **extra)
    ax.legend(prop=ZH_FONT, frameon=False, loc="best")
    ax.spines[["top", "right"]].set_visible(False)
    ax.grid(True, alpha=0.3)
    # Tilt the tick labels once there are more than six x values.
    if len(x_data) > 6:
        plt.xticks(rotation=45, ha="right")
    return save_chart(fig, output)
```
## Template 3: Pie Chart
```python
def pie_chart(labels: list, values: list, title: str = "",
              palette: dict = None, output: str = "pie.png",
              min_share: float = 0.03, other_label: str = "Other"):
    """Pie chart that merges small slices into a single catch-all slice.

    Args:
        labels: Slice labels.
        values: Slice sizes, paired with ``labels`` by position.
        title: Optional chart title.
        palette: Chart palette dict (uses the ``series`` key); defaults to
            ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.
        min_share: Slices whose share of the total is below this fraction
            are merged (default 3%).
        other_label: Label of the merged slice; override it for non-English
            documents.

    Returns:
        The result of ``save_chart(fig, output)``.

    Raises:
        ZeroDivisionError: If ``values`` is non-empty and sums to zero.
    """
    p = palette or make_chart_palette("#5B8DB8")
    # Prepare the data before creating the figure so that bad input does not
    # leave an open, never-saved figure behind.
    total = sum(values)
    merged_labels, merged_values = [], []
    other = 0
    for lbl, val in zip(labels, values):
        if val / total < min_share:
            other += val
        else:
            merged_labels.append(lbl)
            merged_values.append(val)
    if other > 0:
        merged_labels.append(other_label)
        merged_values.append(other)
    fig, ax = plt.subplots(figsize=(8, 8))
    colors = p["series"][:len(merged_labels)]
    wedges, texts, autotexts = ax.pie(
        merged_values, labels=merged_labels, colors=colors,
        autopct="%1.1f%%", startangle=90, pctdistance=0.75,
        textprops={"fontproperties": ZH_FONT, "fontsize": 11},
    )
    # Percentage labels sit inside the wedges, so draw them small and white.
    for t in autotexts:
        t.set_fontsize(10)
        t.set_color("white")
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=20)
    return save_chart(fig, output)
```
## Template 4: Box Plot
```python
def box_plot(data: dict, title: str = "", ylabel: str = "",
             palette: dict = None, output: str = "box.png"):
    """Box plot with one colored box per data group.

    Args:
        data: Mapping of group name to its samples, e.g.
            ``{"Class A": [78, 82, 91, ...], "Class B": [65, 70, 88, ...]}``.
        title: Optional chart title.
        ylabel: Optional y-axis label.
        palette: Chart palette dict (uses the ``series`` key); defaults to
            ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    p = palette or make_chart_palette("#5B8DB8")
    fig, ax = plt.subplots(figsize=(10, 6))
    names = list(data.keys())
    samples = list(data.values())
    # Group names are applied through set_xticklabels() below, so the
    # boxplot() ``labels=`` keyword is not needed; it is deprecated since
    # Matplotlib 3.9 in favor of ``tick_labels=``.
    bp = ax.boxplot(samples, patch_artist=True, notch=False,
                    medianprops={"color": "white", "linewidth": 2})
    for i, patch in enumerate(bp["boxes"]):
        patch.set_facecolor(p["series"][i % len(p["series"])])
        patch.set_alpha(0.8)
    ax.set_xticklabels(names, fontproperties=ZH_FONT, fontsize=11)
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=15)
    if ylabel:
        ax.set_ylabel(ylabel, fontproperties=ZH_FONT, fontsize=11)
    ax.spines[["top", "right"]].set_visible(False)
    ax.grid(axis="y", alpha=0.3)
    return save_chart(fig, output)
```
## Template 5: Radar Chart
```python
def radar_chart(categories: list, series: dict, title: str = "",
                palette: dict = None, output: str = "radar.png"):
    """Radar (spider) chart on a polar axes.

    Args:
        categories: Axis names, e.g.
            ``["Chinese", "Math", "English", "Physics", "Chemistry"]``.
        series: Mapping of series name to one value per category, e.g.
            ``{"Student A": [85, 92, 78, 90, 88]}``.
        title: Optional chart title.
        palette: Chart palette dict (uses the ``series`` key); defaults to
            ``make_chart_palette("#5B8DB8")``.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    p = palette or make_chart_palette("#5B8DB8")
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    n = len(categories)
    angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist()
    angles += angles[:1]  # close the polygon
    for i, (name, values) in enumerate(series.items()):
        # Copy into a list before closing the polygon: for NumPy arrays or
        # pandas Series, ``values + values[:1]`` would add element-wise
        # instead of appending the first point.
        vals = list(values)
        vals += vals[:1]
        color = p["series"][i % len(p["series"])]
        ax.plot(angles, vals, linewidth=2, label=name, color=color)
        ax.fill(angles, vals, alpha=0.15, color=color)
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(categories, fontproperties=ZH_FONT, fontsize=11)
    # Anchor the legend outside the plot area (past the upper-right corner).
    ax.legend(prop=ZH_FONT, loc="upper right", bbox_to_anchor=(1.2, 1.1),
              frameon=False)
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=25)
    return save_chart(fig, output)
```
## Template 6: Heatmap
```python
def heatmap(data: list, row_labels: list, col_labels: list, title: str = "",
            palette: dict = None, output: str = "heatmap.png"):
    """Annotated heatmap using the ``YlOrRd`` colormap.

    Args:
        data: 2D array, e.g. ``[[1, 2, 3], [4, 5, 6]]``.
        row_labels: One label per row, e.g. ``["Row 1", "Row 2"]``.
        col_labels: One label per column, e.g. ``["Col 1", "Col 2", "Col 3"]``.
        title: Optional chart title.
        palette: Accepted for signature parity with the other templates;
            not used here because the colormap is fixed.
        output: Path of the image file to write.

    Returns:
        The result of ``save_chart(fig, output)``.
    """
    fig, ax = plt.subplots(figsize=(max(8, len(col_labels) * 1.2),
                                    max(6, len(row_labels) * 0.8)))
    arr = np.array(data)
    im = ax.imshow(arr, cmap="YlOrRd", aspect="auto")
    ax.set_xticks(range(len(col_labels)))
    ax.set_yticks(range(len(row_labels)))
    ax.set_xticklabels(col_labels, fontproperties=ZH_FONT, fontsize=10)
    ax.set_yticklabels(row_labels, fontproperties=ZH_FONT, fontsize=10)
    # Value annotations. Text contrast is decided from the cell's position
    # within the data range (min..max), because that range is what imshow
    # maps onto the colormap. Comparing against 70% of the maximum alone
    # gives white text on pale cells when the minimum is far from zero.
    vmin, vmax = (arr.min(), arr.max()) if arr.size else (0, 0)
    span = vmax - vmin
    for i in range(len(row_labels)):
        for j in range(len(col_labels)):
            val = arr[i, j]
            # span <= 0 means constant data, drawn at the pale end.
            on_dark_cell = span > 0 and (val - vmin) / span > 0.7
            ax.text(j, i, f"{val:.1f}", ha="center", va="center",
                    fontsize=10, color="white" if on_dark_cell else "black")
    fig.colorbar(im, ax=ax, shrink=0.8)
    if title:
        ax.set_title(title, fontproperties=ZH_FONT, fontsize=14, pad=15)
    return save_chart(fig, output)
```
## Embedding in Documents (MANDATORY — Preserve Aspect Ratio)
**⚠️ Core Rule: When embedding any chart image, you MUST read actual image dimensions to calculate displayHeight. NEVER hardcode both width and height.**
Pie and radar charts are square — mismatched width/height produces ellipses or diamonds.
```js
// ✅ Correct: read actual image dimensions
// NOTE: `fs`, `Paragraph`, `ImageRun` and `AlignmentType` are not declared in
// this snippet; they are assumed to be in scope already.
const chartBuffer = fs.readFileSync("bar.png");
const sizeOf = require("image-size");
// Measure the saved PNG itself rather than assuming the figure's size.
const dims = sizeOf(chartBuffer);
const displayWidth = 500;
// Derive the height from the image's own height/width ratio so the chart is
// never stretched.
const displayHeight = Math.round(displayWidth * (dims.height / dims.width));
new Paragraph({
alignment: AlignmentType.CENTER,
spacing: { before: 200, after: 100 },
children: [
new ImageRun({
data: chartBuffer,
transformation: { width: displayWidth, height: displayHeight },
type: "png",
}),
],
})
```
```js
// ❌ Wrong: hardcoded width and height (pie becomes ellipse, radar becomes diamond)
new ImageRun({
data: chartBuffer,
transformation: { width: 500, height: 350 }, // wrong ratio!
type: "png",
})
```
```python
# ✅ Python (ReportLab) correct approach:
from PIL import Image as PILImage
from reportlab.platypus import Image
# Read the pixel size inside a context manager so the file handle is released
# as soon as the dimensions are known.
with PILImage.open('chart.png') as pil_img:
    orig_w, orig_h = pil_img.size
target_width = 400  # pt
# Scale the height by the same factor as the width to keep the aspect ratio.
scale = target_width / orig_w
img = Image('chart.png', width=target_width, height=orig_h * scale)
```
## Chart Selection Guide
| Data Scenario | Recommended Chart | Template Function |
|---------------|-------------------|-------------------|
| Category comparison | Bar chart | `bar_chart()` |
| Multi-group comparison | Grouped bar | `grouped_bar()` |
| Trend over time | Line chart | `line_chart()` |
| Proportion/composition | Pie chart | `pie_chart()` |
| Distribution/spread | Box plot | `box_plot()` |
| Multi-dimensional assessment | Radar chart | `radar_chart()` |
| Matrix correlation | Heatmap | `heatmap()` |
## Quality Standards
1. **DPI**: Uniform 200 DPI (built into `save_chart`)
2. **Colors**: Derived from document palette.accent for style consistency
3. **CJK text**: Must configure SimHei font; otherwise renders as boxes
4. **Label overlap prevention**: Auto-rotate 45° when >6 x-axis labels
5. **Legend**: Move outside chart (`bbox_to_anchor`) when >4 series
6. **Grid**: Light gray dashed grid lines for readability
7. **Clean frames**: Remove top/right spines for modern minimalist look
8. **Aspect ratio (CRITICAL)**: Must use `image-size` (JS) or `PIL` (Python) to read actual image dimensions and calculate displayHeight proportionally. **Pie and radar charts are square — hardcoding non-1:1 ratio causes ellipse/diamond distortion.**
9. **Dimensions**: Default 10×6 inches, fits well within A4 page

View File

@@ -0,0 +1,419 @@
# Common Rules
Shared rules referenced by all scene files. Scene-specific overrides take precedence.
## Default Page Layout
A4 portrait. Unless the scene specifies otherwise, use:
| Property | Value | Twips |
|----------|-------|-------|
| Page width | 21.0 cm | 11906 |
| Page height | 29.7 cm | 16838 |
| Top margin | 2.54 cm | 1440 |
| Bottom margin | 2.54 cm | 1440 |
| Left margin | 3.0 cm | 1701 |
| Right margin | 2.5 cm | 1417 |
```js
page: {
size: { width: 11906, height: 16838, orientation: PageOrientation.PORTRAIT },
margin: { top: 1440, bottom: 1440, left: 1701, right: 1417 },
}
```
**Scene overrides:**
- **Official doc (GB/T 9704 red-header):** top 2098, bottom 1984, left 1588, right 1474
- **Exam:** top/bottom 1134 (2 cm), left/right 1134 (2 cm)
## Default Font Specifications
Two font profiles exist. Each scene declares which profile it uses.
### Profile A: Formal (report, academic, contract, official-doc, exam)
| Element | CN Font | EN Font | Size | Notes |
|---------|---------|---------|------|-------|
| H1 | SimHei | Times New Roman | 16 pt (size: 32) | Bold, centered |
| H2 | SimHei | Times New Roman | 15 pt (size: 30) | Bold |
| H3 | SimHei | Times New Roman | 14 pt (size: 28) | Bold |
| Body | SimSun | Times New Roman | 12 pt (size: 24) | |
| Caption | SimSun | Times New Roman | 10.5 pt (size: 21) | |
- Text color: always **pure black `"000000"`** (never dark-blue-grey)
- First-line indent: **480 twips** (2 chars at SimSun 12pt)
- Line spacing: **312** (1.3x).
- **Color routing for non-report documents**: When the document is a short-form text (essay, evaluation, letter, speech, application, reflection, etc.) rather than a structured report/whitepaper/proposal/consulting deliverable, heading color MUST use pure black `"000000"` instead of `palette.primary`. Colored headings are reserved for documents that need brand/professional identity (reports with covers, whitepapers, proposals, consulting deliverables).
### Profile B: Visual (resume, copywriting)
| Element | CN Font | EN Font | Size |
|---------|---------|---------|------|
| Name/Title | Microsoft YaHei | Calibri | Varies |
| Body | Microsoft YaHei | Calibri | 10–11 pt |
| Caption | Microsoft YaHei | Calibri | 9 pt |
- First-line indent: **420 twips** (2 chars at YaHei)
- Color: per design-system palette
### Official-Doc Font Override (GB/T 9704)
When `needsRedHeader() = true`:
| Element | Font | Size |
|---------|------|------|
| Red header org name | STXiaoBiaoSong (or SimSun bold) | 26 pt (size: 52) |
| Title | STXiaoBiaoSong (or SimHei) | 22 pt (size: 44) |
| Body | FangSong | 16 pt (size: 32) |
| Section heading | FangSong_GB2312 bold (or HeiTi) | 16 pt (size: 32) |
- Line spacing: **560** (28 pt fixed)
- First-line indent: **640 twips** (2 chars at FangSong 16pt)
## Chinese Font Size Reference
| Name | Points | Half-points (size:) |
|------|--------|---------------------|
| Chu Hao (initial) | 42 | 84 |
| Xiao Chu | 36 | 72 |
| Yi Hao (1st) | 26 | 52 |
| Xiao Yi | 24 | 48 |
| Er Hao (2nd) | 22 | 44 |
| Xiao Er | 18 | 36 |
| San Hao (3rd) | 16 | 32 |
| Xiao San | 15 | 30 |
| Si Hao (4th) | 14 | 28 |
| Xiao Si | 12 | 24 |
| Wu Hao (5th) | 10.5 | 21 |
| Xiao Wu | 9 | 18 |
| Liu Hao (6th) | 7.5 | 15 |
## Placeholder Convention
When required information is missing, use standardized placeholders so users can Find & Replace in Word.
**Format:** Always use full-width brackets `【 】`.
| Type | Format | Example |
|------|--------|---------|
| General field | `【field name】` | Name: 【company name】 |
| Monetary amount | `【RMB in words: yuan (lowercase: ¥)】` | Amount: 【RMB in words】 |
| Date field | `【____/____/____】` | Signing date: 【____/____/____】 |
| Long text | `【Please fill in: ______】` | Delivery criteria: 【Please fill in: ______】 |
| Attachment ref | `【See Appendix 1: ______】` | |
**Rules:**
1. Placeholder format must be consistent throughout the entire document
2. Each placeholder must specify exactly what is needed (never use vague "TBD" or "to be completed")
3. Never hard-code unconfirmed critical facts; use a placeholder instead
4. Never use sloppy expressions like "to be refined", "omitted", "user fills in later"
## Title Orphan Prevention (All Scenes)
Body headings (H1/H2/H3) and cover titles must avoid leaving 1–2 characters alone on the last line. This rule applies to ALL document types.
**For cover titles:** Always use `calcTitleLayout()` + `splitTitleLines()` from `design-system.md` — these handle orphan prevention automatically (merges ≤2-char last lines into the previous line).
**For body headings (H1/H2/H3):** When a heading text is long enough to wrap, apply the same `splitTitleLines()` logic. If the heading would cause a single-character orphan in Word's auto-wrapping, manually split into multiple `TextRun` elements with a `Break` (soft line break) at a semantic boundary.
```js
const { Break } = require("docx");
// Check if heading needs manual line break to prevent orphan
/**
 * Build the TextRun list for a heading, inserting soft line breaks at the
 * positions chosen by splitTitleLines() when the text is too long for one line.
 *
 * @param {string} text - Heading text.
 * @param {number} maxCharsPerLine - Longest text that still fits on one line.
 * @param {object} runProps - Run properties spread into every TextRun.
 * @returns {TextRun[]} One run for short text; otherwise one run per line
 *   with a break-only run between consecutive lines.
 */
function buildHeadingRuns(text, maxCharsPerLine, runProps) {
  // Short headings need no manual breaking.
  if (text.length <= maxCharsPerLine) {
    return [new TextRun({ text, ...runProps })];
  }
  const headingRuns = [];
  splitTitleLines(text, maxCharsPerLine).forEach((line, index) => {
    if (index !== 0) {
      // Soft line break separating this line from the previous one.
      headingRuns.push(new TextRun({ break: 1, ...runProps, text: "" }));
    }
    headingRuns.push(new TextRun({ text: line, ...runProps }));
  });
  return headingRuns;
}
```
**Estimation for maxCharsPerLine:** For centered headings, estimate available width = page width - left margin - right margin. For SimHei at a given pt size, each CJK char ≈ pt × 20 twips wide. Divide available width by char width to get `maxCharsPerLine`.
---
## Undefined / Null Value Prevention (Mandatory)
Generated code MUST guard against outputting literal `undefined`, `null`, `NaN`, or empty strings for any visible text field. This is a **hard requirement** — these are never acceptable in a delivered document.
```js
// ✅ MANDATORY: Safe text helper — use for ALL user-facing text values
/**
 * Return display-safe text for a field, substituting a placeholder when the
 * value is missing.
 *
 * A value counts as missing when it is undefined, null, NaN, an empty or
 * whitespace-only string, or one of the literal strings "NaN", "undefined",
 * "null" (i.e. a missing value that was stringified upstream).
 *
 * @param {*} value - Raw field value.
 * @param {string} [placeholder] - Text to show when the value is missing;
 *   defaults to "【Please fill in】".
 * @returns {string} String(value) unchanged, or the placeholder.
 */
function safeText(value, placeholder) {
  const fallback = placeholder || "【Please fill in】";
  if (value === undefined || value === null) {
    return fallback;
  }
  const text = String(value);
  // Compare the trimmed text so blank-looking fields are caught too; the
  // original text is returned untrimmed when it is valid.
  const missingTokens = ["", "NaN", "undefined", "null"];
  if (missingTokens.includes(text.trim())) {
    return fallback;
  }
  return text;
}
// Usage:
new TextRun({ text: safeText(config.contact, "【Contact person】") })
new TextRun({ text: safeText(row.phone, "【Phone number】") })
```
**Rules:**
1. Every `TextRun` displaying user-provided or config-derived data MUST use `safeText()` or equivalent guard
2. If a field is optional and not provided, use `【Please fill in: field_name】` placeholder (full-width brackets)
3. Table cells with missing data: show `【Please fill in】`, never leave as empty string or undefined
4. This applies to ALL scenes — contracts, reports, academic, exams, etc.
---
## WPS / Office Word Compatibility (Mandatory)
Generated .docx files must render consistently in both Microsoft Office Word and WPS Office. The following OOXML features have known compatibility issues — avoid or use carefully.
### Features to AVOID (high incompatibility risk)
| Feature | Issue | Alternative |
|---------|-------|------------|
| **Text-character decorative lines** (e.g., `───`, `━━━`, `═══`, `——————`) | Character-drawn lines depend on font metrics and rendering engine — they appear different widths/lengths in MS Office vs WPS, often truncated or misaligned. They cannot span a controlled width. | **Always use paragraph borders** (`border.top`, `border.bottom`) for horizontal decorative lines. Paragraph borders render consistently across engines and respect indent for precise width control. See recipe R2 for correct implementation. |
| **Default table borders on cover wrapper tables** (forgetting `allNoBorders`) | docx-js default table borders are `single/auto/sz=4`. On the 16838-high cover wrapper, these borders add ~8 twips of extra height per edge. MS Office includes border thickness in height calculation, causing content to overflow by a few twips → **blank page 2**. WPS is more lenient and may absorb the overflow. | **Every cover wrapper table MUST explicitly set `borders: allNoBorders`** (all 6 border positions = NONE). Never rely on defaults. Define the `allNoBorders` constant and use it consistently. |
| `verticalAlign: "center"` or `"bottom"` in exact-height TableRow | WPS ignores vertical alignment in exact-height rows; content may clip or shift | Use `verticalAlign: "top"` + `spacing.before` to position content. Avoid `margins.top`/`margins.bottom` in exact-height cells — they reduce available height unpredictably across engines |
| `characterSpacing` (large values) | WPS renders differently from Word; letter spacing may collapse or expand | Keep `characterSpacing` ≤ 80; for cover English labels, test both renderers |
| `margins.top`/`margins.bottom` inside exact-height cells | MS Office and WPS calculate remaining height differently when cell margins are present | Use `spacing.before` on the first paragraph for vertical positioning; only use `margins.left`/`margins.right` |
| Complex nested Tables inside exact-height cells | WPS height calculation differs from Word; content may overflow or clip | Wrap everything in a single 16838 outer wrapper cell (R1 architecture). Nested tables inside are acceptable when the outer wrapper provides a safety net |
| Large font without explicit `spacing.line` | Paragraph inherits small line spacing from document default (e.g., 560tw for body); font taller than line height → top of characters clipped | Always set `spacing: { line: fontPt * 23, lineRule: "atLeast" }` on paragraphs with font size > body text |
| `ShadingType.SOLID` | WPS shows solid black instead of intended color | Always use `ShadingType.CLEAR` |
| OOXML raw XML for columns (`w:cols`) | WPS column rendering may differ | Use only when explicitly needed (A3 exam papers); test output |
| `titlePage: true` with complex headers/footers | WPS may not properly suppress first-page header/footer | Use separate sections instead of titlePage flag |
| Tab stops for alignment | WPS tab width may differ from Word | Use borderless Tables for alignment instead |
### Features that are SAFE (consistent rendering)
| Feature | Notes |
|---------|-------|
| Borderless Tables for layout | Both renderers handle well |
| `ShadingType.CLEAR` with fill color | Consistent |
| `rule: "exact"` on single-level TableRow | Works in both (avoid with nested Tables) |
| Paragraph borders (left, bottom, etc.) | Consistent |
| `spacing.before` / `spacing.after` | Consistent |
| Standard fonts (SimHei, SimSun, YaHei, TNR, Calibri) | Available on both platforms |
| `PageBreak` inside Paragraph | Consistent |
| Section breaks (`SectionType.NEXT_PAGE`) | Consistent |
### Mandatory Compatibility Checks (Post-Generation)
Add to quality self-check:
- [ ] No `ShadingType.SOLID` anywhere (search codebase)
- [ ] No `verticalAlign: "center"` or `"bottom"` in exact-height rows
- [ ] No tab-stop alignment for party info or data alignment (use Tables)
- [ ] Covers use the 16838 outer wrapper architecture (R1 pattern) with `spacing.before` for positioning; no `margins.top`/`margins.bottom` in exact-height cells
- [ ] **Cover section margin = `{ top: 0, bottom: 0, left: 0, right: 0 }`** — non-zero margins cause wrapper to shrink away from page edges
- [ ] **Cover wrapper row has `height: { value: 16838, rule: "exact" }`** — without this, content overflows or leaves whitespace
- [ ] **Cover is in a separate section from body content** — cover and body must not share a section
- [ ] **Cover wrapper table uses explicit `allNoBorders`** — never rely on default table borders (causes blank page 2 in MS Office)
- [ ] **No text-character decorative lines** (`───`, `━━━`, `═══`, `——————`) — use paragraph borders instead
- [ ] `characterSpacing` values ≤ 80 throughout
- [ ] TOC: follow `references/toc.md` checklist (heading style, TableOfContents element, PageBreak, post-processing script)
- [ ] All tables use `WidthType.PERCENTAGE` for column widths (WPS tblGrid bug; if DXA is unavoidable, set `columnWidths` explicitly)
```js
// ✅ Correct — percentage widths, WPS-safe
new Table({
width: { size: 100, type: WidthType.PERCENTAGE },
rows: [new TableRow({ children: [
new TableCell({ width: { size: 30, type: WidthType.PERCENTAGE }, children: [...] }),
new TableCell({ width: { size: 70, type: WidthType.PERCENTAGE }, children: [...] }),
]})],
});
// ❌ WRONG — DXA widths cause WPS tblGrid mismatch (all gridCol=100)
new TableCell({ width: { size: 3000, type: WidthType.DXA }, ... })
```
---
## Universal Prohibitions
These apply to ALL scenes. Scene files may add scene-specific prohibitions.
1. **No outlines-only** — always produce a complete, finished document
2. **No chat-style output** — the document must not read like a conversation or explanation
3. **No fake TOC / page numbers / headers** — use proper docx-js structures
4. **No excessive blank lines** to pad layout
5. **No dirty formatting** — no stray annotations, template fragments, broken hyperlinks, garbled markers
6. **No sloppy placeholders** — "TBD", "omitted", "略", "to be refined" are forbidden; use proper `【】` placeholders
7. **No fabricated data** — do not invent statistics, citations, legal references, or facts to appear professional
8. **No inconsistent heading/numbering** — one numbering system per document, no level-skipping
9. **No Markdown artifacts** — no `#`, `**`, `-` list markers, `>` blockquotes, and **no Markdown table syntax** (`| col1 | col2 |`, `|---|---|`) in the final docx. Any tabular data MUST be rendered as a proper docx `Table` object — never as plain-text pipe-delimited lines. This applies to ALL scenes including exam paper data tables, report statistics, and academic result tables.
10. **No bullet-list documents** — body text must be proper paragraphs, not endless bullet points
## Letter / Correspondence Format (Universal)
When generating any letter-style document (invitation letter, thank-you letter, cover letter, recommendation letter, English essay in letter format, etc.), the following layout rules apply regardless of scene:
1. **Complimentary close and sender name MUST be right-aligned** — e.g., "Yours sincerely,", "Best regards,", "Yours,", and the sender name below it must use `alignment: AlignmentType.RIGHT`
2. **Date** — if placed at the top of the letter, right-aligned; if at the bottom, right-aligned with the closing
3. **Salutation** ("Dear Mr. Smith," / "Dear Mike,") — left-aligned, followed by a blank line or `spacing.after`
4. **Body paragraphs** — left-aligned (English) or justified (CJK), with appropriate `spacing.after` between paragraphs
```js
// ✅ Correct — closing and sender right-aligned
new Paragraph({ alignment: AlignmentType.RIGHT, spacing: { before: 400 },
children: [new TextRun({ text: "Yours sincerely,", size: 24 })] }),
new Paragraph({ alignment: AlignmentType.RIGHT,
children: [new TextRun({ text: "Li Hua", size: 24 })] }),
// ❌ WRONG — closing left-aligned (default)
new Paragraph({
children: [new TextRun({ text: "Yours sincerely," })] }),
```
## Quality Self-Check (Universal)
→ See **SKILL.md § Post-Generation — Two-Layer Verification** for the complete checklist.
Scene files add scene-specific checks on top of that universal checklist.
## Execution Priority
When rules conflict, follow this precedence (highest first):
1. **User-provided template or explicit instructions** — always override defaults
2. **Scene-specific rules** — override common rules and design-system defaults
3. **Common rules** (this file) — override design-system aesthetic defaults
4. **Design-system defaults** — baseline aesthetics
## Cover Recipes
See `references/design-system.md` for the 7 validated cover recipes (R1–R7) and 14 color palettes.
Cover recipe selection: `selectCoverRecipe(docType, industry, titleLength)` — defined in `references/design-system.md` (authoritative source).
---
## Cover Title Layout Rules (Mandatory)
These rules apply to ALL cover recipes (R1–R7). They prevent the most common cover quality issues: title overflow, content spilling to page 2, and mid-word line breaks.
### Rule 1: Always use `calcTitleLayout()`
Every cover MUST call `calcTitleLayout(title, availableWidth)` from `design-system.md` to determine:
- **Font size** (dynamically calculated, never hardcoded above 40pt)
- **Line breaks** (semantically split, never mid-word)
**Forbidden:** Passing the full title as a single long TextRun and letting Word auto-wrap. This causes uncontrolled line breaks at arbitrary character positions.
### Rule 2: No single-character orphan lines
If the last line of a title contains only 1–2 characters, merge it into the previous line. The `splitTitleLines()` function handles this automatically.
### Rule 3: No mid-word breaks for CJK text
Line breaks must occur at semantic boundaries: after particles (e.g., de/yu/he/ji/zhi), punctuation, connectors, spaces, or underscores. Never split a compound term (e.g., a 4-character term like a management specification must not be split into 3+1 characters).
For mixed Chinese+English titles (e.g., "基于Transformer架构的..."), use `estimateTextWidth()` instead of character count for line break calculation. Chinese characters are ~2× wider than English characters at the same font size.
### Rule 4: Maximum 3 title lines on cover
Cover titles must not exceed 3 lines. If the title is too long, reduce font size (down to minimum 24pt) before adding more lines. If it still exceeds 3 lines at 24pt, force 3 lines with longer line lengths.
### Rule 5: Always use `calcCoverSpacing()` for whitespace
Spacing values (`spacing.before`) in cover elements must be dynamically calculated, not hardcoded. Fixed values like `before: 4500` assume a specific title length and will cause overflow with longer titles.
### Rule 6: Cover height budget validation
Before generating, verify that total content height stays within 15638 twips (16838 page height minus 1200 twips safety margin — MS Office renders large fonts taller than calculated). Each recipe in `design-system.md` includes height budget annotations — verify during generation.
### Rule 7: R5 meta info table (academic covers)
Academic cover meta info must use a 2-column table with **percentage widths only** (NOT DXA — WPS breaks with DXA widths):
- **Table width:** adaptive 55–75% of page, calculated by `calcR5MetaLayout()` in `design-system.md`. Table is centered via `alignment: CENTER`.
- **Label column:** adaptive 25–45% of table width, **LEFT aligned**, plain text label + "：" (full-width colon). NO full-width space padding, NO right-alignment, NO distributed alignment.
- **Value column:** remaining percentage, **LEFT aligned**, `bottom border single sz=4` = fixed-length underline (same length for all rows regardless of value text length).
- **Label column borders:** none (NO bottom border on label cells).
- ⚠️ Do NOT use DXA widths, full-width space padding (`\u3000`), spacer columns, or tab stops — these render inconsistently between MS Office and WPS.
### Rule 8: Large font paragraphs must set explicit line spacing
When a paragraph uses a font size larger than the document body text (e.g., cover titles at 36pt+), it **MUST** set explicit `spacing.line` to prevent clipping. Without it, the paragraph inherits the document/style default line spacing (often 560 twips for body text), which is smaller than the font height → the top of characters gets clipped.
**Formula:** `spacing.line = Math.ceil(fontPt * 23)` with `lineRule: "atLeast"`
**Example:** A 36pt title needs `spacing: { line: 828, lineRule: "atLeast" }`. Without this, the inherited `line=560` clips the top 160 twips of the text.
This applies to ALL large-font paragraphs (cover titles, chapter headings, decorative text), not just covers.
### Rule 9: Every TextRun on a colored background MUST set explicit `color`
⚠️ **CRITICAL:** When a TextRun is inside a cell/area with a dark or colored background (shading), it **MUST** explicitly set the `color` property. Omitting `color` defaults to black (`#000000`), which is invisible on dark backgrounds.
**Common mistake:** Subtitle or meta text on R1/R2/R4 dark cover blocks without `color` → appears as invisible black text on dark bg.
**Rule:** For any TextRun inside a shaded cell:
- Use `P.cover.titleColor` for title text
- Use `P.cover.subtitleColor` for subtitle text
- Use `P.cover.metaColor` for meta info text
- Use `P.cover.footerColor` for footer text
- **NEVER** rely on default color when background is not white
### Rule 10: Page number API nesting and 5-zone numbering
⚠️ **CRITICAL:** Page number settings MUST be nested inside `page.pageNumbers`:
```js
// ❌ WRONG — docx-js ignores top-level pageNumberStart/pageNumberFormatType
properties: { pageNumberStart: 1, pageNumberFormatType: NumberFormat.DECIMAL }
// ✅ CORRECT
properties: { page: { pageNumbers: { start: 1, formatType: NumberFormat.DECIMAL } } }
```
**Standard page numbering (5-zone convention):**
All multi-section documents MUST follow this five-zone page numbering scheme unless the user explicitly requests otherwise.
| Zone | Section | pageNumbers | Footer instrText | Notes |
|------|---------|-------------|-----------------|-------|
| 1. Cover | Title page | None (no footer) | — | Always logical page 1, but number is **hidden** |
| 2. Front matter | Abstract, TOC, Preface | `{ start: 1, formatType: UPPER_ROMAN }` | `PAGE \* ROMAN \* MERGEFORMAT` | Separate Roman numeral sequence (I, II, III…) |
| 3. Body | Main content | `{ start: 1, formatType: DECIMAL }` | `PAGE \* arabic \* MERGEFORMAT` | **Resets to 1** |
| 4. Appendix | Appendices (A, B, C…) | Continues body (no reset) | Same as body | No section break needed unless different headers required |
| 5. References | Bibliography | Continues body (no reset) | Same as body | If body ends on p.42, references continue from p.43 |
**Key rules:**
0. **NEVER use "Page X of Y" denominator format.** Footer must show only the current page number (e.g., `1`, `2`, `iii`). Do NOT display total page count. No `Page 3 of 12`, no `3 / 12`, no `第3页/共12页`. Just the bare number. `PageNumber.TOTAL_PAGES` / `NUMPAGES` is **FORBIDDEN** in footers.
1. **Cover is always page 1 internally** but the page number is never displayed. Suppress footer in cover section.
2. **Front matter uses independent Roman numerals** starting at `I` (uppercase, matching `UPPER_ROMAN` / `\* ROMAN`). This sequence is separate from the body.
3. **Body resets to Arabic 1.** The first page of main content is always page `1`.
4. **Appendix and references continue the body sequence.** No reset between body → appendix → references.
5. **Documents without front matter** skip zone 2 (cover hidden, body starts at Arabic 1).
6. **Documents without cover** start body (or front matter) at page 1 directly.
7. **Short documents (≤3 pages):** simple Arabic 1, 2, 3 throughout, no cover/frontmatter distinction.
8. **Single-page documents** (certificates, letters): no page numbering at all.
**3-section docx-js implementation (for documents with TOC):**
At minimum, implement zones 1–3 as separate docx sections:
```js
// Section 1: Cover — no page number
properties: { page: { /* no pageNumbers */ } }
// No footer children, or empty footer
// Section 2: Front matter — Roman numerals
properties: { page: { pageNumbers: { start: 1, formatType: NumberFormat.UPPER_ROMAN } } }
// Footer: PAGE \* ROMAN \* MERGEFORMAT
// Section 3: Body — Arabic, reset to 1
properties: { page: { pageNumbers: { start: 1, formatType: NumberFormat.DECIMAL } } }
// Footer: PAGE \* arabic \* MERGEFORMAT
// Appendix and References: same section as body (continues numbering)
// Only create a new section if different header/footer content is needed
```
**Post-processing required** (WPS compatibility):
1. Remove empty `<w:pgNumType/>` from cover section XML
2. Patch footer instrText: replace bare `PAGE` with format-specific `PAGE \* ROMAN` or `PAGE \* arabic`
See `toc.md` § Page Number API for full details.

View File

@@ -0,0 +1,538 @@
## Geometric Decoration System — Pure docx-js Decorations
### Design Philosophy
Uses only docx-js native capabilities for visual decoration — no external tools (like Playwright screenshots). Suitable for covers, chapter separators, page background enhancement.
**When to fall back to Playwright?**
Only when gradients, complex illustrations, or brand visuals are needed that pure OOXML cannot express. Default: prefer native solutions below.
### Decoration Element Library
#### 1. Color Strip — Table Simulation
Single-row single-column borderless table + background color to create horizontal color strips.
```js
/**
 * Build a horizontal colour strip as a full-width, borderless,
 * single-row / single-cell table whose only cell is shaded.
 *
 * @param {string} color   Hex fill colour; the first "#" is removed.
 * @param {number} [height=80]  Row height value, applied with rule "exact".
 * @returns {Table} The strip table, ready to place among section children.
 */
function colorStrip(color, height = 80) {
  const fill = color.replace("#", "");

  // An empty paragraph is still required: a table cell needs a child.
  const stripCell = new TableCell({
    shading: { type: ShadingType.CLEAR, fill },
    borders: { top: NB, bottom: NB, left: NB, right: NB },
    children: [new Paragraph({ children: [] })],
  });

  const stripRow = new TableRow({
    height: { value: height, rule: "exact" },
    children: [stripCell],
  });

  return new Table({
    width: { size: 100, type: WidthType.PERCENTAGE },
    borders: {
      top: NB, bottom: NB, left: NB, right: NB,
      insideHorizontal: NB, insideVertical: NB,
    },
    rows: [stripRow],
  });
}
// ══════════════════════════════════════════════════════════════
// R6 — Editorial Warm (minimal, warm white bg, no decorations)
// ══════════════════════════════════════════════════════════════
// Suitable for: lesson plans (non-STEM), cultural/creative, newsletters,
// event planning, internal reports, light-weight documents
// NOT for: formal business, consulting, finance, government, academic
// Title constraint: single line only (≤20 chars). Longer titles → route to R1.
//
// Structure: 2-row wrapper table (no border, warm bg shading)
// Row 1 (content): category → title → subtitle → fields
// Row 2 (footer): left English title + right label
// All spacing via paragraph indent (WPS safe, no cell margins).
/**
 * Split one R6 cover meta line into its label and value.
 *
 * The fullwidth colon (U+FF1A) takes precedence over the ASCII colon, and
 * only the FIRST occurrence of the chosen separator splits the line, so a
 * value may itself contain colons (e.g. a time such as "10:30").
 * The separator is written as an escape so it cannot be lost by tooling
 * that strips or normalizes non-ASCII punctuation.
 *
 * @param {string} line  Raw meta line ("label<sep>value" or plain text).
 * @returns {{label: string, value: string}} value is "" when no separator exists.
 */
function _splitMetaLineR6(line) {
  const text = String(line);
  for (const sep of ["\uFF1A", ":"]) {
    const at = text.indexOf(sep);
    if (at !== -1) {
      return {
        label: text.slice(0, at).trim(),
        value: text.slice(at + sep.length).trim(),
      };
    }
  }
  return { label: text, value: "" };
}

/**
 * Build the R6 "Editorial Warm" cover: a borderless two-row wrapper table
 * (content row + footer row) shaded with the palette background.
 *
 * Reads from `config`:
 *   - palette      palette object (`bg`, `cover.titleColor`, `cover.subtitleColor`,
 *                  `cover.metaColor`, `cover.footerColor`); each colour has a fallback
 *   - title        cover title, laid out by calcTitleLayoutR6()
 *   - englishLabel optional category label above the title
 *   - subtitle     optional subtitle
 *   - metaLines    optional array of "label:value" / "label：value" / plain lines
 *   - footerLeft / footerRight  optional footer texts
 *
 * @param {object} config  Cover configuration (see above).
 * @returns {Table[]} Single-element array holding the wrapper table.
 */
function buildCoverR6(config) {
  const P = config.palette;
  // Every colour below has a literal fallback, so tolerate a palette without `cover`.
  const cover = P.cover || {};
  const PAD_L = 1300, PAD_R = 1100;
  const ind = { left: PAD_L, right: PAD_R };
  const FOOTER_H = 900;
  // 16838 x 11906 is the page size used throughout this reference.
  const CONTENT_H = 16838 - FOOTER_H;
  const shading = { fill: P.bg || "F7F7F5", type: ShadingType.CLEAR };

  // R6 uses a simplified title layout: prefer a single line, shrink font to fit.
  const availW = 11906 - PAD_L - PAD_R;
  const { titlePt, titleLines } = calcTitleLayoutR6(config.title, availW, 36, 22);
  const titleSize = titlePt * 2; // docx `size` is in half-points
  // Explicit line spacing so large title glyphs are not clipped (Rule 8), plus 30% air.
  const lineH = Math.ceil(titlePt * 23 * 1.3);

  // Estimate the stacked content height to derive a dynamic top spacer.
  const metaLines = config.metaLines || [];
  const titleH = titleLines.length * (titleSize * 10 + 200);
  const categoryH = 22 * 10 + 900;
  const subtitleH = config.subtitle ? (28 * 10 + 1200) : 0;
  const fieldsH = metaLines.length * (24 * 10 + 100);
  const contentH = categoryH + titleH + subtitleH + fieldsH;
  const remaining = Math.max(CONTENT_H - 1200 - contentH, 400);
  const topSpacing = Math.floor(remaining * 0.55);

  const children = [];

  // 1. Top spacer (dynamic)
  children.push(new Paragraph({ indent: ind, spacing: { before: topSpacing } }));

  // 2. Category label (small, wide letter-spacing)
  if (config.englishLabel) {
    children.push(new Paragraph({
      indent: ind, spacing: { after: 900 },
      children: [new TextRun({
        text: config.englishLabel, size: 22,
        color: cover.metaColor || "9A9A9A",
        font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" },
        characterSpacing: 60,
      })],
    }));
  }

  // 3. Title (single line preferred, dynamic font size)
  for (let i = 0; i < titleLines.length; i++) {
    const isLastLine = i === titleLines.length - 1;
    children.push(new Paragraph({
      indent: ind,
      spacing: { after: isLastLine ? 300 : 60, line: lineH, lineRule: "atLeast" },
      children: [new TextRun({
        text: titleLines[i], size: titleSize,
        color: cover.titleColor || "2C2C2C",
        font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" },
        characterSpacing: 30,
      })],
    }));
  }

  // 4. Subtitle
  if (config.subtitle) {
    children.push(new Paragraph({
      indent: ind, spacing: { after: 1200 },
      children: [new TextRun({
        text: config.subtitle, size: 28,
        color: cover.subtitleColor || "6B6B6B",
        font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" },
        characterSpacing: 15,
      })],
    }));
  }

  // 5. Meta fields (tab-aligned label + value)
  for (const line of metaLines) {
    // Previously the separator test was indexOf(""), which is always 0 and
    // reduced every label to its first character; see _splitMetaLineR6.
    const { label, value } = _splitMetaLineR6(line);
    children.push(new Paragraph({
      indent: ind, spacing: { after: 100 },
      tabStops: [{ type: TabStopType.LEFT, position: PAD_L + 1600 }],
      children: [
        new TextRun({ text: label, size: 22, color: cover.metaColor || "9A9A9A",
          font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" }, characterSpacing: 20 }),
        ...(value ? [
          new TextRun({ text: "\t" }),
          new TextRun({ text: value, size: 24, color: cover.subtitleColor || "6B6B6B",
            font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" }, characterSpacing: 8 }),
        ] : []),
      ],
    }));
  }

  // 6. Footer (2-column borderless table)
  const footerLeft = config.footerLeft || "";
  const footerRight = config.footerRight || "";
  // Adaptive font size / tracking for long English footer text
  const flSize = footerLeft.length > 60 ? 14 : (footerLeft.length > 40 ? 16 : 18);
  const flSpacing = footerLeft.length > 60 ? 5 : (footerLeft.length > 40 ? 10 : 20);
  const footerTable = new Table({
    width: { size: 100, type: WidthType.PERCENTAGE },
    layout: TableLayoutType.FIXED, borders: allNoBorders,
    rows: [new TableRow({
      children: [
        new TableCell({
          width: { size: 70, type: WidthType.PERCENTAGE }, borders: noBorders, shading,
          children: [new Paragraph({
            indent: { left: PAD_L },
            children: [new TextRun({ text: footerLeft, size: flSize,
              color: cover.footerColor || "9A9A9A",
              font: { ascii: "Calibri" }, characterSpacing: flSpacing })],
          })],
        }),
        new TableCell({
          width: { size: 30, type: WidthType.PERCENTAGE }, borders: noBorders, shading,
          children: [new Paragraph({
            alignment: AlignmentType.RIGHT, indent: { right: PAD_R },
            children: [new TextRun({ text: footerRight, size: 18,
              color: cover.footerColor || "9A9A9A",
              font: { ascii: "Calibri" }, characterSpacing: 20 })],
          })],
        }),
      ],
    })],
  });

  // 7. 2-row wrapper (content + footer); both rows have exact heights that sum to 16838.
  return [new Table({
    width: { size: 100, type: WidthType.PERCENTAGE },
    layout: TableLayoutType.FIXED, borders: allNoBorders,
    rows: [
      new TableRow({
        height: { value: CONTENT_H, rule: "exact" },
        children: [new TableCell({
          shading, borders: noBorders,
          margins: { top: 0, bottom: 0, left: 0, right: 0 },
          verticalAlign: VerticalAlign.TOP,
          children,
        })],
      }),
      new TableRow({
        height: { value: FOOTER_H, rule: "exact" },
        children: [new TableCell({
          shading, borders: noBorders,
          margins: { top: 0, bottom: 0, left: 0, right: 0 },
          verticalAlign: VerticalAlign.CENTER,
          children: [footerTable],
        })],
      }),
    ],
  })];
}
// R6 title layout: prefer FEWER lines over larger font size (single line best)
/**
 * Choose a title font size and line split for the R6 cover, preferring
 * FEWER lines over a larger font.
 *
 * Strategy:
 *   1. largest size (preferredPt down to minPt) at which the title fits on one line;
 *   2. otherwise the largest size at which splitTitleLines() yields at most two lines;
 *   3. otherwise minPt with however many lines splitTitleLines() returns.
 *
 * Sizes are tried in steps of 2pt and minPt is ALWAYS tried last, even when
 * (preferredPt - minPt) is not a multiple of the step. Without that, a title
 * that fits on one line only at minPt would be wrapped at a larger size.
 *
 * @param {string} title              Title text; its `.length` is the character count.
 * @param {number} availableWidthTw   Available line width.
 * @param {number} preferredPt        Largest font size to try, in points.
 * @param {number} minPt              Smallest font size allowed, in points.
 * @returns {{titlePt: number, titleLines: string[]}}
 */
function calcTitleLayoutR6(title, availableWidthTw, preferredPt, minPt) {
  const step = 2;
  // Per-character width heuristic: pt * 23 * 0.5 (labelled "CJK ~50% em width").
  const charsPerLineAt = (pt) => Math.floor(availableWidthTw / (pt * 23 * 0.5));

  // Candidate sizes, largest first, always ending on minPt.
  const sizes = [];
  for (let pt = preferredPt; pt > minPt; pt -= step) sizes.push(pt);
  sizes.push(minPt);

  // Pass 1: fit on a single line, shrinking the font if needed.
  for (const pt of sizes) {
    if (title.length <= charsPerLineAt(pt)) return { titlePt: pt, titleLines: [title] };
  }

  // Pass 2: cannot fit on one line; take two lines at the largest possible font.
  for (const pt of sizes) {
    const lines = splitTitleLines(title, charsPerLineAt(pt));
    if (lines.length <= 2) return { titlePt: pt, titleLines: lines };
  }

  // Fallback: minPt, as many lines as the splitter produces.
  return { titlePt: minPt, titleLines: splitTitleLines(title, charsPerLineAt(minPt)) };
}
// Usage: cover top decoration
// children: [colorStrip(P.accent, 120), ...]
```
#### 2. Side Ribbon
Uses left border to create vertical ribbon effect.
```js
/**
 * Wrap runs in a paragraph that carries a thick left border, giving a
 * vertical "ribbon" beside the text.
 *
 * @param {Array} content   Paragraph children (e.g. TextRun instances).
 * @param {string} color    Hex ribbon colour; the first "#" is removed.
 * @param {number} [width=14]  Border `size` of the ribbon.
 * @returns {Paragraph}
 */
function sideRibbon(content, color, width = 14) {
  const ribbon = {
    style: BorderStyle.SINGLE,
    size: width,
    color: color.replace("#", ""),
    space: 12,
  };
  return new Paragraph({
    border: { left: ribbon },
    indent: { left: 240 },
    spacing: { before: 100, after: 100 },
    children: content,
  });
}
// Usage: emphasis quotes, chapter tips
// sideRibbon([new TextRun({ text: "Key Insight", bold: true })], P.accent)
```
#### 3. Border Compositions
```js
/**
 * Title-area frame: centred paragraph with a thick rule above (size 18)
 * and a thin rule below (size 4), both in the palette accent colour.
 *
 * @param {Array} titleRuns  Paragraph children for the title.
 * @returns {Paragraph}
 */
function frameTitle(titleRuns) {
  const rule = (size) => ({ style: BorderStyle.SINGLE, size, color: c(P.accent) });
  const thickTop = rule(18);
  const thinBottom = rule(4);
  return new Paragraph({
    border: { top: thickTop, bottom: thinBottom },
    spacing: { before: 400, after: 200 },
    alignment: AlignmentType.CENTER,
    children: titleRuns,
  });
}
/**
 * L-shaped border: accent-coloured rules on the left and bottom edges of
 * an indented paragraph.
 *
 * @param {Array} content  Paragraph children.
 * @returns {Paragraph}
 */
function lShapeBorder(content) {
  const leftRule = { style: BorderStyle.SINGLE, size: 12, color: c(P.accent), space: 10 };
  const bottomRule = { style: BorderStyle.SINGLE, size: 12, color: c(P.accent) };
  return new Paragraph({
    border: { left: leftRule, bottom: bottomRule },
    indent: { left: 300 },
    spacing: { before: 200, after: 300 },
    children: content,
  });
}
/**
 * Double-line frame: centred paragraph with accent-coloured double rules
 * above and below.
 *
 * @param {Array} content  Paragraph children.
 * @returns {Paragraph}
 */
function doubleLine(content) {
  const makeRule = () => ({ style: BorderStyle.DOUBLE, size: 6, color: c(P.accent) });
  const topRule = makeRule();
  const bottomRule = makeRule();
  return new Paragraph({
    border: { top: topRule, bottom: bottomRule },
    spacing: { before: 200, after: 200 },
    alignment: AlignmentType.CENTER,
    children: content,
  });
}
```
#### 4. Gradient Simulation
Multiple narrow color strips to simulate gradient effect.
```js
/**
 * Simulate a vertical gradient with a stack of thin shaded table rows,
 * blending from startColor (first row) to endColor (last row).
 *
 * @param {string} startColor   Hex colour of the first band.
 * @param {string} endColor     Hex colour of the last band.
 * @param {number} [steps=5]    Number of bands. Values below 1 (or NaN) are
 *                              treated as 1; fractions are floored.
 * @param {number} [totalHeight=200]  Combined height; each band gets
 *                              floor(totalHeight / bands) with rule "exact".
 * @returns {Table} Full-width borderless table with one row per band.
 */
function gradientStrip(startColor, endColor, steps = 5, totalHeight = 200) {
  // A table needs at least one row, and the ratio below divides by (bands - 1).
  const bands = steps >= 1 ? Math.floor(steps) : 1;
  const bandHeight = Math.floor(totalHeight / bands);

  const rows = [];
  for (let i = 0; i < bands; i++) {
    // A single band has no "end": use the start colour instead of 0/0 = NaN.
    const ratio = bands === 1 ? 0 : i / (bands - 1);
    const blended = blendColors(startColor, endColor, ratio);
    rows.push(new TableRow({
      height: { value: bandHeight, rule: "exact" },
      children: [new TableCell({
        shading: { type: ShadingType.CLEAR, fill: blended },
        borders: { top: NB, bottom: NB, left: NB, right: NB },
        children: [new Paragraph({ children: [] })],
      })],
    }));
  }

  return new Table({
    width: { size: 100, type: WidthType.PERCENTAGE },
    borders: { top: NB, bottom: NB, left: NB, right: NB,
      insideHorizontal: NB, insideVertical: NB },
    rows,
  });
}
/**
 * Linearly interpolate between two hex colours, channel by channel.
 *
 * Accepts "#RRGGBB", "RRGGBB", and the 3-digit shorthand ("#RGB" / "RGB"),
 * consistent with the other helpers in this file that take colours with or
 * without a leading "#".
 *
 * @param {string} hex1   Colour returned at ratio 0.
 * @param {string} hex2   Colour returned at ratio 1.
 * @param {number} ratio  Blend position; clamped to [0, 1] (NaN counts as 0) so the
 *                        result is always a valid 6-digit value.
 * @returns {string} Lowercase "rrggbb" WITHOUT a leading "#".
 */
function blendColors(hex1, hex2, ratio) {
  const toChannels = (hex) => {
    let digits = String(hex).replace("#", "");
    if (digits.length === 3) {
      // Expand shorthand: "abc" -> "aabbcc"
      digits = digits.split("").map((d) => d + d).join("");
    }
    return [0, 2, 4].map((at) => parseInt(digits.slice(at, at + 2), 16));
  };

  const t = ratio > 0 ? Math.min(ratio, 1) : 0;
  const from = toChannels(hex1);
  const to = toChannels(hex2);

  return from
    .map((start, i) => Math.round(start + (to[i] - start) * t))
    .map((channel) => channel.toString(16).padStart(2, "0"))
    .join("");
}
```
#### 5. Symbol Ornaments
```js
/**
 * Section divider for chapter separation: a centred line made of `count`
 * copies of `symbol` separated by two spaces, in the palette accent colour.
 *
 * @param {string} [symbol="◆"]  Ornament character.
 * @param {number} [count=3]     How many times to repeat it.
 * @returns {Paragraph}
 */
function ornamentDivider(symbol = "◆", count = 3) {
  const glyphs = Array(count).fill(symbol);
  const ornamentRun = new TextRun({
    text: glyphs.join("  "),
    size: 20,
    color: c(P.accent),
  });
  return new Paragraph({
    alignment: AlignmentType.CENTER,
    spacing: { before: 400, after: 400 },
    children: [ornamentRun],
  });
}
// Common decoration symbols
// ◆ ◇ ● ○ ★ ☆ ■ □ ▲ △ ─ ━ ═ ║ ╔ ╗ ╚ ╝
// Ornamental: ❧ ❦ ✦ ✧ ✿ ❀ ❁ ※
```
#### 6. Info Card — Table Implementation
```js
/**
 * Build an "info card": a centred, 80%-wide borderless table with an
 * accent-filled header spanning both columns and one label/value row per item.
 *
 * @param {string} title         Header text (bold, white).
 * @param {Array<[string, string]>} items  [label, value] pairs, one per row.
 * @param {string} accentColor   Hex header fill; the first "#" is removed.
 * @returns {Table}
 */
function infoCard(title, items, accentColor) {
  const headerFill = accentColor.replace("#", "");

  // Header: one cell spanning both columns.
  const headerCell = new TableCell({
    columnSpan: 2,
    shading: { type: ShadingType.CLEAR, fill: headerFill },
    margins: { top: 80, bottom: 80, left: 160, right: 160 },
    borders: { top: NB, bottom: NB, left: NB, right: NB },
    children: [new Paragraph({
      children: [new TextRun({ text: title, bold: true, size: 24, color: "FFFFFF" })],
    })],
  });
  const rows = [new TableRow({ children: [headerCell] })];

  // Each data cell only draws a light hairline along its bottom edge.
  const hairlineBorders = () => ({
    bottom: { style: BorderStyle.SINGLE, size: 1, color: "E0E0E0" },
    top: NB, left: NB, right: NB,
  });

  for (const [label, value] of items) {
    const labelCell = new TableCell({
      width: { size: 30, type: WidthType.PERCENTAGE },
      margins: { top: 60, bottom: 60, left: 160, right: 80 },
      shading: { type: ShadingType.CLEAR, fill: "F8F9FA" },
      borders: hairlineBorders(),
      children: [new Paragraph({
        children: [new TextRun({ text: label, size: 21, color: "666666" })],
      })],
    });
    const valueCell = new TableCell({
      margins: { top: 60, bottom: 60, left: 80, right: 160 },
      borders: hairlineBorders(),
      children: [new Paragraph({
        children: [new TextRun({ text: value, size: 21 })],
      })],
    });
    rows.push(new TableRow({ children: [labelCell, valueCell] }));
  }

  return new Table({
    width: { size: 80, type: WidthType.PERCENTAGE },
    alignment: AlignmentType.CENTER,
    borders: { top: NB, bottom: NB, left: NB, right: NB,
      insideHorizontal: NB, insideVertical: NB },
    rows,
  });
}
```
// R7 — Swiss Tech Minimalist (slate grey bg, Klein blue accent, asymmetric layout)
// Suitable for: cultural/creative research, trend reports, brand strategy, design deliverables
// Palette: ST-1 (exclusive)
// Layout: left-aligned title (upper 20%), right-shifted subtitle with top rule,
// right-aligned info block with accent right border, Swiss cross anchor
// Key features: ■ square accent dot, open-frame tables, large whitespace
//
// ⚠️ MANDATORY: All cover non-negotiables apply (margin=0, 16838 exact, allNoBorders)
// ⚠️ Title uses calcTitleLayout() with maxPt=36 (not 40 — R7 uses lighter visual weight)
/**
 * Build the R7 "Swiss Tech Minimalist" cover.
 *
 * Stacks, top to bottom: a fullwidth-plus anchor, the left-aligned title
 * (accent ■ appended to the last line), an optional right-shifted subtitle
 * with a top rule, a light horizontal rule, and a right-aligned block of
 * label/value pairs with an accent right border. Everything is wrapped in a
 * single borderless table row of exact height 16838.
 *
 * Reads from `config`: palette (key into `palettes`, default "ST-1"), title,
 * subtitle, and either metaEntries ([{label, value}]) or the individual
 * organization / responsibility / reportNumber / dateEdition fields.
 *
 * @param {object} config  Cover configuration.
 * @returns {Table[]} Single-element array holding the wrapper table.
 */
function buildCoverR7(config) {
  const P = palettes[config.palette || "ST-1"];
  const C = P.cover;
  const padL = 600;

  // Title layout — R7 caps the title at 36pt (lighter than R1-R4's 40pt).
  const availW = 11906 - padL - 600;
  const layout = calcTitleLayout(config.title, availW, 36, 24);
  const titleLines = layout.titleLines;
  const titleSize = layout.titlePt * 2;
  const lineH = Math.ceil(layout.titlePt * 23);

  // Titles of up to two lines get the roomier vertical rhythm.
  const roomy = titleLines.length <= 2;
  const topSpacer = roomy ? 1200 : 800;
  const subtitleSpacer = roomy ? 1400 : 800;
  const infoSpacer = roomy ? 2200 : 1200;

  const blocks = [];
  const gap = (before) => new Paragraph({ spacing: { before } });

  // 1. Swiss cross anchor — top-left decorative element
  blocks.push(new Paragraph({
    spacing: { before: 600 },
    indent: { left: padL },
    children: [new TextRun({
      text: "\uFF0B", // fullwidth plus
      size: 40, bold: true, color: C.titleColor,
      font: { ascii: "Arial", eastAsia: "SimHei" },
    })],
  }));

  // 2. Top spacer
  blocks.push(gap(topSpacer));

  // 3. Title lines — left-aligned, last line carries the accent ■
  const lastIndex = titleLines.length - 1;
  for (let i = 0; i < titleLines.length; i++) {
    const closing = i === lastIndex;
    const runs = [new TextRun({
      text: titleLines[i], size: titleSize, color: C.titleColor,
      font: { ascii: "Arial", eastAsia: "Noto Sans SC" },
    })];
    if (closing) {
      runs.push(new TextRun({
        text: " \u25A0", // ■ black square
        size: 24, color: P.accent,
        font: { ascii: "Arial" },
      }));
    }
    blocks.push(new Paragraph({
      indent: { left: padL },
      spacing: { after: closing ? 200 : 80, line: lineH, lineRule: "atLeast" },
      children: runs,
    }));
  }

  // 4. Subtitle spacer (emitted whether or not a subtitle follows)
  blocks.push(gap(subtitleSpacer));

  // 5. Subtitle — right-shifted, top border rule, wide character spacing
  if (config.subtitle) {
    blocks.push(new Paragraph({
      indent: { left: 3800, right: 600 },
      border: { top: { style: BorderStyle.SINGLE, size: 2, color: C.titleColor, space: 14 } },
      spacing: { after: 200 },
      children: [new TextRun({
        text: config.subtitle, size: 26, color: C.subtitleColor,
        font: { ascii: "Arial", eastAsia: "Noto Sans SC" },
        characterSpacing: 40,
      })],
    }));
  }

  // 6. Decorative horizontal line
  blocks.push(new Paragraph({
    spacing: { before: 600 },
    border: { bottom: { style: BorderStyle.SINGLE, size: 1, color: "C8D0DC", space: 0 } },
  }));

  // 7. Info spacer
  blocks.push(gap(infoSpacer));

  // 8. Info footer — right-aligned label+value pairs, accent right border.
  //    Standard fields: ORGANIZATION, RESPONSIBILITY, REPORT NUMBER, DATE & EDITION
  const metaEntries = config.metaEntries || [
    { label: "ORGANIZATION", value: config.organization || "" },
    { label: "RESPONSIBILITY", value: config.responsibility || "" },
    { label: "REPORT NUMBER", value: config.reportNumber || "" },
    { label: "DATE & EDITION", value: config.dateEdition || "" },
  ];
  // Label and value paragraphs share alignment, indent and the accent border.
  const metaParagraph = (run, after) => new Paragraph({
    alignment: AlignmentType.RIGHT,
    indent: { right: 800 },
    border: { right: { style: BorderStyle.SINGLE, size: 12, color: P.accent, space: 16 } },
    spacing: { after },
    children: [run],
  });
  for (const { label, value } of metaEntries) {
    // Label — 7pt uppercase English
    blocks.push(metaParagraph(new TextRun({
      text: label, size: 14, color: C.metaColor,
      font: { ascii: "Arial" },
      characterSpacing: 20,
    }), 20));
    // Value — 11pt bold
    blocks.push(metaParagraph(new TextRun({
      text: value, size: 22, bold: true, color: C.titleColor,
      font: { ascii: "Arial", eastAsia: "Noto Sans SC" },
    }), 280));
  }

  // Wrap in the 16838-exact wrapper table
  const wrapperCell = new TableCell({
    shading: { type: ShadingType.CLEAR, fill: P.bg },
    borders: noBorders,
    verticalAlign: VerticalAlign.TOP,
    children: blocks,
  });
  return [new Table({
    width: { size: 100, type: WidthType.PERCENTAGE },
    layout: TableLayoutType.FIXED,
    borders: allNoBorders,
    rows: [new TableRow({
      height: { value: 16838, rule: "exact" },
      children: [wrapperCell],
    })],
  })];
}
### Decoration Usage Scenarios
| Scenario | Recommended Decoration | Combination |
|------|----------|----------|
| Report cover | Color strip + L-frame border | Top strip → Title area → L-frame author info |
| Proposal cover | Gradient simulation + double-line frame | Gradient bg → Double-line title |
| Chapter separator | Symbol ornament + side ribbon | Symbol divider → New chapter title with ribbon |
| Summary card | Info card | Standalone card displaying key metrics |
| Academic cover | Color strip + info table | Top strip → School name → Title → Info table |
---

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,257 @@
# docx-js Advanced Features
Advanced API for complex document scenarios. Load this when creating documents with TOC, cover pages, footnotes, multi-section layouts, or post-processing needs.
## Table of Contents (TOC)
**→ See `references/toc.md` for the complete TOC reference** (3-step process, code examples, page numbering, common bugs, checklist).
## Cover Page Design (Vertical Centering)
Use large `spacing.before` to push content down for visual centering:
```js
// Approximate vertical center on A4:
// Total printable height ≈ 14000 twips
// For title at ~40% from top: before = 5600
const coverSection = {
properties: {
page: { /* standard A4 */ },
// No headers/footers on cover page
},
children: [
new Paragraph({ spacing: { before: 5600 } }), // spacer
new Paragraph({
alignment: AlignmentType.CENTER,
children: [new TextRun({
text: title,
font: { ascii: "Calibri", eastAsia: "SimHei" },
size: 52, bold: true, color: palette.primary,
})],
}),
// ... subtitle, author, date
],
};
```
For multi-section documents, put the cover in its own section so it can have different headers/footers.
## Footnotes
```js
const { FootnoteReferenceRun, Footnote } = require("docx");
const doc = new Document({
footnotes: {
1: { children: [new Paragraph({ children: [new TextRun({ text: "Smith, J. (2024). Research Methods. Academic Press, pp. 45-67.", size: 18 })] })] },
2: { children: [new Paragraph({ children: [new TextRun({ text: "Zhang, W. (2023). \u201c数据分析方法研究\u201d. 科学通报, 68(12), 1234-1250.", size: 18 })] })] },
},
sections: [{
children: [
new Paragraph({
children: [
new TextRun({ text: "According to recent studies" }),
new FootnoteReferenceRun(1), // superscript [1]
new TextRun({ text: ", data analysis methods have evolved" }),
new FootnoteReferenceRun(2), // superscript [2]
new TextRun({ text: "." }),
],
}),
],
}],
});
```
### Academic Reference Pattern
For sequential references [1][2][3]..., pre-define all footnotes in the `footnotes` object with numeric keys, then reference them inline with `FootnoteReferenceRun(n)`.
## keepNext — Element Binding
Prevent page breaks between related elements:
```js
// Heading stays with next paragraph
new Paragraph({
heading: HeadingLevel.HEADING_2,
keepNext: true, // don't break after this
children: [new TextRun({ text: "Table 1: Results" })],
})
// Table immediately follows on same page
// Caption stays with image
new Paragraph({
keepNext: true,
alignment: AlignmentType.CENTER,
children: [new TextRun({ text: "Figure 1: Architecture Diagram", italics: true, size: 20 })],
})
// ImageRun paragraph follows
```
Use `keepNext: true` for:
- Heading → first paragraph of section
- Table caption → table
- Image → image caption
- "Figure X" label → image
## Page Break Rules
Follow the document type strategy defined in SOUL.md Rule 1.
**Structural breaks (always):**
- Cover page → TOC
- TOC → main content
- Main content → back cover
**Content breaks (by document type):**
- Academic / teaching → `new Paragraph({ children: [new PageBreak()] })` before each H1 chapter
- Business report → PageBreak before each H1; H2 flows naturally
- Resume / contract / letter → No content page breaks
- Short article → No content page breaks
**Anti-tear (mandatory):**
```js
// Heading stays with next paragraph
new Paragraph({
heading: HeadingLevel.HEADING_1,
keepNext: true,
children: [new TextRun("Chapter Title")],
})
// Table caption stays with table
new Paragraph({
keepNext: true,
children: [new TextRun({ text: "Table 1: Summary", italics: true })],
})
// Image caption stays with image
new Paragraph({
keepNext: true,
children: [new TextRun({ text: "Figure 1: Architecture", italics: true })],
})
```
**Never:**
- PageBreak inside tables
- PageBreak as standalone element (must be inside Paragraph)
- PageBreak at the END of the last section (causes blank page)
```js
// Correct: page break between cover and TOC
new Paragraph({ children: [new PageBreak()] })
```
## Quotes Escaping in JS Strings
**⚠️⚠️⚠️ CRITICAL — #1 MOST COMMON BUG ⚠️⚠️⚠️**
Bare Chinese curly quotation marks (`“”` `‘’`) in JS string literals **WILL break syntax and crash document generation**. This bug occurs most often in **Chinese body text** where curly quotes are used for emphasis, proper nouns, event names, or quoted speech — e.g., `“双11”`, `“前低后高”`, `“618”大促`. **Every single occurrence** of `“”‘’` in text content MUST be Unicode-escaped. No exceptions.
**MANDATORY RULE: Before writing ANY `TextRun`, `para()`, or string containing Chinese text, scan the text for `“”‘’` characters and replace ALL of them with `\u201c \u201d \u2018 \u2019`.**
| Character | Unicode | Escape method |
|-----------|---------|---------------|
| `“` `”` | U+201C / U+201D | Unicode escape `\u201c` `\u201d` |
| `‘` `’` | U+2018 / U+2019 | Unicode escape `\u2018` `\u2019` |
| `"` | U+0022 | `\"` or wrap string in single quotes / template literal |
| `'` | U+0027 | `\'` or wrap string in double quotes / template literal |
```js
// ❌ WRONG — curly quotes in Chinese text break JS syntax (VERY COMMON MISTAKE)
content.push(para("2025年四个季度行业增速呈现"前低后高"的态势。在"618"大促、"双11""双12"活动拉动下增长显著。"));
new TextRun({ text: "他说"你好"" })
new TextRun({ text: 'It's a test' })
// ✅ CORRECT — ALL curly quotes replaced with Unicode escapes
content.push(para("2025年四个季度行业增速呈现\u201c前低后高\u201d的态势。在\u201c618\u201d大促、\u201c双11\u201d\u201c双12\u201d活动拉动下增长显著。"));
new TextRun({ text: "他说\u201c你好\u201d" })
new TextRun({ text: "It\u2019s a test" })
// ✅ CORRECT — straight quotes escaped or use alternate delimiters
new TextRun({ text: "He said \"hello\"" })
new TextRun({ text: 'He said "hello"' })
new TextRun({ text: `He said "hello"` })
```
## Multi-Section Documents
Different headers/footers per section:
```js
const doc = new Document({
sections: [
{
// Section 1: Cover — no header/footer
properties: { page: { /* ... */ } },
children: coverChildren,
},
{
// Section 2: Front matter — Roman page numbers
properties: {
type: SectionType.NEXT_PAGE,
page: {
/* size, margin... */
pageNumbers: { start: 1, formatType: NumberFormat.UPPER_ROMAN },
},
},
headers: { default: new Header({ children: [] }) },
footers: {
default: new Footer({
children: [new Paragraph({
alignment: AlignmentType.CENTER,
children: [new TextRun({ children: [PageNumber.CURRENT], size: 18 })],
})],
}),
},
children: tocAndAbstract,
},
{
// Section 3: Main content — Arabic page numbers
properties: {
type: SectionType.NEXT_PAGE,
page: {
/* size, margin... */
pageNumbers: { start: 1, formatType: NumberFormat.DECIMAL },
},
},
headers: {
default: new Header({
children: [new Paragraph({
alignment: AlignmentType.CENTER,
children: [new TextRun({ text: docTitle, size: 18, color: "888888" })],
})],
}),
},
footers: { default: footerWithPageNumbers },
children: mainContent,
},
],
});
```
## Converting DOCX to PDF
```bash
# Using LibreOffice (headless)
libreoffice --headless --convert-to pdf output.docx
# ⚠️ TOC Rule: If document has TOC, warn user that:
# 1. LibreOffice conversion may show empty TOC
# 2. User should open in Word first, update fields (Ctrl+A → F9), save, then convert
# 3. Or use Word's "Save as PDF" for best results
```
## Converting DOCX to Images
```bash
# Step 1: Convert to PDF
libreoffice --headless --convert-to pdf output.docx
# Step 2: Convert PDF to images
pdftoppm -png -r 200 output.pdf output_page
# This generates output_page-1.png, output_page-2.png, etc.
# Use -r 200 for good quality (200 DPI)
```
Useful for generating preview thumbnails or when user needs images instead of document files.

View File

@@ -0,0 +1,333 @@
# docx-js API Reference
Complete API for creating .docx documents with the `docx` npm package. For advanced features (TOC details, footnotes, PDF conversion), see `docx-js-advanced.md`.
## Setup
```js
// Names used by the examples in this reference. SymbolRun and UnderlineType
// are needed by the "Paragraph + TextRun" formatting examples.
const {
  Document, Packer, Paragraph, TextRun, SymbolRun, Table, TableRow, TableCell,
  ImageRun, PageBreak, Header, Footer, PageNumber, NumberFormat,
  AlignmentType, HeadingLevel, WidthType, BorderStyle, ShadingType, UnderlineType,
  PageOrientation, TabStopType, TabStopPosition, ExternalHyperlink,
  InternalHyperlink, Bookmark, LevelFormat, TableOfContents,
} = require("docx");
const fs = require("fs");
```
## Document Creation + Export
```js
const doc = new Document({
styles: { /* see Styles section */ },
numbering: { config: [ /* see Lists section */ ] },
sections: [{
properties: {
page: {
size: { width: 11906, height: 16838 },
margin: { top: 1417, bottom: 1417, left: 1701, right: 1417 },
},
},
headers: { default: new Header({ children: [/* */] }) },
footers: { default: new Footer({ children: [/* */] }) },
children: [ /* Paragraphs, Tables, etc. */ ],
}],
});
const buffer = await Packer.toBuffer(doc);
fs.writeFileSync("output.docx", buffer);
```
## Paragraph + TextRun
```js
new Paragraph({
heading: HeadingLevel.HEADING_1, // or HEADING_2, HEADING_3
alignment: AlignmentType.JUSTIFIED,
spacing: { before: 240, after: 120, line: 312 }, // 1.3x mandatory
indent: { firstLine: 480 }, // 2-char CJK indent (480 SimSun / 420 YaHei)
children: [
new TextRun({
text: "Hello",
bold: true,
italics: true,
size: 24, // 12pt = Xiao Si
font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" },
color: "000000", // Pure black for Profile A; for Profile B use palette.body
}),
],
});
// Additional text formatting options
new TextRun({ text: "Underlined", underline: { type: UnderlineType.SINGLE } })
new TextRun({ text: "Highlighted", highlight: "yellow" })
new TextRun({ text: "Strikethrough", strike: true })
new TextRun({ text: "x²", superScript: true })
new TextRun({ text: "H₂O", subScript: true })
new SymbolRun({ char: "2022", font: "Symbol" }) // Bullet •
```
## Table
**⚠️ CRITICAL**: Always set `margins` on TableCell (or at Table level for global default). Without margins, text touches borders.
**⚠️ CRITICAL**: Use `ShadingType.CLEAR` — never `ShadingType.SOLID` (causes black cells).
**⚠️ CRITICAL — Table Cross-Page Control**:
- Header row MUST set `tableHeader: true` (auto-repeat header on page break)
- All rows MUST set `cantSplit: true` (prevent row content split across pages)
- Title paragraph before table MUST set `keepNext: true` (keep title with table)
```js
// ⚠️ Title before table — keepNext keeps title with table
new Paragraph({
keepNext: true, // ← critical
children: [new TextRun({ text: "Table 1 Feature Comparison", bold: true, size: 21 })],
}),
new Table({
width: { size: 100, type: WidthType.PERCENTAGE },
borders: {
top: { style: BorderStyle.SINGLE, size: 2, color: "9AA6B2" },
bottom: { style: BorderStyle.SINGLE, size: 2, color: "9AA6B2" },
left: { style: BorderStyle.NONE },
right: { style: BorderStyle.NONE },
insideHorizontal: { style: BorderStyle.SINGLE, size: 1, color: "D0D0D0" },
insideVertical: { style: BorderStyle.NONE },
},
rows: [
// ⚠️ Header row — tableHeader + cantSplit
new TableRow({
tableHeader: true, // auto-repeat on page break
cantSplit: true, // prevent row split
children: ["Header 1", "Header 2"].map(text =>
new TableCell({
children: [new Paragraph({ children: [new TextRun({ text, bold: true, size: 21 })] })],
shading: { type: ShadingType.CLEAR, fill: "F1F5F9" },
margins: { top: 60, bottom: 60, left: 120, right: 120 },
width: { size: 50, type: WidthType.PERCENTAGE },
})
),
}),
// ⚠️ Data rows — cantSplit
new TableRow({
cantSplit: true, // prevent row split
children: ["Data 1", "Data 2"].map(text =>
new TableCell({
children: [new Paragraph({ children: [new TextRun({ text, size: 21 })] })],
margins: { top: 60, bottom: 60, left: 120, right: 120 },
width: { size: 50, type: WidthType.PERCENTAGE },
})
),
}),
],
});
```
### Column Widths
```js
// Fixed widths (twips)
width: { size: 3000, type: WidthType.DXA }
// Percentage
width: { size: 50, type: WidthType.PERCENTAGE }
```
## ImageRun
**⚠️ CRITICAL**: Always include `type` parameter. Always preserve aspect ratio.
```js
const imageBuffer = fs.readFileSync("chart.png");
// Calculate dimensions preserving aspect ratio
const displayWidth = 500;
const aspectRatio = originalHeight / originalWidth;
const displayHeight = Math.round(displayWidth * aspectRatio);
new Paragraph({
alignment: AlignmentType.CENTER,
children: [
new ImageRun({
data: imageBuffer,
transformation: { width: displayWidth, height: displayHeight },
type: "png", // REQUIRED: "png", "jpg", "gif", "bmp"
}),
],
});
```
## PageBreak
**⚠️ CRITICAL**: PageBreak MUST be inside a Paragraph. Standalone PageBreak crashes Word.
**⚠️ Best Practice**: Attach PageBreak to the end of a **paragraph with text content**. Avoid empty paragraph + PageBreak (may cause blank pages). If using multi-section structure, prefer section breaks over PageBreak.
```js
// ✅ Recommended — PageBreak attached to content paragraph
new Paragraph({
children: [
new TextRun({ text: "End of section" }),
new PageBreak()
]
})
// ✅ Acceptable — but prefer section breaks
new Paragraph({ children: [new PageBreak()] })
// ✅ Best — use section breaks instead of PageBreak
// Place content in different sections — auto page break
```
## Headers & Footers + Page Numbers
```js
headers: {
default: new Header({
children: [
new Paragraph({
alignment: AlignmentType.CENTER,
children: [new TextRun({ text: "Document Title", size: 18, color: "888888" })],
}),
],
}),
},
footers: {
default: new Footer({
children: [
new Paragraph({
alignment: AlignmentType.CENTER,
children: [
new TextRun({ children: [PageNumber.CURRENT], size: 18 }),
],
}),
],
}),
},
```
> ⚠️ **Denominator FORBIDDEN** — never use `PageNumber.TOTAL_PAGES` or "X / Y" format. Show only current page number.
## Styles Definition
The example below is for **Chinese documents** (default). For **English documents**, replace `font` with `"Times New Roman"` throughout.
```js
styles: {
default: {
document: {
run: {
font: { ascii: "Calibri", eastAsia: "Microsoft YaHei" },
size: 24, color: "000000", // Pure black for Profile A; for Profile B use palette.body
},
paragraph: {
spacing: { line: 312 }, // 1.3x mandatory
},
},
heading1: {
run: { font: { ascii: "Calibri", eastAsia: "SimHei" }, size: 32, bold: true, color: "0B1220" },
paragraph: { spacing: { before: 360, after: 160, line: 312 } },
},
heading2: {
run: { font: { ascii: "Calibri", eastAsia: "SimHei" }, size: 28, bold: true, color: "0B1220" },
paragraph: { spacing: { before: 240, after: 120, line: 312 } },
},
heading3: {
run: { font: { ascii: "Calibri", eastAsia: "SimHei" }, size: 24, bold: true, color: "0B1220" },
paragraph: { spacing: { before: 200, after: 100, line: 312 } },
},
},
}
```
## Lists
**⚠️ CRITICAL**: Each separate numbered list MUST use a unique `reference` name. Reusing the same reference causes numbering to continue instead of restarting.
```js
// In Document numbering config
numbering: {
config: [
{
reference: "list-features", // unique name!
levels: [{
level: 0,
format: LevelFormat.DECIMAL,
text: "%1.",
alignment: AlignmentType.LEFT,
style: { paragraph: { indent: { left: 720, hanging: 360 } } },
}],
},
{
reference: "list-benefits", // different name for second list!
levels: [{ /* same config */ }],
},
],
},
// Usage in paragraphs
new Paragraph({
numbering: { reference: "list-features", level: 0 },
children: [new TextRun({ text: "First item" })],
})
```
### Bullet Lists
```js
new Paragraph({
bullet: { level: 0 },
children: [new TextRun({ text: "Bullet item" })],
})
```
## Hyperlinks
### External Link
```js
new ExternalHyperlink({
children: [new TextRun({ text: "Click here", style: "Hyperlink" })],
link: "https://example.com",
})
```
### Internal Link (Bookmark)
```js
// Define bookmark at target
new Paragraph({
children: [
new Bookmark({ id: "section1", children: [new TextRun("Section 1")] }),
],
})
// Link to bookmark
new InternalHyperlink({
children: [new TextRun({ text: "Go to Section 1", style: "Hyperlink" })],
anchor: "section1",
})
```
## Table of Contents (TOC)
**→ See `references/toc.md` for the complete TOC reference.**
Quick reminder: (1) Add `TableOfContents` element + PageBreak, (2) Run `python3 "$DOCX_SCRIPTS/add_toc_placeholders.py" output.docx --auto`, (3) Check exit code.
## Tabs
```js
new Paragraph({
tabStops: [
{ type: TabStopType.RIGHT, position: TabStopPosition.MAX },
],
children: [new TextRun("Left"), new TextRun("\t"), new TextRun("Right")]
})
```
## Constants Quick Reference
- **Underlines:** `SINGLE`, `DOUBLE`, `WAVY`, `DASH`
- **Borders:** `SINGLE`, `DOUBLE`, `DASHED`, `DOTTED`
- **Numbering:** `DECIMAL` (1,2,3), `UPPER_ROMAN` (I,II,III), `LOWER_LETTER` (a,b,c)
- **Symbols:** `"2022"` (•), `"00A9"` (©), `"00AE"` (®), `"2122"` (™)

323
skills/docx/references/faq.md Executable file
View File

@@ -0,0 +1,323 @@
# FAQ — Common Bugs and Fixes
## Bug: Table text touching cell borders
**Symptom**: Text is cramped against table cell edges, no padding.
**Fix**: Set `margins` at the TableCell level:
```js
new TableCell({
margins: { top: 60, bottom: 60, left: 120, right: 120 },
children: [/* ... */],
})
```
---
## Bug: Numbered list doesn't restart
**Symptom**: Second numbered list continues from where the first left off (e.g., starts at 4 instead of 1).
**Fix**: Each separate numbered list MUST use a unique `reference` name in numbering config:
```js
numbering: { config: [
{ reference: "list-A", levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1." }] },
{ reference: "list-B", levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1." }] },
]}
```
---
## Bug: Cover and content on same page
**Symptom**: Cover page content flows directly into main content without page break.
**Fix**: Add a PageBreak paragraph at the end of cover content:
```js
coverChildren.push(new Paragraph({ children: [new PageBreak()] }));
```
---
## Bug: Three-line table shows all borders
**Symptom**: Table intended to be three-line shows full grid borders.
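**Fix**: At table level keep only the top and bottom rules and set every other border to NONE, then add a bottom border on the header cells only:
```js
// Table level: top + bottom rules only; left/right/inside borders NONE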
borders: { top: { style: BorderStyle.SINGLE, size: 4 }, bottom: { style: BorderStyle.SINGLE, size: 4 },
left: { style: BorderStyle.NONE }, right: { style: BorderStyle.NONE },
insideHorizontal: { style: BorderStyle.NONE }, insideVertical: { style: BorderStyle.NONE } }
// Header cells: bottom border only
headerCell.borders = { bottom: { style: BorderStyle.SINGLE, size: 2, color: "000000" } }
```
---
## Bug: User requests Chinese font size name (e.g. Wu Hao) but output is wrong
**Symptom**: Font size doesn't match expected Chinese size name.
**Fix**: Use the correct half-point value. `size` in docx-js is in half-points:
- Wu Hao 五号 = 10.5pt → `size: 21`
- Xiao Si 小四 = 12pt → `size: 24`
- Si Hao 四号 = 14pt → `size: 28`
See SKILL.md for complete conversion table.
---
## Bug: Black table cells
**Symptom**: Table cells appear solid black in Word.
**Fix**: Use `ShadingType.CLEAR` not `ShadingType.SOLID`:
```js
// ❌ WRONG
shading: { type: ShadingType.SOLID, fill: "F1F5F9" }
// ✅ CORRECT
shading: { type: ShadingType.CLEAR, fill: "F1F5F9" }
```
---
## Bug: Chinese characters garbled in matplotlib charts
**Symptom**: Chinese text shows as empty boxes □□□ in generated PNG charts.
**Fix**: Configure SimHei font before plotting:
```python
from matplotlib.font_manager import FontProperties
zh_font = FontProperties(fname="/path/to/SimHei.ttf")
plt.title("中文标题", fontproperties=zh_font)
plt.rcParams["axes.unicode_minus"] = False
```
---
## Bug: Image stretched/squashed in document
**Symptom**: Embedded image appears distorted.
**Fix**: Calculate display height from width using original aspect ratio:
```js
const aspectRatio = originalHeight / originalWidth;
const displayWidth = 500;
const displayHeight = Math.round(displayWidth * aspectRatio);
new ImageRun({ data: buf, transformation: { width: displayWidth, height: displayHeight }, type: "png" });
```
---
## Bug: TOC shows empty in generated document
→ See `references/toc.md` — "5 Common TOC Bugs" section for diagnosis and fixes.
---
## Bug: PageBreak standalone crashes Word
**Symptom**: Document fails to open or renders incorrectly.
**Fix**: PageBreak must always be wrapped in a Paragraph:
```js
// ❌ WRONG — standalone
children: [new PageBreak()]
// ✅ CORRECT — inside Paragraph
children: [new Paragraph({ children: [new PageBreak()] })]
```
---
## Bug: Quotation marks break JavaScript syntax — ⚠️ #1 MOST COMMON BUG
**This is the single most frequent code generation error.** Chinese text routinely uses curly quotes `“”` for emphasis, proper nouns, and event names (e.g., “双11”, “前低后高”, “618”大促). These MUST be Unicode-escaped — bare curly quotes silently break JS syntax.
**Rule: scan ALL Chinese text for `“”‘’` and replace with `\u201c \u201d \u2018 \u2019` BEFORE writing the string.**
```js
// ❌ WRONG — curly quotes in Chinese text break syntax (extremely common)
para("行业增速呈现"前低后高"的态势,在"618"大促拉动下增长。")
"他说"你好"" // \u201c \u201d
'It's a test' // \u2019
// ✅ CORRECT — Unicode escapes for ALL curly quotes
para("行业增速呈现\u201c前低后高\u201d的态势,在\u201c618\u201d大促拉动下增长。")
"他说\u201c你好\u201d"
"It\u2019s a test"
// ✅ Straight quotes: escape or use alternate delimiters
"He said \"hello\""
'He said "hello"'
```
---
## Bug: Unwanted blank pages in document
**Common causes:**
1. **Trailing PageBreak at end of last section** — pagination should use section breaks or be at the start of the next section
2. **Empty Paragraph overflow** — empty paragraphs at page bottom push to a new page
3. **PageBreak right after Table** — Table already at page bottom, PageBreak creates extra page
**Fix:**
```js
// Post-generation check: last section's children should not end with PageBreak
/**
 * Remove a trailing page-break paragraph from a section, in place.
 *
 * The last child is popped only when it is a Paragraph whose `root` holds at
 * least one PageBreak and no TextRun. Sections without a `children` array
 * (or with an empty one) are left untouched instead of throwing.
 *
 * @param {{ children?: Array }} section - section object; `children` is mutated.
 * @returns {void}
 */
function removeTrailingPageBreak(section) {
  const children = section?.children;
  // Guard: a missing section / missing or empty children has nothing to trim.
  if (!children?.length) return;

  const last = children[children.length - 1];
  if (!(last instanceof Paragraph)) return;

  // `root` may be absent on some objects; treat that as "no content".
  const parts = last.root ?? [];
  const hasPageBreak = parts.some(c => c instanceof PageBreak);
  const hasText = parts.some(c => c instanceof TextRun);

  // NOTE(review): only TextRun is treated as "real content"; other run kinds
  // (e.g. images) are not checked here — confirm that is intended.
  if (hasPageBreak && !hasText) {
    children.pop();
  }
}
```
**Prevention rules:**
- Place PageBreak at the **start of the next section**, not the end of the previous one
- Or use separate sections for pagination (no PageBreak needed)
- The last section of a document must NEVER end with a PageBreak
---
## Bug: Different rendering in WPS vs Microsoft Word
**Symptom**: Document looks correct in Word but renders differently in WPS (or vice versa) — misaligned tables, shifted content, clipped text in cells, black cells, or broken covers.
**Root causes and fixes:**
### 1. `ShadingType.SOLID` shows black in WPS
```js
// ❌ WPS shows solid black
shading: { type: ShadingType.SOLID, fill: "F1F5F9" }
// ✅ Both renderers show correct color
shading: { type: ShadingType.CLEAR, fill: "F1F5F9" }
```
### 2. `verticalAlign: "center"` in exact-height rows shifts content
WPS ignores vertical centering in `rule: "exact"` rows — content stays at top, creating visual mismatch.
```js
// ❌ Inconsistent between Word and WPS
new TableRow({ height: { value: 800, rule: "exact" },
children: [new TableCell({ verticalAlign: VerticalAlign.CENTER, ... })] })
// ✅ Use top alignment + margins/spacing for positioning
new TableRow({ height: { value: 800, rule: "exact" },
children: [new TableCell({ verticalAlign: VerticalAlign.TOP,
margins: { top: 200 }, ... })] })
```
### 3. Tab stops misalign in WPS
Tab widths differ between Word and WPS. Never use tabs for alignment.
```js
// ❌ Tab-based alignment — breaks in WPS
new Paragraph({ tabStops: [{ type: TabStopType.RIGHT, position: 8000 }],
children: [new TextRun({ text: "Party A:\tCompany Name" })] })
// ✅ Borderless table for alignment — consistent everywhere
new Table({ borders: allNoBorders, rows: [new TableRow({ children: [
new TableCell({ children: [new Paragraph({ children: [new TextRun({ text: "Party A:" })] })] }),
new TableCell({ children: [new Paragraph({ children: [new TextRun({ text: "Company Name" })] })] }),
] })] })
```
### 4. Nested tables in exact-height cells overflow differently
Word calculates nested table heights more accurately than WPS. Use stacked tables instead.
```js
// ❌ Nested table inside exact-height cell
new TableRow({ height: { value: 16838, rule: "exact" },
children: [new TableCell({ children: [nestedTable1, nestedTable2] })] })
// ✅ Stacked approach — content table + filler table
[contentTable, fillerTable] // both at top level, heights sum to 16838
```
### 5. `characterSpacing` renders differently
Large `characterSpacing` values cause inconsistent letter spacing. Keep ≤ 80.
### 6. `titlePage: true` header/footer suppression
WPS may not correctly hide first-page headers when using `titlePage: true`. Use a separate section for the cover instead.
---
## Bug: Cover spills to second page
**Symptom**: Cover content overflows, with some elements (date, footer, accent strip) appearing on page 2.
**Root cause**: Total content height exceeds 16838 twips (A4 page height). Common when:
- Title is very long (3+ lines at large font size)
- Fixed spacing values assume short title
- Multiple meta lines + subtitle + English label
**Fix**: Always use `calcTitleLayout()` + `calcCoverSpacing()` from `design-system.md`. These dynamically adjust font sizes and spacing to fit within the page. See `design-system.md § Cover Content Overflow Prevention` for the complete checklist.
---
## Bug: Blank page 2 after cover in MS Office (but not WPS)
**Symptom**: Cover displays correctly in WPS but produces a blank second page in MS Office Word.
**Root cause**: The cover wrapper table uses **default docx-js table borders** (`single/auto/sz=4`) instead of explicitly setting `allNoBorders`. Default borders add ~8 twips per edge. MS Office includes border thickness in the exact-height row calculation, pushing total height past 16838 twips → overflow to page 2. WPS is more lenient and absorbs the extra pixels.
**Fix**: Every cover wrapper table MUST explicitly set `borders: allNoBorders`:
```js
const NB = { style: BorderStyle.NONE, size: 0, color: "FFFFFF" };
const allNoBorders = { top: NB, bottom: NB, left: NB, right: NB,
insideHorizontal: NB, insideVertical: NB };
new Table({
borders: allNoBorders, // ← MANDATORY
rows: [new TableRow({
height: { value: 16838, rule: "exact" },
// ...
})],
});
```
**Prevention**: Add to post-generation check — search for any `new Table` in cover code that does not explicitly set `borders`.
---
## Bug: Cover decorative lines appear truncated or misaligned
**Symptom**: Horizontal decorative lines on the cover (accent strips, divider rules) display at different widths in MS Office vs WPS, or appear truncated / not spanning the intended width.
**Root cause**: Lines were implemented using text characters (`───`, `━━━`, `═══`, `——————`) instead of paragraph borders. Character-drawn lines depend on font metrics (character width × count), which vary across rendering engines.
**Fix**: Always use **paragraph borders** for decorative lines:
```js
// ✅ Paragraph border — renders consistently in both MS Office and WPS
new Paragraph({
indent: { left: 1000, right: 1000 },
border: { top: { style: BorderStyle.SINGLE, size: 18, color: accentColor, space: 20 } },
children: [],
})
// ❌ NEVER use text characters for decorative lines
new TextRun({ text: "───────────────" }) // width varies across engines
```
**Note**: This applies to ALL cover recipes (R1–R5). Recipe R2 uses `border.top` and `border.bottom` for its double-rule frame — follow this pattern.
---
## Bug: "undefined" appears in document text
**Symptom**: Fields like "Contact: undefined" or "Location: undefined" in generated documents.
**Root cause**: JavaScript outputs the string `"undefined"` when accessing a property that doesn't exist on the config object.
**Fix**: Use `safeText()` helper for ALL user-facing text values:
```js
/**
 * Return a printable string for a user-facing field, substituting a
 * placeholder when the value is missing.
 *
 * A value counts as missing when it is undefined, null, the empty string,
 * or stringifies to "NaN" / "undefined".
 *
 * @param {*} value - raw field value.
 * @param {string} [placeholder] - text to show instead; a falsy placeholder
 *   falls back to "【Please fill in】".
 * @returns {string}
 */
function safeText(value, placeholder) {
  const fallback = placeholder || "【Please fill in】";
  if (value == null || value === "") {
    return fallback;
  }
  const text = String(value);
  return text === "NaN" || text === "undefined" ? fallback : text;
}
// Usage: new TextRun({ text: safeText(config.contact, "【Contact person】") })
```

View File

@@ -0,0 +1,276 @@
# Math Formulas — LaTeX → docx-js Mapping
## Design Philosophy
GLM uses **LaTeX as the formula input syntax**, internally converting to docx-js Math objects.
**Why not write OMML directly?**
- Models are naturally proficient in LaTeX (abundant in training data)
- LaTeX is semantically clear and highly readable
- Conversion layer is encapsulated internally, transparent to the user
## Quick Start
```js
const { Math: OoxmlMath, MathRun, MathFraction, MathSuperScript,
MathSubScript, MathRadical, MathSum, MathSubSuperScript } = require("docx");
// Embed formula in paragraph
new Paragraph({
alignment: AlignmentType.CENTER,
children: [
new OoxmlMath({
children: [/* Math components */]
})
]
})
```
## LaTeX → docx-js Conversion Table
### Basic Operations
| LaTeX | Meaning | docx-js Implementation |
|-------|---------|----------------------|
| `x + y` | Addition | `new MathRun("x + y")` |
| `x - y` | Subtraction | `new MathRun("x − y")` (use Unicode minus `−`, U+2212) |
| `x \times y` | Multiplication | `new MathRun("x × y")` |
| `x \div y` | Division | `new MathRun("x ÷ y")` |
| `x \pm y` | Plus-minus | `new MathRun("x ± y")` |
| `x \neq y` | Not equal | `new MathRun("x ≠ y")` |
| `x \leq y` | Less or equal | `new MathRun("x ≤ y")` |
| `x \geq y` | Greater or equal | `new MathRun("x ≥ y")` |
### Fractions
| LaTeX | docx-js |
|-------|---------|
| `\frac{a}{b}` | `new MathFraction({ numerator: [new MathRun("a")], denominator: [new MathRun("b")] })` |
| `\frac{x+1}{x-1}` | `new MathFraction({ numerator: [new MathRun("x+1")], denominator: [new MathRun("x−1")] })` |
### Superscripts & Subscripts
| LaTeX | docx-js |
|-------|---------|
| `x^2` | `new MathSuperScript({ children: [new MathRun("x")], superScript: [new MathRun("2")] })` |
| `x_i` | `new MathSubScript({ children: [new MathRun("x")], subScript: [new MathRun("i")] })` |
| `x_i^2` | `new MathSubSuperScript({ children: [new MathRun("x")], subScript: [new MathRun("i")], superScript: [new MathRun("2")] })` |
### Radicals
| LaTeX | docx-js |
|-------|---------|
| `\sqrt{x}` | `new MathRadical({ children: [new MathRun("x")] })` |
| `\sqrt[3]{x}` | `new MathRadical({ children: [new MathRun("x")], degree: [new MathRun("3")] })` |
### Summation & Integrals
| LaTeX | docx-js |
|-------|---------|
| `\sum_{i=1}^{n} a_i` | `new MathSum({ subScript: [new MathRun("i=1")], superScript: [new MathRun("n")], children: [new MathRun("aᵢ")] })` |
### Greek Letters
Use Unicode characters directly:
```js
// LaTeX → Unicode mapping
const GREEK = {
"\\alpha": "α", "\\beta": "β", "\\gamma": "γ", "\\delta": "δ",
"\\epsilon": "ε", "\\zeta": "ζ", "\\eta": "η", "\\theta": "θ",
"\\iota": "ι", "\\kappa": "κ", "\\lambda": "λ", "\\mu": "μ",
"\\nu": "ν", "\\xi": "ξ", "\\pi": "π", "\\rho": "ρ",
"\\sigma": "σ", "\\tau": "τ", "\\phi": "φ", "\\chi": "χ",
"\\psi": "ψ", "\\omega": "ω",
"\\Alpha": "Α", "\\Beta": "Β", "\\Gamma": "Γ", "\\Delta": "Δ",
"\\Theta": "Θ", "\\Lambda": "Λ", "\\Pi": "Π", "\\Sigma": "Σ",
"\\Phi": "Φ", "\\Psi": "Ψ", "\\Omega": "Ω",
};
```
## Complete Formula Examples
### Quadratic Formula
LaTeX: `x = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}`
```js
// x = (−b ± √(b² − 4ac)) / 2a
// Both minus signs are the Unicode minus (U+2212), not an ASCII hyphen.
new OoxmlMath({
  children: [
    new MathRun("x = "),
    new MathFraction({
      numerator: [
        new MathRun("−b ± "), // leading minus from LaTeX `-b`
        new MathRadical({
          // radicand: b² − 4ac
          children: [
            new MathSuperScript({
              children: [new MathRun("b")],
              superScript: [new MathRun("2")],
            }),
            new MathRun(" − 4ac"),
          ],
        }),
      ],
      denominator: [new MathRun("2a")],
    }),
  ],
})
```
### Pythagorean Theorem
LaTeX: `a^2 + b^2 = c^2`
```js
new OoxmlMath({
children: [
new MathSuperScript({ children: [new MathRun("a")], superScript: [new MathRun("2")] }),
new MathRun(" + "),
new MathSuperScript({ children: [new MathRun("b")], superScript: [new MathRun("2")] }),
new MathRun(" = "),
new MathSuperScript({ children: [new MathRun("c")], superScript: [new MathRun("2")] }),
],
})
```
### Trigonometric Identity
LaTeX: `\sin^2\theta + \cos^2\theta = 1`
```js
new OoxmlMath({
children: [
new MathSuperScript({ children: [new MathRun("sin")], superScript: [new MathRun("2")] }),
new MathRun("θ + "),
new MathSuperScript({ children: [new MathRun("cos")], superScript: [new MathRun("2")] }),
new MathRun("θ = 1"),
],
})
```
## Common Exam Formula Templates
### Middle School Math
```js
// Quadratic discriminant
// Δ = b² − 4ac; the minus is the Unicode minus (U+2212), not an ASCII hyphen.
const discriminant = new OoxmlMath({
  children: [
    new MathRun("Δ = "),
    new MathSuperScript({ children: [new MathRun("b")], superScript: [new MathRun("2")] }),
    new MathRun(" − 4ac"),
  ],
});
// Circle area
const circleArea = new OoxmlMath({
children: [
new MathRun("S = π"),
new MathSuperScript({ children: [new MathRun("r")], superScript: [new MathRun("2")] }),
],
});
```
### High School Math
```js
// Logarithm change of base
const logChange = new OoxmlMath({
children: [
new MathSubScript({ children: [new MathRun("log")], subScript: [new MathRun("a")] }),
new MathRun("b = "),
new MathFraction({
numerator: [new MathRun("ln b")],
denominator: [new MathRun("ln a")],
}),
],
});
// Arithmetic series sum
const arithmeticSum = new OoxmlMath({
children: [
new MathSubScript({ children: [new MathRun("S")], subScript: [new MathRun("n")] }),
new MathRun(" = "),
new MathFraction({
numerator: [
new MathRun("n("),
new MathSubScript({ children: [new MathRun("a")], subScript: [new MathRun("1")] }),
new MathRun(" + "),
new MathSubScript({ children: [new MathRun("a")], subScript: [new MathRun("n")] }),
new MathRun(")"),
],
denominator: [new MathRun("2")],
}),
],
});
```
### Physics
```js
// Newton's second law
const newton2 = new OoxmlMath({
children: [new MathRun("F = ma")],
});
// Kinetic energy
const kineticEnergy = new OoxmlMath({
children: [
new MathSubScript({ children: [new MathRun("E")], subScript: [new MathRun("k")] }),
new MathRun(" = "),
new MathFraction({
numerator: [new MathRun("1")],
denominator: [new MathRun("2")],
}),
new MathRun("m"),
new MathSuperScript({ children: [new MathRun("v")], superScript: [new MathRun("2")] }),
],
});
```
## Complexity Fallback Strategy
When formulas are too complex (nesting >3 levels) for docx-js Math, **fall back to matplotlib PNG rendering:**
```python
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
def latex_to_png(latex_str: str, output_path: str, fontsize: int = 14, dpi: int = 200) -> str:
    """Render a LaTeX formula as a transparent PNG image.

    Args:
        latex_str: Formula source without the surrounding ``$``; it is
            wrapped as ``$...$`` before being handed to matplotlib.
        output_path: Destination file path for the PNG.
        fontsize: Font size passed to the text artist.
        dpi: Resolution used when saving the image.

    Returns:
        ``output_path``, unchanged, for convenient chaining.
    """
    fig, ax = plt.subplots(figsize=(0.1, 0.1))
    try:
        ax.axis("off")
        text = ax.text(0, 0.5, f"${latex_str}$", fontsize=fontsize,
                       transform=ax.transAxes, verticalalignment="center")
        # Draw once so the text has a measurable extent.
        fig.canvas.draw()
        bbox = text.get_window_extent(fig.canvas.get_renderer())
        # The extent is in pixels at the figure's own dpi (not the save dpi),
        # so convert to inches with fig.dpi; 0.2in is breathing room.
        fig.set_size_inches(bbox.width / fig.dpi + 0.2, bbox.height / fig.dpi + 0.2)
        # Save this figure explicitly rather than whatever pyplot considers current.
        fig.savefig(output_path, dpi=dpi, bbox_inches="tight",
                    pad_inches=0.05, transparent=True)
    finally:
        # Always release the figure, even if rendering or saving raised.
        plt.close(fig)
    return output_path
```
Then embed the PNG in the document:
```js
const formulaImg = fs.readFileSync("formula.png");
new Paragraph({
alignment: AlignmentType.CENTER,
children: [new ImageRun({
data: formulaImg,
transformation: { width: 300, height: 40 }, // adjust based on actual size
type: "png",
})],
})
```
**Fallback rules:**
- Nested fractions >2 levels → fallback
- Matrices/determinants → fallback
- Complex integrals (multiple integrals + limits + integrand) → fallback
- Piecewise functions → fallback
- All other cases → prefer docx-js Math

222
skills/docx/references/ooxml.md Executable file
View File

@@ -0,0 +1,222 @@
# OOXML Editing Reference — Document Library API
**Important: Read this entire document before editing.** This is the primary reference for modifying existing .docx files.
## Document Library (Python) — Primary API
Use the `Document` class from `"$DOCX_SCRIPTS/document.py"` for all edits, tracked changes, and comments. It handles infrastructure automatically (people.xml, RSIDs, settings.xml, comments, relationships, content types).
**Working with Unicode and Entities:**
- Both entity notation and Unicode work for search: `contains="&#8220;Company"` is equivalent to `contains="\u201cCompany"`
- Both work for replacement too
### Setup
```bash
# Find the docx skill root
find /mnt/skills -name "document.py" -path "*/docx/scripts/*" 2>/dev/null | head -1
# Skill root = parent of scripts/
# Run with PYTHONPATH
PYTHONPATH=/mnt/skills/docx python your_script.py
```
```python
from scripts.document import Document, DocxXMLEditor
# Basic init (auto-creates temp copy, sets up infrastructure)
doc = Document('unpacked')
# Custom author/initials
doc = Document('unpacked', author="John Doe", initials="JD")
# Enable tracked changes
doc = Document('unpacked', track_revisions=True)
# Custom RSID (auto-generated if omitted)
doc = Document('unpacked', rsid="07DC5ECB")
```
### Finding Nodes
```python
# By text
node = doc["word/document.xml"].get_node(tag="w:p", contains="specific text")
# By line range
para = doc["word/document.xml"].get_node(tag="w:p", line_number=range(100, 150))
# By attributes
node = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "1"})
# By exact line number
para = doc["word/document.xml"].get_node(tag="w:p", line_number=42)
# Combined filters (disambiguation)
node = doc["word/document.xml"].get_node(tag="w:r", contains="Section", line_number=range(2400, 2500))
```
### Tracked Changes
**CRITICAL**: Only mark text that actually changes. Keep unchanged text outside `<w:del>`/`<w:ins>` tags.
**Method Selection**:
- Regular text → `replace_node()` with `<w:del>`/`<w:ins>`, or `suggest_deletion()` for whole elements
- Partially modify another's tracked change → `replace_node()` to nest changes
- Reject another's insertion → `revert_insertion()` (NOT `suggest_deletion()`)
- Reject another's deletion → `revert_deletion()`
```python
# Change one word: "monthly" → "quarterly"
node = doc["word/document.xml"].get_node(tag="w:r", contains="The report is monthly")
rpr = tags[0].toxml() if (tags := node.getElementsByTagName("w:rPr")) else ""
replacement = f'<w:r w:rsidR="00AB12CD">{rpr}<w:t>The report is </w:t></w:r><w:del><w:r>{rpr}<w:delText>monthly</w:delText></w:r></w:del><w:ins><w:r>{rpr}<w:t>quarterly</w:t></w:r></w:ins>'
doc["word/document.xml"].replace_node(node, replacement)
# Delete entire run
node = doc["word/document.xml"].get_node(tag="w:r", contains="text to delete")
doc["word/document.xml"].suggest_deletion(node)
# Delete entire paragraph
para = doc["word/document.xml"].get_node(tag="w:p", contains="paragraph to delete")
doc["word/document.xml"].suggest_deletion(para)
# Insert new content after a node
node = doc["word/document.xml"].get_node(tag="w:r", contains="existing text")
doc["word/document.xml"].insert_after(node, '<w:ins><w:r><w:t>new text</w:t></w:r></w:ins>')
# Add new numbered list item
target_para = doc["word/document.xml"].get_node(tag="w:p", contains="existing list item")
pPr = tags[0].toxml() if (tags := target_para.getElementsByTagName("w:pPr")) else ""
new_item = f'<w:p>{pPr}<w:r><w:t>New item</w:t></w:r></w:p>'
tracked_para = DocxXMLEditor.suggest_paragraph(new_item)
doc["word/document.xml"].insert_after(target_para, tracked_para)
```
### Handling Other Authors' Changes
```python
# Partially delete another author's insertion
node = doc["word/document.xml"].get_node(tag="w:ins", attrs={"w:id": "5"})
replacement = '''<w:ins w:author="Jane Smith" w:date="2025-01-15T10:00:00Z">
<w:r><w:t>quarterly </w:t></w:r>
<w:del><w:r><w:delText>financial </w:delText></w:r></w:del>
<w:r><w:t>report</w:t></w:r>
</w:ins>'''
doc["word/document.xml"].replace_node(node, replacement)
# Reject insertion (wraps in deletion)
ins = doc["word/document.xml"].get_node(tag="w:ins", attrs={"w:id": "5"})
doc["word/document.xml"].revert_insertion(ins)
# Reject deletion (restores deleted content)
del_elem = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "3"})
doc["word/document.xml"].revert_deletion(del_elem)
```
### Comments
```python
doc = Document('unpacked', author="Z.ai", initials="Z")
# Comment on a range
start = doc["word/document.xml"].get_node(tag="w:del", attrs={"w:id": "1"})
end = doc["word/document.xml"].get_node(tag="w:ins", attrs={"w:id": "2"})
doc.add_comment(start=start, end=end, text="Explanation of this change")
# Comment on paragraph
para = doc["word/document.xml"].get_node(tag="w:p", contains="text")
doc.add_comment(start=para, end=para, text="Comment here")
# Comment on newly created tracked change
node = doc["word/document.xml"].get_node(tag="w:r", contains="old")
new_nodes = doc["word/document.xml"].replace_node(
node, '<w:del><w:r><w:delText>old</w:delText></w:r></w:del><w:ins><w:r><w:t>new</w:t></w:r></w:ins>')
doc.add_comment(start=new_nodes[0], end=new_nodes[1], text="Changed per requirements")
# Reply to comment
doc.reply_to_comment(parent_comment_id=0, text="I agree")
```
### Images
```python
from PIL import Image
import shutil, os
doc = Document('unpacked')
media_dir = os.path.join(doc.unpacked_path, 'word/media')
os.makedirs(media_dir, exist_ok=True)
shutil.copy('image.png', os.path.join(media_dir, 'image1.png'))
img = Image.open(os.path.join(media_dir, 'image1.png'))
width_emus = int(6.5 * 914400) # 6.5" usable width
height_emus = int(width_emus * img.size[1] / img.size[0])
# Add relationship
rels_editor = doc['word/_rels/document.xml.rels']
next_rid = rels_editor.get_next_rid()
rels_editor.append_to(rels_editor.dom.documentElement,
f'<Relationship Id="{next_rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/>')
doc['[Content_Types].xml'].append_to(doc['[Content_Types].xml'].dom.documentElement,
'<Default Extension="png" ContentType="image/png"/>')
# Insert
node = doc["word/document.xml"].get_node(tag="w:p", line_number=100)
doc["word/document.xml"].insert_after(node, f'''<w:p><w:r><w:drawing>
<wp:inline distT="0" distB="0" distL="0" distR="0">
<wp:extent cx="{width_emus}" cy="{height_emus}"/>
<wp:docPr id="1" name="Picture 1"/>
<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
<pic:nvPicPr><pic:cNvPr id="1" name="image1.png"/><pic:cNvPicPr/></pic:nvPicPr>
<pic:blipFill><a:blip r:embed="{next_rid}"/><a:stretch><a:fillRect/></a:stretch></pic:blipFill>
<pic:spPr><a:xfrm><a:ext cx="{width_emus}" cy="{height_emus}"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom></pic:spPr>
</pic:pic>
</a:graphicData>
</a:graphic>
</wp:inline>
</w:drawing></w:r></w:p>''')
```
### Saving
```python
doc.save() # Validates + copies back to original dir
doc.save('modified-unpacked') # Save to different location
doc.save(validate=False) # Skip validation (debug only)
```
### Direct DOM Manipulation
```python
editor = doc["word/document.xml"]
node = doc["word/document.xml"].get_node(tag="w:p", line_number=5)
parent = node.parentNode
parent.removeChild(node)
# General replacement (without tracked changes)
old = doc["word/document.xml"].get_node(tag="w:p", contains="original")
doc["word/document.xml"].replace_node(old, "<w:p><w:r><w:t>replacement</w:t></w:r></w:p>")
# Chained insertions
node = doc["word/document.xml"].get_node(tag="w:r", line_number=100)
nodes = doc["word/document.xml"].insert_after(node, "<w:r><w:t>A</w:t></w:r>")
nodes = doc["word/document.xml"].insert_after(nodes[-1], "<w:r><w:t>B</w:t></w:r>")
```
## Schema Compliance Quick Reference
- **Element ordering in `<w:pPr>`**: `<w:pStyle>` → `<w:numPr>` → `<w:spacing>` → `<w:ind>` → `<w:jc>`
- **Whitespace**: `xml:space='preserve'` on `<w:t>` with leading/trailing spaces
- **RSIDs**: 8-digit hex only (0-9, A-F)
- **trackRevisions**: Add `<w:trackRevisions/>` after `<w:proofState>` in settings.xml
- **`<w:del>`/`<w:ins>` placement**: At paragraph level, containing complete `<w:r>` elements. Never nest inside `<w:r>`.
## Validation Rules
The validator ensures document text matches the original after reverting GLM's changes:
- **Never modify text inside another author's `<w:ins>` or `<w:del>` tags**
- **Use nested deletions** to remove another author's insertions
- **Every edit must be tracked** with `<w:ins>` or `<w:del>` tags

264
skills/docx/references/toc.md Executable file
View File

@@ -0,0 +1,264 @@
# Table of Contents (TOC) — Complete Reference
> **This is the single source of truth for all TOC rules.** Other files should reference this file instead of duplicating TOC instructions.
## Overview
DOCX TOC is a **3-step process**: Code → Post-process → User opens Word.
```
Step A: docx-js code generates empty TOC field structure
Step B: add_toc_placeholders.py fills it with visible placeholder entries
Step C: User opens Word → "Update Field" → real page numbers replace placeholders
```
All 3 steps are **mandatory**. Skipping Step A or Step B results in a broken or empty TOC; skipping Step C leaves only placeholder page numbers instead of real ones.
## When to Add TOC
- **Recommended**: Long or complex documents with many headings (reports, theses, papers, manuals)
- **Do NOT add**: Resumes, contracts, letters, exam papers, short documents
- **postcheck rule**: If document contains a "目录" title but no `TableOfContents` element → error
## Step A: Code Generation (docx-js)
Insert **4 elements** in sequence:
```js
const { TableOfContents, Paragraph, TextRun, PageBreak, AlignmentType } = require("docx");
// 1. TOC title — ⛔ DO NOT use HeadingLevel (or TOC will index itself!)
new Paragraph({
alignment: AlignmentType.CENTER,
spacing: { before: 480, after: 360 },
children: [new TextRun({
text: "目 录", // or "Table of Contents" for English docs
bold: true, size: 32,
font: { eastAsia: "SimHei", ascii: "Times New Roman" }
})],
}),
// 2. TOC field element — ⚠️ first parameter is NOT displayed, it's internal name only
new TableOfContents("Table of Contents", {
hyperlink: true,
headingStyleRange: "1-3", // match HeadingLevel range used in document
}),
// 3. ★ MANDATORY Refresh Hint — tells user how to update page numbers
new Paragraph({
spacing: { before: 200 },
children: [new TextRun({
text: "Note: This Table of Contents is generated via field codes. To ensure page number accuracy after editing, please right-click the TOC and select \"Update Field.\"",
italics: true, size: 18, color: "888888"
})]
}),
// 4. ★ MANDATORY PageBreak after TOC — prevents TOC and body merging on same page
new Paragraph({ children: [new PageBreak()] }),
```
### Heading Requirements
**⚠️ CRITICAL**: TOC only picks up paragraphs with `heading: HeadingLevel.HEADING_X`.
```js
// ✅ Correct — Heading style, TOC can index
new Paragraph({
heading: HeadingLevel.HEADING_1,
children: [new TextRun({ text: "第一章 引言", bold: true, size: 32, color: c(P.primary) })]
})
// ❌ Wrong — manual bold + large font, TOC cannot detect
new Paragraph({
children: [new TextRun({ text: "第一章 引言", bold: true, size: 32, color: c(P.primary) })]
})
```
**Exceptions:**
- Cover title: does NOT need Heading style (should not appear in TOC)
- "目录" title: **MUST NOT** use Heading style (prevents TOC from indexing itself)
## Step B: Post-Processing Script
**MUST** run after generating the DOCX file:
```bash
python3 "$DOCX_SCRIPTS/add_toc_placeholders.py" output.docx --auto
```
### What the script does
1. Extracts Heading 1-3 from the document as TOC entries
2. Fixes docx-js fldChar structure bug (begin+instrText+separate merged in one `<w:r>`)
3. Patches `settings.xml` with `updateFields=true` (Word prompts to refresh on open)
4. Ensures Heading styles have `outlineLvl` (required for TOC field update)
5. Ensures TOC 1/2/3 styles exist in `styles.xml`
6. Injects placeholder entries with HYPERLINK + PAGEREF between `separate` and `end` fldChars
7. Handles duplicate heading texts (each gets its own bookmark)
### Error handling
The script **exits with code 1** if:
- No TOC field structure found (missing `TableOfContents` element)
- TOC field has `begin` but no `separate` fldChar (malformed structure)
- Field structure exists but no TOC instrText detected
**If exit code = 1 → the generated code is wrong. Fix the code and regenerate.**
### Options
```bash
# Auto mode (recommended — default behavior)
python3 "$DOCX_SCRIPTS/add_toc_placeholders.py" output.docx --auto
# Manual entries
python3 "$DOCX_SCRIPTS/add_toc_placeholders.py" output.docx \
--entries '[{"level":1,"text":"Chapter 1","page":"1"},{"level":2,"text":"Section 1.1","page":"2"}]'
```
## Step C: User Opens in Word/WPS
- **Word**: Detects `updateFields=true` → prompts "Update field?" → click Yes → real page numbers
- **WPS**: May NOT auto-prompt. User must: right-click TOC → "Update Field" → "Update entire table"
The placeholder entries ensure TOC is **not blank** even without updating — users see heading titles with approximate page numbers.
## Multi-Section Page Numbering
When a document has a TOC, the TOC MUST be in its own section so that body page numbering starts from 1. This applies to **all document types with a TOC** (reports, whitepapers, PRDs, academic papers, etc.) — not just academic papers.
**Mandatory 3-section architecture for documents with cover + TOC:**
```js
sections: [
{ /* Section 1: Cover — no page number, no footer */
properties: {
page: { size: pgSize, margin: pgMargin },
// ⚠️ Do NOT set page.pageNumbers here — docx-js emits empty <pgNumType/> which confuses WPS
},
},
{ /* Section 2: Front matter (abstract, TOC) — Roman numerals */
properties: {
type: SectionType.NEXT_PAGE,
page: {
size: pgSize, margin: pgMargin,
pageNumbers: { start: 1, formatType: NumberFormat.UPPER_ROMAN }, // I, II, III...
},
},
footers: { default: pageNumFooter() }, // see footer rules below
children: [/* abstract + TOC title + TableOfContents + PageBreak */]
},
{ /* Section 3: Body — Arabic numerals starting from 1 */
properties: {
type: SectionType.NEXT_PAGE,
page: {
size: pgSize, margin: pgMargin,
pageNumbers: { start: 1, formatType: NumberFormat.DECIMAL }, // 1, 2, 3...
},
},
footers: { default: pageNumFooter() },
children: [/* body content */]
},
]
```
### ⚠️ Page Number API — Correct Nesting (CRITICAL)
Page number settings MUST be nested inside `page.pageNumbers`, NOT at properties top level:
```js
// ❌ WRONG — docx-js ignores these, pgNumType will be empty
properties: {
pageNumberStart: 1,
pageNumberFormatType: NumberFormat.DECIMAL,
}
// ✅ CORRECT — docx-js writes start= and fmt= attributes
properties: {
page: {
pageNumbers: { start: 1, formatType: NumberFormat.DECIMAL },
},
}
```
### ⚠️ Footer Field Instruction — WPS Compatibility (CRITICAL)
WPS may ignore `pgNumType fmt` in the section properties. To ensure correct display, the footer PAGE field **MUST** include an explicit format switch via **post-processing**:
After generating the docx, unzip and patch each footer XML:
- **Roman numeral footer**: replace `PAGE` with `PAGE \* ROMAN \* MERGEFORMAT`
- **Arabic numeral footer**: replace `PAGE` with `PAGE \* arabic \* MERGEFORMAT`
**⚠️ NEVER use `\* decimal` in instrText** — `decimal` is a docx-js API enum value (`NumberFormat.DECIMAL` for `pgNumType` XML attribute), NOT a valid Word field format switch. Using it causes page numbers to render as "1decimal", "2decimal". The correct Word field switch for Arabic numerals is always `\* arabic`.
```js
// Post-process footer XML:
footerXml = footerXml.replace(
/(<w:instrText[^>]*>)\s*PAGE\s*(<\/w:instrText>)/g,
'$1 PAGE \\* ROMAN \\* MERGEFORMAT $2' // or "arabic" for body section
);
```
Also remove any empty `<w:pgNumType/>` from the cover section (docx-js emits these even when no pageNumbers is set):
```js
docXml = docXml.replace(/<w:pgNumType\/>/g, "");
```
### Page Numbering Rules
| Section | Content | Format | Start | Footer |
|---------|---------|--------|-------|--------|
| Cover | Title page | None | — | No footer |
| Front matter | Abstract, TOC | Roman (I, II, III) | 1 | `PAGE \* ROMAN` |
| Body | Main content | Arabic (1, 2, 3) | 1 | `PAGE \* arabic` |
⚠️ **The body section MUST set `pageNumbers: { start: 1 }`** — otherwise page numbers continue from the front matter pages, causing TOC page references to be offset. This is the #1 cause of "TOC page numbers are wrong".
### Common Causes of Incorrect Page Numbers
| Cause | Fix |
|-------|-----|
| `pageNumberStart` at properties top level | Move to `page: { pageNumbers: { start: 1 } }` |
| Cover section emits empty `<pgNumType/>` | Post-process to remove it |
| Footer uses bare `PAGE` without format switch | Post-process to add `\* ROMAN` (front matter) or `\* arabic` (body) |
| Cover and body in same section | Separate cover into its own section |
| Multiple sections without pageNumbers.start | Explicitly set on each section needing independent counting |
| headingStyleRange doesn't match headings | Ensure `headingStyleRange: "1-3"` covers all HeadingLevel values used |
| Cover section has header/footer | Don't set header/footer on cover section |
## TOC Refresh Hint (MANDATORY)
**⚠️ When the document contains a TOC, you MUST add the following hint paragraph between the `TableOfContents` element and the PageBreak (so it appears on the TOC page, not the body page).** This ensures users know how to refresh page numbers after editing.
```js
new Paragraph({
spacing: { before: 200 },
children: [new TextRun({
text: "Note: This Table of Contents is generated via field codes. To ensure page number accuracy after editing, please right-click the TOC and select \"Update Field.\"",
italics: true, size: 18, color: "888888"
})]
}),
```
## 5 Common TOC Bugs
| # | Bug | Symptom | Fix |
|---|-----|---------|-----|
| 1 | "目录" heading uses `HeadingLevel.HEADING_1` | TOC includes "目录" as an entry | Remove `heading:` from TOC title paragraph |
| 2 | No `PageBreak` after `TableOfContents` | TOC and body text on same page | Add `new Paragraph({ children: [new PageBreak()] })` after TOC |
| 3 | Missing `TableOfContents` element | Script cannot inject placeholders, TOC is empty | Always include `new TableOfContents(...)` in code |
| 4 | Headings use bold+large instead of `HeadingLevel` | TOC is empty even after running script | Change all body headings to `heading: HeadingLevel.HEADING_X` |
| 5 | Script not run or exit code ignored | TOC page shows only title + blank space | Always run script; if exit code = 1, fix code and regenerate |
## Checklist (for self-check during generation)
- [ ] Document has 3+ H1 → TOC is included
- [ ] "目录" heading does NOT use `HeadingLevel` (prevents self-indexing)
- [ ] `new TableOfContents(...)` element present (not just plain text)
- [ ] `PageBreak` exists after TOC element (prevents merging with body)
- [ ] All body chapter headings use `heading: HeadingLevel.HEADING_X`
- [ ] `add_toc_placeholders.py --auto` runs after generation
- [ ] Script exit code checked — if 1, fix code and regenerate
- [ ] TOC page has visible placeholder content (not empty)
- [ ] **TOC Refresh Hint present** — italic gray note between the `TableOfContents` element and the PageBreak (so it stays on the TOC page), telling user to right-click → "Update Field"
- [ ] `outlineLevel: 0` for H1, `1` for H2, etc. (needed for TOC field update)