Initial commit

2026-06-06 05:21:10 +00:00
commit 6664758a6d
493 changed files with 135653 additions and 0 deletions
--- a/skills/pdf/scripts/cover_validate.js
+++ b/skills/pdf/scripts/cover_validate.js
@@ -0,0 +1,367 @@
+#!/usr/bin/env node
+/**
+ * cover_validate.js — Cover page overlap detection via Playwright rendering
+ *
+ * Detects text-vs-decorative-line overlap on cover HTML pages by:
+ *   1. Rendering the HTML in Playwright
+ *   2. Waiting for fonts to load
+ *   3. Measuring bounding boxes of text elements and decorative line elements
+ *   4. Checking for Y-axis overlap (minimum spacing = 1U = 5% of page width ≈ 30pt)
+ *
+ * Usage:
+ *   node cover_validate.js cover.html
+ *   node cover_validate.js cover.html --width 210mm --height 297mm
+ *   node cover_validate.js cover.html --min-gap 30   # custom min gap in px (default: auto = 5% of width)
+ *
+ * Exit codes:
+ *   0 = no overlap issues found
+ *   1 = overlap detected (prints details to stderr)
+ *   2 = script error (missing file, browser launch failure, etc.)
+ *
+ * This script is ONLY for cover pages. Do NOT use it on:
+ *   - Multi-page documents (use html2pdf-next.js pre-render checks)
+ *   - Posters (use html2poster.js which handles overflow automatically)
+ */
+
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+
+// ── Playwright import ──
+
+let playwright;
+try {
+  playwright = require('playwright');
+} catch {
+  try {
+    playwright = require('playwright-core');
+  } catch {
+    console.error('✗ Neither playwright nor playwright-core is installed.');
+    process.exit(2);
+  }
+}
+
+// ── Chromium resolution (shared logic with html2poster.js) ──
+
+/**
+ * Find a Chromium executable for Playwright to launch.
+ *
+ * @param {object} chromiumObj - Playwright's `chromium` browser type; only its
+ *   `executablePath()` method is used.
+ * @returns {{status: string, executablePath: string}} `status` is 'ok' when
+ *   the path reported by Playwright exists on disk, 'fallback' when a path
+ *   from PLAYWRIGHT_CHROMIUM_PATH or a well-known system location exists
+ *   instead, and 'missing' when nothing was found.
+ */
+function resolveChromium(chromiumObj) {
+  let bundled = null;
+  try {
+    bundled = chromiumObj.executablePath();
+  } catch (_) {
+    bundled = null;
+  }
+  if (bundled && fs.existsSync(bundled)) {
+    return { status: 'ok', executablePath: bundled };
+  }
+
+  const systemPaths = [
+    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+    '/Applications/Chromium.app/Contents/MacOS/Chromium',
+    '/usr/bin/chromium-browser', '/usr/bin/chromium', '/usr/bin/google-chrome',
+  ];
+  // An explicit override is probed before the built-in system locations.
+  const override = process.env.PLAYWRIGHT_CHROMIUM_PATH;
+  const searchOrder = override ? [override, ...systemPaths] : systemPaths;
+
+  const found = searchOrder.find((candidate) => fs.existsSync(candidate));
+  if (found !== undefined) {
+    return { status: 'fallback', executablePath: found };
+  }
+  return { status: 'missing', executablePath: bundled || '' };
+}
+
+// ── CLI parsing ──
+
+/**
+ * Parse the command line for cover_validate.js.
+ *
+ * The first token that does not start with '-' becomes the input file.
+ * `--help` / `-h` prints usage and exits the process with code 0.
+ *
+ * @param {string[]} argv - Full process argv; the first two entries are skipped.
+ * @returns {{input: (string|null), width: string, height: string, minGap: (number|null)}}
+ *   `minGap` is null when the option is absent, has no value, or is not numeric.
+ */
+function parseArgs(argv) {
+  const tokens = argv.slice(2);
+  let input = null;
+  let width = '210mm';
+  let height = '297mm';
+  let minGap = null;
+
+  for (let i = 0; i < tokens.length; i++) {
+    const t = tokens[i];
+    if (t === '--width' || t === '--height' || t === '--min-gap') {
+      const value = tokens[++i];
+      // A flag given as the last token has no value: keep the default instead
+      // of overwriting it with undefined.
+      if (value === undefined) continue;
+      if (t === '--width') {
+        width = value;
+      } else if (t === '--height') {
+        height = value;
+      } else {
+        const parsed = parseFloat(value);
+        // Never hand NaN to the caller; null means "use the automatic gap".
+        minGap = Number.isNaN(parsed) ? null : parsed;
+      }
+    } else if (t === '--help' || t === '-h') {
+      console.log(`Usage: node cover_validate.js <cover.html> [options]
+
+Options:
+  --width <val>     Page width (default: 210mm)
+  --height <val>    Page height (default: 297mm)
+  --min-gap <px>    Minimum gap between text and decorative lines (default: 5% of width)
+  --help            Show this help`);
+      process.exit(0);
+    } else if (!t.startsWith('-') && !input) {
+      input = t;
+    }
+  }
+  return { input, width, height, minGap };
+}
+
+// ── Convert CSS dimension string to px for viewport ──
+
+/**
+ * Convert a CSS length to whole pixels at 96 dpi.
+ *
+ * Accepts mm, cm, in, pt and px suffixes (case-insensitive) or a bare number,
+ * which is read as px.
+ *
+ * @param {string|number|null|undefined} dim - Length such as '210mm' or 720.
+ * @returns {number|null} Rounded pixel count, or null when `dim` is empty,
+ *   has no numeric part, or uses an unsupported unit (callers fall back to
+ *   their own default in that case).
+ */
+function dimToPx(dim) {
+  if (!dim) return null;
+  const s = String(dim).trim().toLowerCase();
+  const num = parseFloat(s);
+  // A unit with no number (e.g. 'mm') must not leak NaN to the caller.
+  if (!Number.isFinite(num)) return null;
+
+  const pxPerUnit = [
+    ['mm', 3.7795],   // 1mm ≈ 3.7795px at 96dpi
+    ['cm', 37.795],
+    ['in', 96],
+    ['pt', 96 / 72],
+    ['px', 1],
+  ];
+  for (const [unit, factor] of pxPerUnit) {
+    if (s.endsWith(unit)) return Math.round(num * factor);
+  }
+
+  // Bare numbers are px; anything else ('50%', '10em') is an unsupported unit
+  // and is rejected rather than silently reinterpreted as px.
+  if (Number.isFinite(Number(s))) return Math.round(num);
+  return null;
+}
+
+// ── Decorative line detection heuristics ──
+// An element is treated as a decorative line when ANY of these holds:
+//   - it is an <hr>
+//   - it is thin: ≤ 5px in one dimension and > 20px in the other
+//   - it has an extreme aspect ratio: > 15:1 with the short side ≤ 8px
+//   - it has no text, no background image, at least one border, and one
+//     side ≤ 8px
+// Any other element that is a known text tag or owns a direct text node is
+// treated as text. Containment is NOT checked: a line nested inside a text
+// element is still compared against that text element.
+//
+// The snippet is kept as source text because main() interpolates it into the
+// expression string passed to page.evaluate(); it runs in the browser, not in
+// Node. It returns { textElements, lineElements, overlaps }.
+
+const DECORATIVE_LINE_DETECTION = `
+(function detectOverlaps(minGapPx) {
+  // SVG elements expose className as an SVGAnimatedString object rather than
+  // a string, so read the class attribute to always report plain text.
+  const classOf = (el) => el.getAttribute('class') || '';
+
+  // Distance between two 1-D spans; 0 when they intersect.
+  const spanGap = (aStart, aEnd, bStart, bEnd) => {
+    if (bStart >= aEnd) return bStart - aEnd;
+    if (aStart >= bEnd) return aStart - bEnd;
+    return 0;
+  };
+
+  // True when two 1-D spans share at least one point (touching counts).
+  const spansMeet = (aStart, aEnd, bStart, bEnd) => !(aEnd < bStart || bEnd < aStart);
+
+  const textTags = ['h1','h2','h3','h4','h5','h6','p','span','a','li','td','th','label','summary'];
+  const textElements = [];
+  const lineElements = [];
+
+  // Classify every rendered element as a decorative line, text, or neither
+  for (const el of document.querySelectorAll('*')) {
+    const rect = el.getBoundingClientRect();
+    if (rect.width === 0 || rect.height === 0) continue;
+
+    const tag = el.tagName.toLowerCase();
+    const style = getComputedStyle(el);
+
+    // Skip invisible elements
+    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
+
+    // Detect decorative lines
+    const isHR = tag === 'hr';
+    const isThinH = rect.height <= 5 && rect.width > 20;  // thin horizontal line
+    const isThinV = rect.width <= 5 && rect.height > 20;   // thin vertical line
+    const aspectH = rect.width / rect.height;
+    const aspectV = rect.height / rect.width;
+    const isWideRatio = aspectH > 15 && rect.height <= 8;  // very wide, very thin
+    const isTallRatio = aspectV > 15 && rect.width <= 8;   // very tall, very thin
+
+    // Check if element has only border (no text content, no background image)
+    const hasOnlyBorder = (
+      el.textContent.trim() === '' &&
+      style.backgroundImage === 'none' &&
+      (style.borderTopWidth !== '0px' || style.borderBottomWidth !== '0px' ||
+       style.borderLeftWidth !== '0px' || style.borderRightWidth !== '0px')
+    );
+    const isBorderLine = hasOnlyBorder && (rect.height <= 8 || rect.width <= 8);
+
+    if (isHR || isThinH || isThinV || isWideRatio || isTallRatio || isBorderLine) {
+      lineElements.push({
+        tag: tag,
+        class: classOf(el),
+        rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
+        type: isThinH || isWideRatio ? 'horizontal' : (isThinV || isTallRatio ? 'vertical' : (rect.width >= rect.height ? 'horizontal' : 'vertical')),
+      });
+      continue;
+    }
+
+    // Detect text elements (has direct text content or is a heading/paragraph)
+    const hasDirectText = Array.from(el.childNodes).some(n => n.nodeType === 3 && n.textContent.trim());
+
+    if (textTags.includes(tag) || hasDirectText) {
+      // Ignore text boxes shorter than 3px
+      if (rect.height < 3) continue;
+
+      textElements.push({
+        tag: tag,
+        class: classOf(el),
+        text: el.textContent.trim().substring(0, 60),
+        rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
+      });
+    }
+  }
+
+  // Smallest text boxes first: each line is reported at most once, so the
+  // first (most specific) text element that is too close wins over its parents.
+  textElements.sort((a, b) => (a.rect.width * a.rect.height) - (b.rect.width * b.rect.height));
+
+  const overlaps = [];
+  const reportedLines = new Set(); // keys are orientation + rounded line origin
+
+  for (const text of textElements) {
+    for (const line of lineElements) {
+      const tr = text.rect;
+      const lr = line.rect;
+      const horizontal = line.type === 'horizontal';
+
+      // "along" runs in the line's direction, "across" is perpendicular to it.
+      const along = horizontal
+        ? [tr.x, tr.x + tr.width, lr.x, lr.x + lr.width]
+        : [tr.y, tr.y + tr.height, lr.y, lr.y + lr.height];
+      const across = horizontal
+        ? [tr.y, tr.y + tr.height, lr.y, lr.y + lr.height]
+        : [tr.x, tr.x + tr.width, lr.x, lr.x + lr.width];
+
+      // Text and line must share some range along the line to be related at all
+      if (!spansMeet(along[0], along[1], along[2], along[3])) continue;
+
+      const gap = spanGap(across[0], across[1], across[2], across[3]);
+      if (gap < minGapPx) {
+        const lineKey = (horizontal ? 'h:' : 'v:') + Math.round(lr.x) + ',' + Math.round(lr.y);
+        if (!reportedLines.has(lineKey)) {
+          reportedLines.add(lineKey);
+          overlaps.push({
+            text: text.text,
+            textTag: text.tag,
+            textClass: text.class,
+            textRect: tr,
+            lineTag: line.tag,
+            lineClass: line.class,
+            lineRect: lr,
+            lineType: line.type,
+            gap: Math.round(gap * 10) / 10,
+            required: minGapPx,
+          });
+        }
+      }
+    }
+  }
+
+  return {
+    textElements: textElements.length,
+    lineElements: lineElements.length,
+    overlaps: overlaps,
+  };
+})
+`;
+
+// ── Main ──
+
+/**
+ * Entry point: render the cover HTML in Chromium, run the overlap detector in
+ * the page, print a report, and exit.
+ *
+ * Exit codes: 0 = no overlap, 1 = overlap found, 2 = script error (missing
+ * input, no browser, launch failure, or any unexpected exception).
+ *
+ * The exit code is only applied after the browser has been closed, so the
+ * `finally` cleanup actually runs on the normal paths as well.
+ */
+async function main() {
+  const { input, width, height, minGap } = parseArgs(process.argv);
+
+  if (!input) {
+    console.error('✗ No input file specified. Usage: node cover_validate.js cover.html');
+    process.exit(2);
+  }
+
+  const absIn = path.resolve(input);
+  if (!fs.existsSync(absIn)) {
+    console.error(`✗ File not found: ${absIn}`);
+    process.exit(2);
+  }
+
+  const widthPx = dimToPx(width) || 794;   // A4 width in px
+  const heightPx = dimToPx(height) || 1123; // A4 height in px
+  const gap = minGap || Math.round(widthPx * 0.05);  // 1U = 5% of page width
+
+  console.log(`🔍 cover_validate — Cover overlap detection`);
+  console.log(`   Input:  ${absIn}`);
+  console.log(`   Page:   ${widthPx}×${heightPx}px`);
+  console.log(`   Min gap: ${gap}px (1U)`);
+
+  const { chromium } = playwright;
+  const bInfo = resolveChromium(chromium);
+
+  if (bInfo.status === 'missing') {
+    console.error('✗ No Chromium found. Install via: npx playwright install chromium');
+    process.exit(2);
+  }
+
+  let browser;
+  try {
+    const opts = { headless: true };
+    if (bInfo.status === 'fallback') opts.executablePath = bInfo.executablePath;
+    browser = await chromium.launch(opts);
+  } catch (err) {
+    console.error(`✗ Browser launch failed: ${err.message}`);
+    process.exit(2);
+  }
+
+  let exitCode = 0;
+  try {
+    const page = await browser.newPage({ viewport: { width: widthPx, height: heightPx } });
+    // Build a proper file URL so '#', '?', '%' and spaces in the path are
+    // percent-encoded instead of being parsed as URL syntax.
+    const { pathToFileURL } = require('url');
+    await page.goto(pathToFileURL(absIn).href, { waitUntil: 'networkidle' });
+    console.log(`   ✓ HTML loaded`);
+
+    // Wait for fonts
+    const fontsLoaded = await page.evaluate(() =>
+      document.fonts.ready.then(() => document.fonts.size)
+    ).catch(() => 0);
+    console.log(`   ✓ Fonts: ${fontsLoaded} loaded`);
+
+    // Run overlap detection
+    const result = await page.evaluate(`(${DECORATIVE_LINE_DETECTION})(${gap})`);
+
+    console.log(`   ✓ Found ${result.textElements} text elements, ${result.lineElements} decorative lines`);
+
+    if (result.overlaps.length === 0) {
+      console.log(`\n   ✅ No overlap issues found`);
+    } else {
+      exitCode = 1;
+      console.error(`\n   ❌ Found ${result.overlaps.length} text-line overlap(s):\n`);
+
+      for (const o of result.overlaps) {
+        const vertical = o.lineType === 'vertical';
+        const direction = vertical ? 'horizontal' : 'vertical';
+        // Show the coordinates on the axis the gap was measured along:
+        // x for vertical lines, y for horizontal ones.
+        const axis = vertical ? 'x' : 'y';
+        const span = (rect) => {
+          const start = vertical ? rect.x : rect.y;
+          const size = vertical ? rect.width : rect.height;
+          return `${axis}=${Math.round(start)}-${Math.round(start + size)}`;
+        };
+        console.error(`   ERROR: ${direction} gap = ${o.gap}px (required ≥ ${o.required}px)`);
+        console.error(`     Text: <${o.textTag}> "${o.text}" @ ${span(o.textRect)}`);
+        console.error(`     Line: <${o.lineTag}${o.lineClass ? '.' + o.lineClass.split(' ')[0] : ''}> [${o.lineType}] @ ${span(o.lineRect)}`);
+        console.error(`     Fix: Move the decorative line at least ${Math.ceil(o.required - o.gap)}px away from the text.`);
+        console.error('');
+      }
+    }
+  } finally {
+    await browser.close();
+  }
+
+  process.exit(exitCode);
+}
+
+main().catch(err => {
+  console.error(`✗ Unexpected error: ${err.message}`);
+  process.exit(2);
+});
--- a/skills/pdf/scripts/design_engine.py
+++ b/skills/pdf/scripts/design_engine.py
--- a/skills/pdf/scripts/html2pdf-next.js
+++ b/skills/pdf/scripts/html2pdf-next.js
@@ -0,0 +1,754 @@
+#!/usr/bin/env node
+/**
+ * html2pdf-next.js — HTML → PDF converter using Playwright + pdf-lib
+ *
+ * Drop-in replacement for html2pdf.js, WITHOUT Paged.js dependency.
+ * Uses Chromium native @page CSS for pagination + pdf-lib for post-processing.
+ *
+ * Usage:
+ *   node html2pdf-next.js input.html
+ *   node html2pdf-next.js input.html --output result.pdf
+ *   node html2pdf-next.js input.html --css extra.css
+ *   node html2pdf-next.js input.html --width 720px --height 960px
+ *   node html2pdf-next.js input.html --direct   (same as default now — no Paged.js to skip)
+ *   node html2pdf-next.js input.html --merge a.pdf b.pdf  (merge additional PDFs after)
+ *
+ * Architecture:
+ *   1. Playwright renders HTML → raw PDF via Chromium's native print engine
+ *   2. Pre-render hooks: Mermaid, KaTeX, oversized element fixes
+ *   3. Post-render: pdf-lib for merge, metadata, page count extraction
+ *   4. No Paged.js, no paged.polyfill.js — CSS @page handles pagination natively
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { execSync, spawnSync } = require('child_process');
+
+// Promise-based delay: resolves (with no value) after `ms` milliseconds.
+const sleep = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
+
+// ═══════════════════════════════════════════════════════════════════
+// Playwright / Chromium resolution (self-contained, no external helper)
+// ═══════════════════════════════════════════════════════════════════
+
+/**
+ * Load the `playwright` module.
+ *
+ * Tries a plain require first. If that fails, it looks for
+ * `<root>/playwright/package.json` under PLAYWRIGHT_PATH, each NODE_PATH
+ * entry, and the directory printed by `npm root -g`, in that order.
+ *
+ * @returns {object} The loaded playwright module.
+ * @throws {Error} When no location yields a loadable playwright package.
+ */
+function loadPlaywright() {
+  try {
+    return require('playwright');
+  } catch (_) { /* not resolvable from here; search further below */ }
+
+  const { createRequire } = require('module');
+  const { PLAYWRIGHT_PATH, NODE_PATH } = process.env;
+
+  const searchRoots = new Set();
+  if (PLAYWRIGHT_PATH) searchRoots.add(PLAYWRIGHT_PATH);
+  if (NODE_PATH) {
+    for (const entry of NODE_PATH.split(path.delimiter)) {
+      if (entry) searchRoots.add(entry);
+    }
+  }
+  try {
+    const globalRoot = execSync('npm root -g', { stdio: ['ignore', 'pipe', 'ignore'] })
+      .toString()
+      .trim();
+    if (globalRoot) searchRoots.add(globalRoot);
+  } catch (_) { /* npm unavailable; continue with the roots gathered so far */ }
+
+  for (const root of searchRoots) {
+    const manifest = path.join(root, 'playwright', 'package.json');
+    if (fs.existsSync(manifest)) {
+      try {
+        return createRequire(manifest)('playwright');
+      } catch (_) { /* broken install at this root; try the next one */ }
+    }
+  }
+  throw new Error('Playwright not found. Install: npm install -g playwright');
+}
+
+/**
+ * Load the `pdf-lib` module, first with a plain require and then from the
+ * directory printed by `npm root -g`.
+ *
+ * @returns {object} The loaded pdf-lib module.
+ * @throws {Error} When pdf-lib cannot be loaded from either location.
+ */
+function loadPdfLib() {
+  try {
+    return require('pdf-lib');
+  } catch (_) { /* not resolvable from here; try the global root */ }
+
+  const { createRequire } = require('module');
+  try {
+    const globalRoot = execSync('npm root -g', { stdio: ['ignore', 'pipe', 'ignore'] })
+      .toString()
+      .trim();
+    const manifest = path.join(globalRoot, 'pdf-lib', 'package.json');
+    if (fs.existsSync(manifest)) {
+      return createRequire(manifest)('pdf-lib');
+    }
+  } catch (_) { /* npm unavailable or global install unusable */ }
+
+  throw new Error('pdf-lib not found. Install: npm install -g pdf-lib');
+}
+
+/**
+ * Find a Chromium executable, optionally installing one.
+ *
+ * @param {object} chromiumObj - Playwright's `chromium` browser type; only its
+ *   `executablePath()` method is used.
+ * @param {boolean} [allowInstall=false] - When true and nothing was found, run
+ *   `npx playwright install chromium` and re-check Playwright's path.
+ * @returns {{status: string, executablePath: string}} `status` is one of
+ *   'ok' (Playwright's path exists), 'fallback' (PLAYWRIGHT_CHROMIUM_PATH or a
+ *   system browser exists), 'installed' (found after installing), 'missing'.
+ */
+function resolveChromium(chromiumObj, allowInstall = false) {
+  let exe;
+  try {
+    exe = chromiumObj.executablePath();
+  } catch (_) {
+    exe = null;
+  }
+  if (exe && fs.existsSync(exe)) {
+    return { status: 'ok', executablePath: exe };
+  }
+
+  // Try system Chrome/Chromium; an explicit override is probed first
+  const systemPaths = [
+    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+    '/Applications/Chromium.app/Contents/MacOS/Chromium',
+    '/usr/bin/chromium-browser', '/usr/bin/chromium', '/usr/bin/google-chrome',
+  ];
+  const override = process.env.PLAYWRIGHT_CHROMIUM_PATH;
+  const searchOrder = override ? [override, ...systemPaths] : systemPaths;
+  const found = searchOrder.find((candidate) => fs.existsSync(candidate));
+  if (found !== undefined) {
+    return { status: 'fallback', executablePath: found };
+  }
+
+  if (allowInstall) {
+    const install = spawnSync('npx', ['playwright', 'install', 'chromium'], { stdio: 'inherit', shell: true });
+    if (install.status === 0) {
+      // If the lookup throws again, `exe` keeps its previous value.
+      try {
+        exe = chromiumObj.executablePath();
+      } catch (_) { /* keep previous value */ }
+      if (exe && fs.existsSync(exe)) {
+        return { status: 'installed', executablePath: exe };
+      }
+    }
+  }
+
+  return { status: 'missing', executablePath: exe || '' };
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// CLI
+// ═══════════════════════════════════════════════════════════════════
+
+/**
+ * Parse process.argv for html2pdf-next.js.
+ *
+ * The first token is always taken as the input file. With no arguments, or
+ * when `--help` / `-h` appears as an option, usage is printed and the process
+ * exits with code 0.
+ *
+ * @returns {{inputFile: string, outputFile: string, customCSS: (string|null),
+ *   width: (string|null), height: (string|null), mergeFiles: string[],
+ *   title: (string|null)}} `outputFile` defaults to the input path with its
+ *   extension replaced by `.pdf`.
+ */
+function cli() {
+  const tokens = process.argv.slice(2);
+
+  const showHelpAndExit = () => {
+    console.log(`
+Usage: node html2pdf-next.js <input.html> [options]
+
+Options:
+  --output, -o <file>   Output PDF path (default: <input>.pdf)
+  --css <file>          Inject extra stylesheet
+  --width <px>          Custom page width  (e.g. 720px)
+  --height <px>         Custom page height (e.g. 960px)
+  --direct              (no-op, kept for backward compat — always direct now)
+  --merge <files...>    Append additional PDF files after conversion
+  --title <text>        Set PDF document title metadata
+  --help, -h            Show help
+`);
+    process.exit(0);
+  };
+
+  if (!tokens.length || tokens[0] === '-h' || tokens[0] === '--help') {
+    showHelpAndExit();
+  }
+
+  const inputFile = tokens[0];
+  let outputFile = null, customCSS = null, width = null, height = null;
+  let mergeFiles = [], title = null;
+
+  for (let i = 1; i < tokens.length; i++) {
+    const t = tokens[i];
+    if (t === '--output' || t === '-o') outputFile = tokens[++i];
+    else if (t === '--css') customCSS = tokens[++i];
+    else if (t === '--width') width = tokens[++i];
+    else if (t === '--height') height = tokens[++i];
+    else if (t === '--direct') { /* no-op, always direct */ }
+    else if (t === '--title') title = tokens[++i];
+    // Help is honoured after the input file too, as the usage text promises.
+    else if (t === '--help' || t === '-h') showHelpAndExit();
+    else if (t === '--merge') {
+      // Stop at the next option of EITHER form; checking only for '--' let a
+      // following "-o out.pdf" be swallowed as two merge inputs.
+      while (i + 1 < tokens.length && !tokens[i + 1].startsWith('-')) {
+        mergeFiles.push(tokens[++i]);
+      }
+    }
+  }
+
+  if (!outputFile) {
+    const p = path.parse(inputFile);
+    outputFile = path.join(p.dir || '.', p.name + '.pdf');
+  }
+
+  return { inputFile, outputFile, customCSS, width, height, mergeFiles, title };
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// Helpers
+// ═══════════════════════════════════════════════════════════════════
+
+/**
+ * Format a byte count with one decimal place and a B/KB/MB/GB suffix,
+ * dividing by 1024 per step. Values beyond the GB range stay in GB.
+ *
+ * @param {number} n - Size in bytes.
+ * @returns {string} For example '1.5 KB'.
+ */
+function prettyBytes(n) {
+  const units = ['B', 'KB', 'MB', 'GB'];
+  const lastUnit = units.length - 1;
+  let value = n;
+  let unitIndex = 0;
+  for (; value >= 1024 && unitIndex < lastUnit; unitIndex++) {
+    value /= 1024;
+  }
+  return `${value.toFixed(1)} ${units[unitIndex]}`;
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// Pre-render hooks (run in browser context before PDF export)
+// ═══════════════════════════════════════════════════════════════════
+
+/**
+ * Normalise the loaded document before Chromium prints it.
+ *
+ * Steps, in order: wait for Mermaid diagrams, render KaTeX math (injecting
+ * KaTeX from a CDN when raw math is present but the library is not), relax
+ * break-inside on very tall elements, report overflowing elements, unclip
+ * page-level containers, move absolutely bottom-pinned text into normal flow,
+ * add a default @page margin when no <style> element declares one, and force
+ * cover sections to fill one page.
+ *
+ * @param {object} page - Playwright page with the document already loaded.
+ * @returns {Promise<{warnings: string[], contentHeight: number}>} Collected
+ *   warning messages, and the larger of the html/body scrollHeight measured
+ *   right after the container fix (step 4b).
+ */
+async function preRenderHooks(page) {
+  const warnings = [];
+
+  // 1. Wait for Mermaid diagrams
+  const hasMermaid = await page.evaluate(() => document.querySelectorAll('.mermaid').length > 0);
+  if (hasMermaid) {
+    console.log('  ⏳ Waiting for Mermaid diagrams...');
+    try {
+      await page.waitForFunction(() => {
+        for (const m of document.querySelectorAll('.mermaid'))
+          if (!m.querySelector('svg') && !m.getAttribute('data-processed')) return false;
+        return true;
+      }, { timeout: 30000 });
+      await sleep(2000);
+      console.log('  ✓ Mermaid rendered');
+    } catch (_) {
+      warnings.push('Mermaid rendering timed out (30s)');
+    }
+  }
+
+  // 2. Trigger KaTeX math rendering
+  // Browser-side probe, evaluated before and after the optional CDN injection.
+  const probeKatex = () => ({
+    lib: typeof renderMathInElement === 'function' || typeof katex !== 'undefined',
+    rendered: document.querySelectorAll('.katex').length > 0,
+    raw: /\$[^$]+\$|\$\$[^$]+\$\$|\\\(.*?\\\)|\\\[.*?\\\]/.test(document.body.innerText),
+  });
+  const katexStatus = await page.evaluate(probeKatex);
+
+  // Auto-inject KaTeX CDN if raw math detected but library not loaded
+  if (!katexStatus.lib && katexStatus.raw && !katexStatus.rendered) {
+    console.log('  ⏳ Auto-injecting KaTeX CDN (math formulas detected but KaTeX not loaded)...');
+    try {
+      await page.addStyleTag({ url: 'https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.css' });
+      await page.addScriptTag({ url: 'https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.js' });
+      await page.addScriptTag({ url: 'https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/contrib/auto-render.min.js' });
+      await sleep(2000); // Wait for CDN scripts to load
+    } catch (err) {
+      // An unreachable CDN must degrade to the warning below, not abort the
+      // whole conversion.
+      console.log(`  ⚠ KaTeX CDN injection error: ${err.message}`);
+    }
+    // Re-check
+    const recheckLib = await page.evaluate(() => typeof renderMathInElement === 'function');
+    if (recheckLib) {
+      console.log('  ✓ KaTeX CDN loaded successfully');
+    } else {
+      console.log('  ⚠ KaTeX CDN failed to load — math will render as raw text');
+      warnings.push('KaTeX CDN injection failed; math formulas may appear as raw LaTeX code');
+    }
+  }
+
+  // Re-evaluate after potential CDN injection
+  const katexReady = await page.evaluate(probeKatex);
+
+  if (katexReady.lib && !katexReady.rendered && katexReady.raw) {
+    console.log('  ⏳ Triggering KaTeX rendering...');
+    await page.evaluate(() => {
+      if (typeof renderMathInElement === 'function')
+        renderMathInElement(document.body, {
+          delimiters: [
+            { left: '$$', right: '$$', display: true },
+            { left: '$', right: '$', display: false },
+            { left: '\\(', right: '\\)', display: false },
+            { left: '\\[', right: '\\]', display: true },
+          ],
+          throwOnError: false,
+        });
+    });
+    await sleep(1000);
+    console.log('  ✓ KaTeX rendered');
+  } else if (katexReady.rendered) {
+    await sleep(500); // Font loading settle
+  }
+
+  // 3. Fix oversized elements that prevent page breaks
+  const nFixed = await page.evaluate(() => {
+    const LIMIT = 1000;
+    let n = 0;
+    document.querySelectorAll(
+      '[style*="page-break-inside: avoid"],[style*="break-inside: avoid"],' +
+      '.avoid-break,table,figure,.theorem,.algorithm'
+    ).forEach(el => {
+      if (el.getBoundingClientRect().height > LIMIT) {
+        el.style.pageBreakInside = 'auto';
+        el.style.breakInside = 'auto';
+        n++;
+      }
+    });
+    return n;
+  });
+  if (nFixed) {
+    console.log(`  ⚠ Fixed ${nFixed} oversized elements (removed break-inside: avoid)`);
+  }
+
+  // 4. Detect overflow (horizontal AND vertical)
+  const overflows = await page.evaluate(() => {
+    const out = [];
+    document.querySelectorAll('pre,table,figure,img,svg,.mermaid,blockquote,.equation').forEach(el => {
+      const hDiff = el.scrollWidth - el.clientWidth;
+      const vDiff = el.scrollHeight - el.clientHeight;
+      if (hDiff > 2 || vDiff > 2) out.push({
+        tag: el.tagName.toLowerCase(),
+        // Read the attribute: on <svg>, className is an SVGAnimatedString
+        // object and would break the .split() in the report below.
+        cls: el.getAttribute('class') || '',
+        hOverflow: hDiff > 2 ? hDiff : 0,
+        vOverflow: vDiff > 2 ? vDiff : 0,
+        preview: (el.textContent || '').slice(0, 50).replace(/\s+/g, ' '),
+      });
+    });
+    return out;
+  });
+  if (overflows.length) {
+    console.log('  ⚠ Overflow detected:');
+    overflows.forEach(o => {
+      const parts = [];
+      if (o.hOverflow) parts.push(`H +${o.hOverflow}px`);
+      if (o.vOverflow) parts.push(`V +${o.vOverflow}px`);
+      console.log(`    <${o.tag}${o.cls ? '.' + o.cls.split(' ')[0] : ''}> ${parts.join(', ')}`);
+    });
+    warnings.push(`${overflows.length} element(s) have overflow`);
+  }
+
+  // 4b. Fix vertical overflow on page-level containers
+  //     When html/body or the main content canvas has a fixed height + overflow:hidden,
+  //     content gets clipped. For documents (html2pdf-next.js), we DON'T expand the
+  //     container to its scrollHeight — that creates an oversized single "page" that
+  //     Playwright splits unevenly. Instead, we remove the fixed height and overflow:hidden
+  //     so content flows naturally and @page CSS handles pagination.
+  //
+  //     (The old "expand to scrollHeight" logic belongs in html2poster.js where a single
+  //     continuous canvas is the desired output.)
+  const vOverflowFix = await page.evaluate(() => {
+    const fixes = [];
+    // Candidates: html, body, and any direct child of body that acts as a full-page canvas
+    const candidates = [document.documentElement, document.body];
+    const bodyChildren = document.body.children;
+    for (let i = 0; i < bodyChildren.length; i++) {
+      const child = bodyChildren[i];
+      // Skip SVG defs, script, style elements
+      const tag = child.tagName.toLowerCase();
+      if (tag === 'svg' || tag === 'script' || tag === 'style' || tag === 'link') continue;
+      candidates.push(child);
+      // Also check one level deeper (e.g., .canvas > .content)
+      for (let j = 0; j < child.children.length; j++) {
+        const grandchild = child.children[j];
+        const gtag = grandchild.tagName.toLowerCase();
+        if (gtag === 'svg' || gtag === 'script' || gtag === 'style') continue;
+        candidates.push(grandchild);
+      }
+    }
+
+    for (const el of candidates) {
+      const computed = getComputedStyle(el);
+      // Test the vertical axis directly: the `overflow` shorthand serialises
+      // as two keywords when the axes differ, so `overflow-y: hidden` set on
+      // its own would never equal 'hidden'.
+      const overflowY = computed.overflowY;
+      const hasHiddenOverflow = overflowY === 'hidden' || overflowY === 'clip';
+      const diff = el.scrollHeight - el.clientHeight;
+
+      if (hasHiddenOverflow && diff > 5) {
+        // This element is clipping content vertically
+        const tag = el.tagName.toLowerCase();
+        const id = el.id ? `#${el.id}` : '';
+        const classAttr = (el.getAttribute('class') || '').trim();
+        const cls = classAttr ? `.${classAttr.split(' ')[0]}` : '';
+        const selector = `${tag}${id}${cls}`;
+
+        const oldHeight = el.clientHeight;
+
+        // Document mode: remove fixed height + overflow:hidden,
+        // let @page handle natural pagination
+        el.style.height = 'auto';
+        el.style.minHeight = 'auto';
+        el.style.maxHeight = 'none';
+        el.style.overflow = 'visible';
+        el.style.overflowY = 'visible';
+
+        fixes.push({
+          selector,
+          oldHeight,
+          clipped: diff,
+        });
+      }
+    }
+
+    // After fixing containers, re-measure to get the final content height
+    const finalHeight = Math.max(
+      document.documentElement.scrollHeight,
+      document.body.scrollHeight
+    );
+
+    return { fixes, finalHeight };
+  });
+
+  if (vOverflowFix.fixes.length) {
+    console.log('  ⚠️  Removed fixed height + overflow:hidden — content will paginate naturally:');
+    vOverflowFix.fixes.forEach(f => {
+      console.log(`    ${f.selector}: was ${f.oldHeight}px with ${f.clipped}px clipped → now auto (content will flow to next page)`);
+    });
+  }
+
+  // 4c. Convert absolute-bottom elements to document flow
+  //     Elements with `position: absolute; bottom: Npx` inside page containers
+  //     are pinned relative to their containing block. When content paginates
+  //     across multiple @page pages, these elements either overlap with body
+  //     text or land on the wrong page. Fix: convert them to static positioning
+  //     so they participate in normal document flow and paginate naturally.
+  const absBottomFix = await page.evaluate(() => {
+    const converted = [];
+    // Scan inside page-level containers (body children and their children)
+    const containers = [];
+    for (let i = 0; i < document.body.children.length; i++) {
+      const child = document.body.children[i];
+      const tag = child.tagName.toLowerCase();
+      if (tag === 'svg' || tag === 'script' || tag === 'style' || tag === 'link') continue;
+      containers.push(child);
+    }
+
+    for (const container of containers) {
+      const descendants = container.querySelectorAll('*');
+      for (const el of descendants) {
+        const computed = getComputedStyle(el);
+        if (computed.position === 'absolute' && computed.bottom !== 'auto' && computed.bottom !== '') {
+          // Check if this element contains visible text (not just decorative)
+          const hasText = el.textContent && el.textContent.trim().length > 0;
+          if (!hasText) continue;
+
+          const tag = el.tagName.toLowerCase();
+          const id = el.id ? `#${el.id}` : '';
+          const classAttr = (el.getAttribute('class') || '').trim();
+          const cls = classAttr ? `.${classAttr.split(' ')[0]}` : '';
+          const selector = `${tag}${id}${cls}`;
+
+          // getComputedStyle() is live: capture the value BEFORE mutating the
+          // element, otherwise the report below would always say "auto".
+          const originalBottom = computed.bottom;
+
+          // Convert to static flow: remove absolute positioning
+          el.style.position = 'static';
+          el.style.bottom = 'auto';
+          el.style.left = 'auto';
+          el.style.right = 'auto';
+          // Padding and margin on the element are left untouched.
+
+          converted.push({ selector, bottom: originalBottom });
+        }
+      }
+    }
+    return converted;
+  });
+
+  if (absBottomFix.length) {
+    console.log('  ⚠️  Converted absolute-bottom elements to document flow (prevents overlap on multi-page):');
+    absBottomFix.forEach(f => {
+      console.log(`    ${f.selector}: was position:absolute;bottom:${f.bottom} → now static (flows with content)`);
+    });
+  }
+
+  // 5. Inject minimal @page CSS fallback
+  //    Only inline <style> elements are inspected for an existing @page rule.
+  await page.evaluate(() => {
+    const styles = Array.from(document.querySelectorAll('style'));
+    const hasPageRule = styles.some(s => (s.textContent || '').includes('@page'));
+    if (!hasPageRule) {
+      const s = document.createElement('style');
+      s.textContent = `@page { margin: 20mm; }`;
+      document.head.appendChild(s);
+    }
+  });
+
+  // 6. Fix full-page cover sections for print
+  //    In screen mode, height:100vh = viewport height. In print mode, 100vh ≠ page height.
+  //    Detect elements using 100vh and convert to print-safe page-filling behavior.
+  const coverFixed = await page.evaluate(() => {
+    let fixed = 0;
+    const allEls = document.querySelectorAll('*');
+    for (const el of allEls) {
+      const style = el.style;
+      // Only the inline style can literally read '100vh'; computed lengths are
+      // always resolved to px, so comparing them against '100vh' never matched.
+      const isVh = style.height === '100vh' || style.minHeight === '100vh';
+      // Also detect via class name hints
+      const isCover = el.classList.contains('cover') || el.classList.contains('cover-page') ||
+                      el.id === 'cover' || el.getAttribute('data-role') === 'cover';
+      if (isVh || (isCover && el.offsetHeight > 0)) {
+        // Force the element to fill the print page
+        el.style.height = '100vh';
+        el.style.minHeight = '100vh';
+        el.style.pageBreakAfter = 'always';
+        el.style.pageBreakInside = 'avoid';
+        el.style.boxSizing = 'border-box';
+        el.style.overflow = 'hidden';
+        fixed++;
+      }
+    }
+    // Inject print-specific CSS to make 100vh work correctly
+    if (fixed > 0) {
+      const s = document.createElement('style');
+      s.textContent = `
+        @media print {
+          .cover, .cover-page, [data-role="cover"] {
+            height: 100vh !important;
+            min-height: 100vh !important;
+            page-break-after: always !important;
+            page-break-inside: avoid !important;
+            overflow: hidden !important;
+          }
+        }
+      `;
+      document.head.appendChild(s);
+    }
+    return fixed;
+  });
+  if (coverFixed) {
+    console.log(`  ✓ Fixed ${coverFixed} full-page cover section(s) for print`);
+    // Also inject named @page rule for cover with zero margins
+    await page.evaluate(() => {
+      const s = document.createElement('style');
+      s.textContent = `
+        @page cover-page {
+          margin: 0 !important;
+        }
+        @media print {
+          .cover, .cover-page, [data-role="cover"] {
+            page: cover-page;
+            margin: 0 !important;
+            padding: 40px !important;
+          }
+        }
+      `;
+      document.head.appendChild(s);
+    });
+  }
+
+  return { warnings, contentHeight: vOverflowFix.finalHeight };
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// Content statistics (post-render, from PDF or page)
+// ═══════════════════════════════════════════════════════════════════
+
+async function collectStats(page) {
+  return page.evaluate(() => {
+    const body = document.body;
+    const text = body.innerText || '';
+    const zhChars = (text.match(/[\u4e00-\u9fa5]/g) || []).length;
+    const enWords = (text.match(/[a-zA-Z]+/g) || []).length;
+    return {
+      wordCount: zhChars + enWords,
+      figures: document.querySelectorAll('figure,.figure,img').length,
+      tables: document.querySelectorAll('table').length,
+    };
+  });
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// pdf-lib post-processing: page count, metadata, merge
+// ═══════════════════════════════════════════════════════════════════
+
+async function postProcess(pdfPath, options = {}) {
+  const { PDFDocument } = loadPdfLib();
+  const pdfBytes = fs.readFileSync(pdfPath);
+  const doc = await PDFDocument.load(pdfBytes);
+
+  // Set metadata
+  if (options.title) doc.setTitle(options.title);
+  doc.setProducer('html2pdf-next (Playwright + pdf-lib)');
+  doc.setCreationDate(new Date());
+
+  const pageCount = doc.getPageCount();
+
+  // Merge additional PDFs
+  if (options.mergeFiles && options.mergeFiles.length) {
+    for (const mf of options.mergeFiles) {
+      if (!fs.existsSync(mf)) {
+        console.log(`  ⚠ Merge file not found: ${mf}`);
+        continue;
+      }
+      console.log(`  📎 Merging: ${path.basename(mf)}`);
+      const donorBytes = fs.readFileSync(mf);
+      const donorDoc = await PDFDocument.load(donorBytes);
+      const copiedPages = await doc.copyPages(donorDoc, donorDoc.getPageIndices());
+      copiedPages.forEach(p => doc.addPage(p));
+    }
+  }
+
+  // Save
+  const finalBytes = await doc.save();
+  fs.writeFileSync(pdfPath, finalBytes);
+
+  return { pageCount: doc.getPageCount(), originalPages: pageCount };
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// Main pipeline
+// ═══════════════════════════════════════════════════════════════════
+
+async function convert(inputFile, outputFile, customCSS, options = {}) {
+  const { width, height, mergeFiles, title } = options;
+
+  if (!fs.existsSync(inputFile)) {
+    console.error(`✗ File not found: ${inputFile}`);
+    process.exit(1);
+  }
+
+  const playwright = loadPlaywright();
+  const { chromium } = playwright;
+
+  // Resolve browser
+  const canInstall = process.env.PDF_SKIP_BROWSER_INSTALL !== '1';
+  const bInfo = resolveChromium(chromium, canInstall);
+
+  if (bInfo.status === 'missing') {
+    console.error('\n✗ Chromium not found. Run: npx playwright install chromium\n');
+    process.exit(2);
+  }
+  if (bInfo.status === 'fallback') {
+    console.log(`⚠ Using fallback Chromium: ${bInfo.executablePath}`);
+  }
+
+  const absIn = path.resolve(inputFile);
+  const absOut = path.resolve(outputFile);
+
+  console.log(`\n🔄 Converting ${path.basename(inputFile)}...`);
+  console.log(`   Engine: Playwright + Chromium native @page (no Paged.js)`);
+
+  // Read and optionally inject CSS
+  let html = fs.readFileSync(absIn, 'utf-8');
+  if (customCSS) {
+    if (!fs.existsSync(customCSS)) {
+      console.error(`✗ CSS file not found: ${customCSS}`);
+      process.exit(1);
+    }
+    const tag = `<style>${fs.readFileSync(customCSS, 'utf-8')}</style>`;
+    html = html.includes('</head>') ? html.replace('</head>', tag + '\n</head>') : tag + '\n' + html;
+    // Write modified HTML for Playwright to load
+    const tmpHtml = absIn + '.tmp.html';
+    fs.writeFileSync(tmpHtml, html);
+    // We'll clean up later
+  }
+
+  // Launch browser
+  let browser;
+  try {
+    const opts = { headless: true };
+    if (bInfo.status === 'fallback') opts.executablePath = bInfo.executablePath;
+    browser = await chromium.launch(opts);
+  } catch (err) {
+    const msg = err.message || '';
+    if (msg.includes('shared libraries') || msg.includes('.so')) {
+      console.error('\n✗ Missing system libraries. Run: npx playwright install-deps chromium\n');
+    } else {
+      console.error(`\n✗ Browser launch failed: ${msg}\n`);
+    }
+    process.exit(1);
+  }
+
+  try {
+    const page = await browser.newPage();
+    const loadFile = customCSS ? absIn + '.tmp.html' : absIn;
+    await page.goto('file://' + loadFile, { waitUntil: 'networkidle' });
+
+    // ── Pre-render hooks ──
+    console.log('\n📋 Pre-render checks:');
+    const preRenderResult = await preRenderHooks(page);
+    const warnings = preRenderResult.warnings;
+    const measuredContentHeight = preRenderResult.contentHeight;
+
+    // ── Detect continuous-canvas mode (design_engine.py output) ──
+    const continuousInfo = await page.evaluate(() => {
+      const el = document.querySelector('.continuous-canvas');
+      if (!el) return null;
+      const root = getComputedStyle(document.documentElement);
+      return {
+        width: root.getPropertyValue('--canvas-w').trim() || '720px',
+        height: root.getPropertyValue('--canvas-h').trim() || '960px',
+        pages: el.querySelectorAll('.page-section').length,
+      };
+    });
+
+    if (continuousInfo) {
+      // Creative PDF: seamless multi-page canvas
+      console.log(`\n🎨 Continuous canvas: ${continuousInfo.pages} pages @ ${continuousInfo.width} × ${continuousInfo.height}`);
+      await page.pdf({
+        path: absOut,
+        printBackground: true,
+        margin: { top: 0, right: 0, bottom: 0, left: 0 },
+        width: continuousInfo.width,
+        height: continuousInfo.height,
+      });
+    } else {
+      // Standard document
+      console.log('\n📄 Rendering PDF...');
+      const pdfOpts = {
+        path: absOut,
+        printBackground: true,
+        preferCSSPageSize: true,
+        tagged: true,
+      };
+
+      if (width || height) {
+        if (width) pdfOpts.width = width;
+        if (height) pdfOpts.height = height;
+        pdfOpts.margin = { top: 0, right: 0, bottom: 0, left: 0 };
+        console.log(`   Custom size: ${pdfOpts.width || 'auto'} × ${pdfOpts.height || 'auto'}`);
+      } else {
+        // No explicit size: check if @page CSS defines a fixed size
+        const pageSize = await page.evaluate(() => {
+          const styles = Array.from(document.querySelectorAll('style'));
+          for (const s of styles) {
+            const text = s.textContent || '';
+            const match = text.match(/@page\s*\{[^}]*size:\s*([\d.]+)px\s+([\d.]+)px/);
+            if (match) return { width: parseFloat(match[1]), height: parseFloat(match[2]) };
+          }
+          return null;
+        });
+
+        if (pageSize) {
+          // @page defines a fixed size — use preferCSSPageSize (already set above).
+          // Playwright will paginate content at @page height boundaries seamlessly.
+          // This is correct for both posters (seamless multi-page) and documents.
+          pdfOpts.margin = { top: 0, right: 0, bottom: 0, left: 0 };
+          console.log(`   @page size: ${pageSize.width}px × ${pageSize.height}px`);
+          if (measuredContentHeight && measuredContentHeight > pageSize.height + 5) {
+            const estPages = Math.ceil(measuredContentHeight / pageSize.height);
+            console.log(`   Content height: ${measuredContentHeight}px → ~${estPages} pages`);
+          }
+        } else {
+          pdfOpts.format = 'A4';
+        }
+      }
+
+      await page.pdf(pdfOpts);
+    }
+
+    // Collect content stats from the page
+    const stats = await collectStats(page);
+
+    // ── pdf-lib post-processing ──
+    console.log('\n🔧 Post-processing (pdf-lib):');
+    const postResult = await postProcess(absOut, { mergeFiles, title });
+
+    // Clean up temp HTML
+    const tmpHtml = absIn + '.tmp.html';
+    if (fs.existsSync(tmpHtml)) fs.unlinkSync(tmpHtml);
+
+    // ── Report ──
+    const sz = fs.statSync(absOut).size;
+    console.log('\n' + '═'.repeat(40));
+    console.log('  PDF Generated Successfully');
+    console.log('═'.repeat(40));
+    console.log(`  File:    ${path.basename(absOut)}`);
+    console.log(`  Pages:   ${postResult.pageCount}`);
+    console.log(`  Size:    ${prettyBytes(sz)}`);
+    console.log(`  Words:   ~${stats.wordCount.toLocaleString()}`);
+    console.log(`  Assets:  ${stats.figures} figures, ${stats.tables} tables`);
+    console.log(`  Engine:  Playwright (no Paged.js)`);
+    console.log(`  Path:    ${absOut}`);
+
+    if (mergeFiles && mergeFiles.length && postResult.pageCount > postResult.originalPages) {
+      console.log(`  Merged:  +${postResult.pageCount - postResult.originalPages} pages from ${mergeFiles.length} file(s)`);
+    }
+
+    if (warnings.length) {
+      console.log('\n⚠ Warnings:');
+      warnings.forEach(w => console.log(`  · ${w}`));
+    }
+
+    // Anomaly detection
+    if (postResult.pageCount > 1 && stats.wordCount > 0) {
+      const avgWordsPerPage = stats.wordCount / postResult.pageCount;
+      if (avgWordsPerPage < 30) {
+        console.log(`\n⚠ Low content density: ~${Math.round(avgWordsPerPage)} words/page (expected 100+)`);
+      }
+    }
+
+  } catch (err) {
+    console.error('\n✗ Conversion failed:', err.message);
+    process.exit(1);
+  } finally {
+    await browser.close();
+  }
+}
+
+// ═══════════════════════════════════════════════════════════════════
+// Entry
+// ═══════════════════════════════════════════════════════════════════
+
+(async () => {
+  try {
+    const args = cli();
+    await convert(args.inputFile, args.outputFile, args.customCSS, {
+      width: args.width,
+      height: args.height,
+      mergeFiles: args.mergeFiles,
+      title: args.title,
+    });
+  } catch (err) {
+    console.error('Error:', err.message);
+    process.exit(1);
+  }
+})();
--- a/skills/pdf/scripts/html2poster.js
+++ b/skills/pdf/scripts/html2poster.js
@@ -0,0 +1,256 @@
+#!/usr/bin/env node
+/**
+ * html2poster.js — Single-page poster/long-image HTML → PDF converter
+ *
+ * Purpose: Convert a fixed-width, dynamic-height HTML poster into a single-page
+ * vector PDF with zero margins. This script is PURPOSE-BUILT for posters and
+ * infographics — it does NOT handle multi-page documents, A4 pagination, or
+ * document-style margins. For those, use html2pdf-next.js.
+ *
+ * Usage:
+ *   node html2poster.js poster.html
+ *   node html2poster.js poster.html --output out.pdf
+ *   node html2poster.js poster.html --width 720px
+ *   node html2poster.js poster.html --width 720px --max-height 8000
+ *
+ * What it does (in order):
+ *   1. Load HTML in Playwright
+ *   2. Force overflow:hidden on .poster/.page containers (clip decorative overflow)
+ *   3. Inject @page { margin: 0 } (override any existing margin)
+ *   4. Ensure html/body have margin:0, padding:0, matching background
+ *   5. Measure .poster scrollHeight (actual content height)
+ *   6. Generate single-page PDF with exact dimensions
+ *
+ * What it does NOT do:
+ *   - No pagination / page breaks
+ *   - No A4 fallback
+ *   - No margin injection (always zero)
+ *   - No cover adaptation
+ *   - No pdf-lib post-processing
+ *   - No continuous-canvas detection
+ *   - No vertical overflow expansion (posters WANT overflow:hidden)
+ */
+
+const fs = require('fs');
+const path = require('path');
+const { spawnSync } = require('child_process');
+
+// ── Chromium resolution (shared logic with html2pdf-next.js) ──
+
+function resolveChromium(chromiumObj) {
+  let exe;
+  try { exe = chromiumObj.executablePath(); } catch (_) { exe = null; }
+  if (exe && fs.existsSync(exe)) return { status: 'ok', executablePath: exe };
+
+  const candidates = [
+    '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+    '/Applications/Chromium.app/Contents/MacOS/Chromium',
+    '/usr/bin/chromium-browser', '/usr/bin/chromium', '/usr/bin/google-chrome',
+  ];
+  if (process.env.PLAYWRIGHT_CHROMIUM_PATH) candidates.unshift(process.env.PLAYWRIGHT_CHROMIUM_PATH);
+
+  for (const c of candidates) {
+    if (fs.existsSync(c)) return { status: 'fallback', executablePath: c };
+  }
+  return { status: 'missing', executablePath: exe || '' };
+}
+
+// ── CLI parsing ──
+
+function parseArgs(argv) {
+  const tokens = argv.slice(2);
+  let input = null, output = null, width = '720px', maxHeight = 16000;
+
+  for (let i = 0; i < tokens.length; i++) {
+    const t = tokens[i];
+    if (t === '--output' || t === '-o') output = tokens[++i];
+    else if (t === '--width') width = tokens[++i];
+    else if (t === '--max-height') maxHeight = parseInt(tokens[++i], 10);
+    else if (t === '--help' || t === '-h') {
+      console.log(`
+Usage: node html2poster.js <input.html> [options]
+
+Options:
+  --output, -o    Output PDF path (default: input with .pdf extension)
+  --width         Poster width (default: 720px)
+  --max-height    Maximum allowed height in px (default: 16000, safety limit)
+  -h, --help      Show this help
+`);
+      process.exit(0);
+    }
+    else if (!input) input = t;
+    else if (!output) output = t;
+  }
+
+  if (!input) {
+    console.error('Error: No input HTML file specified.');
+    process.exit(1);
+  }
+
+  if (!output) {
+    output = input.replace(/\.html?$/i, '.pdf');
+    if (output === input) output = input + '.pdf';
+  }
+
+  return { input, output, width, maxHeight };
+}
+
+// ── Main ──
+
+async function main() {
+  const { input, output, width, maxHeight } = parseArgs(process.argv);
+  const absIn = path.resolve(input);
+  const absOut = path.resolve(output);
+
+  if (!fs.existsSync(absIn)) {
+    console.error(`Error: File not found: ${absIn}`);
+    process.exit(1);
+  }
+
+  console.log(`\n🖼  html2poster — Single-page poster PDF generator`);
+  console.log(`   Input:  ${absIn}`);
+  console.log(`   Output: ${absOut}`);
+  console.log(`   Width:  ${width}`);
+
+  // Load Playwright
+  let playwright;
+  try {
+    playwright = require('playwright');
+  } catch {
+    try {
+      playwright = require('playwright-core');
+    } catch {
+      console.error('Error: playwright or playwright-core not installed.');
+      process.exit(1);
+    }
+  }
+
+  const { chromium } = playwright;
+  const bInfo = resolveChromium(chromium);
+
+  if (bInfo.status === 'missing') {
+    console.error('Error: No Chromium found. Run: npx playwright install chromium');
+    process.exit(1);
+  }
+  if (bInfo.status === 'fallback') {
+    console.log(`   ⚠ Using fallback Chromium: ${bInfo.executablePath}`);
+  }
+
+  // Launch browser
+  const launchOpts = { headless: true };
+  if (bInfo.status === 'fallback') launchOpts.executablePath = bInfo.executablePath;
+
+  const browser = await chromium.launch(launchOpts);
+
+  try {
+    // Use a wide viewport so content doesn't wrap unexpectedly
+    const widthPx = parseInt(width, 10) || 720;
+    const page = await browser.newPage({ viewport: { width: widthPx, height: 1200 } });
+
+    await page.goto('file://' + absIn, { waitUntil: 'networkidle' });
+    console.log(`\n   ✓ HTML loaded`);
+
+    // ── Step 1: Force overflow:hidden on page containers ──
+    // Decorative elements with negative offsets or width>100% inflate scrollWidth,
+    // causing Playwright to shrink content to fit. overflow:hidden clips them.
+    const overflowFixed = await page.evaluate(() => {
+      const selectors = ['.poster', '.page', '#poster', '#page'];
+      let fixed = 0;
+      for (const sel of selectors) {
+        const el = document.querySelector(sel);
+        if (!el) continue;
+        const computed = getComputedStyle(el);
+        if (computed.overflow !== 'hidden') {
+          el.style.overflow = 'hidden';
+          fixed++;
+        }
+      }
+      return fixed;
+    });
+    if (overflowFixed > 0) {
+      console.log(`   ✓ Added overflow:hidden to ${overflowFixed} container(s)`);
+    }
+
+    // ── Step 2: Inject @page { margin: 0 } — override any existing @page rule ──
+    await page.evaluate(() => {
+      const s = document.createElement('style');
+      // Use !important-equivalent: place at end so it wins cascade
+      s.textContent = `@page { margin: 0 !important; size: auto; }`;
+      document.head.appendChild(s);
+    });
+
+    // ── Step 3: Ensure html/body have zero margin/padding ──
+    const bgSync = await page.evaluate(() => {
+      const html = document.documentElement;
+      const body = document.body;
+      html.style.margin = '0';
+      html.style.padding = '0';
+      body.style.margin = '0';
+      body.style.padding = '0';
+
+      // Sync body background with poster background to avoid color gaps
+      const poster = document.querySelector('.poster') || document.querySelector('.page');
+      if (poster) {
+        const posterBg = getComputedStyle(poster).backgroundColor;
+        if (posterBg && posterBg !== 'rgba(0, 0, 0, 0)' && posterBg !== 'transparent') {
+          body.style.backgroundColor = posterBg;
+          html.style.backgroundColor = posterBg;
+          return posterBg;
+        }
+      }
+      return null;
+    });
+    if (bgSync) {
+      console.log(`   ✓ Synced body background: ${bgSync}`);
+    }
+
+    // ── Step 4: Measure actual content height ──
+    const measurement = await page.evaluate(() => {
+      const poster = document.querySelector('.poster') || document.querySelector('.page') || document.body;
+      return {
+        scrollHeight: poster.scrollHeight,
+        scrollWidth: poster.scrollWidth,
+        offsetWidth: poster.offsetWidth,
+        selector: poster.className ? '.' + poster.className.split(' ')[0] : poster.tagName,
+      };
+    });
+
+    console.log(`   ✓ Measured: ${measurement.selector} = ${measurement.scrollWidth}×${measurement.scrollHeight}px`);
+
+    if (measurement.scrollWidth > widthPx + 2) {
+      console.log(`   ⚠ WARNING: scrollWidth (${measurement.scrollWidth}px) > width (${widthPx}px)`);
+      console.log(`     Decorative elements may still overflow. Check for position:absolute elements with negative offsets.`);
+    }
+
+    let contentHeight = measurement.scrollHeight;
+    if (contentHeight > maxHeight) {
+      console.log(`   ⚠ Content height ${contentHeight}px exceeds max ${maxHeight}px, clamping.`);
+      contentHeight = maxHeight;
+    }
+    if (contentHeight < 100) {
+      console.log(`   ⚠ Content height ${contentHeight}px seems too small, using 960px fallback.`);
+      contentHeight = 960;
+    }
+
+    // ── Step 5: Generate PDF ──
+    console.log(`\n   📄 Generating PDF: ${width} × ${contentHeight}px`);
+    await page.pdf({
+      path: absOut,
+      width: width,
+      height: contentHeight + 'px',
+      printBackground: true,
+      margin: { top: '0', right: '0', bottom: '0', left: '0' },
+    });
+
+    console.log(`\n   ✅ Done: ${absOut}`);
+    console.log(`      Size: ${(fs.statSync(absOut).size / 1024).toFixed(1)} KB`);
+
+  } finally {
+    await browser.close();
+  }
+}
+
+main().catch(err => {
+  console.error(`\n✗ Fatal: ${err.message}`);
+  process.exit(1);
+});
--- a/skills/pdf/scripts/pdf.py
+++ b/skills/pdf/scripts/pdf.py
--- a/skills/pdf/scripts/pdf_qa.py
+++ b/skills/pdf/scripts/pdf_qa.py
@@ -0,0 +1,901 @@
+#!/usr/bin/env python3
+"""
+PDF Quality Assurance Checker
+=============================
+Automatically detects common typesetting issues in PDFs.
+
+Usage: python3 pdf_qa.py <pdf_path>
+
+Checks:
+  1. Page size consistency across all pages
+  2. Blank page detection
+  3. CJK punctuation placement (line-start/end forbidden punctuation)
+  4. Color analysis (informational only — counts and lists colors)
+  5. Font embedding check (warns on non-embedded fonts)
+  6. PDF metadata check (title/author/creator)
+  7. Content overflow detection (text exceeding page boundaries)
+  8. Content fill ratio per page (multi-page docs, warns if < 40%)
+  9. Cover/poster full-bleed check (background extends to page edges)
+ 10. Margin symmetry check (left/right text margins)
+ 11. Table centering check (if detected)
+ 12. Formula overflow check (optional)
+"""
+
+import sys
+import os
+import re
+import json
+from collections import Counter
+
+try:
+    import pymupdf  # PyMuPDF
+except ImportError:
+    import fitz as pymupdf
+
+# ============================================================
+# Config
+# ============================================================
+
+# CJK punctuation forbidden at line start
+LINE_START_FORBIDDEN = set(
+    "。、，；：！？）】〛〉」』"
+    "\u201c\u201d"  # "" curly double quotes
+    "\u2026"        # … ellipsis
+    "\u2014"        # — em dash
+    "\uff5e"        # ～ fullwidth tilde
+    "\u00b7"        # · middle dot
+)
+
+# CJK punctuation forbidden at line end
+LINE_END_FORBIDDEN = set(
+    "（【《〈「"
+    "\u2018\u2019"  # '' curly single quotes
+    "\u201c"        # " left curly double quote
+)
+
+# Minimum fill ratio for last page (DISABLED — caused false positives)
+# LAST_PAGE_MIN_FILL = 0.40
+
+# Maximum allowed color count — REMOVED (color count is now info-only)
+# MAX_COLORS = 8
+
+# ============================================================
+# Checks
+# ============================================================
+
+class QAResult:
+    def __init__(self):
+        self.issues = []     # (severity, category, message)
+        self.passes = []     # passed checks
+        self.info = []       # informational
+    
+    def error(self, cat, msg):
+        self.issues.append(('ERROR', cat, msg))
+    
+    def warn(self, cat, msg):
+        self.issues.append(('WARN', cat, msg))
+    
+    def ok(self, msg):
+        self.passes.append(msg)
+    
+    def add_info(self, msg):
+        self.info.append(msg)
+
+
+def check_last_page_fill(doc, result):
+    """Check content fill ratio of the last page"""
+    if len(doc) < 2:
+        result.ok("Single-page document, no last-page blank check needed")
+        return
+    
+    last_page = doc[-1]
+    page_rect = last_page.rect
+    page_area = page_rect.width * page_rect.height
+    
+    # Get bounding boxes of all content on last page
+    blocks = last_page.get_text("blocks")
+    if not blocks:
+        result.error("Last page blank", f"Page {len(doc)} (last page) has no content at all!")
+        return
+    
+    # Calculate max y-coordinate covered by content
+    max_y = 0
+    min_y = page_rect.height
+    for b in blocks:
+        if b[4].strip():  # Has text content
+            min_y = min(min_y, b[1])
+            max_y = max(max_y, b[3])
+    
+    if max_y == 0:
+        result.error("Last page blank", f"Page {len(doc)} (last page) has no valid text content")
+        return
+    
+    content_height = max_y - min_y
+    fill_ratio = content_height / page_rect.height
+    
+    result.add_info(f"Last page fill ratio: {fill_ratio:.0%} (content height {content_height:.0f}px / page height {page_rect.height:.0f}px)")
+    
+    if fill_ratio < 0.25:
+        result.error("Last page blank", f"Last page fill ratio only {fill_ratio:.0%}, mostly blank! Consider compressing preceding page spacing or trimming content")
+    elif fill_ratio < LAST_PAGE_MIN_FILL:
+        result.warn("Last page blank", f"Last page fill ratio {fill_ratio:.0%}, somewhat sparse — optimization recommended")
+    else:
+        result.ok(f"Last page fill ratio {fill_ratio:.0%} ✓")
+
+
+def check_punctuation(doc, result):
+    """Check CJK punctuation placement rules"""
+    violations = []
+    
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        # Extract text by line
+        text_dict = page.get_text("dict")
+        
+        for block in text_dict.get("blocks", []):
+            if block.get("type") != 0:  # Only check text blocks
+                continue
+            for line in block.get("lines", []):
+                line_text = ""
+                for span in line.get("spans", []):
+                    line_text += span.get("text", "")
+                
+                line_text = line_text.strip()
+                if not line_text:
+                    continue
+                
+                # Check line start
+                first_char = line_text[0]
+                if first_char in LINE_START_FORBIDDEN:
+                    violations.append((page_num + 1, f"Forbidden line-start punctuation '{first_char}': ...{line_text[:30]}"))
+                
+                # Check line end
+                last_char = line_text[-1] if len(line_text) > 0 else ''
+                if last_char in LINE_END_FORBIDDEN:
+                    violations.append((page_num + 1, f"Forbidden line-end punctuation '{last_char}': {line_text[-30:]}..."))
+    
+    if violations:
+        # Show at most 10
+        shown = violations[:10]
+        for page_num, desc in shown:
+            result.warn("Punctuation rules", f"Page {page_num} - {desc}")
+        if len(violations) > 10:
+            result.warn("Punctuation rules", f"...{len(violations) - 10} more violations")
+    else:
+        result.ok("Punctuation placement check passed ✓")
+
+
+def check_blank_pages(doc, result):
+    """Check for completely blank pages"""
+    blank_pages = []
+    for i in range(len(doc)):
+        page = doc[i]
+        text = page.get_text().strip()
+        # Also check for images
+        images = page.get_images()
+        drawings = page.get_drawings()
+        
+        if not text and not images and not drawings:
+            blank_pages.append(i + 1)
+    
+    if blank_pages:
+        result.error("Blank pages", f"Found blank pages: {blank_pages}")
+    else:
+        result.ok("No blank pages ✓")
+
+
+def check_colors(doc, result):
+    """Analyze colors used in the document (informational only, no pass/fail)"""
+    colors = set()
+    
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        text_dict = page.get_text("dict")
+        
+        for block in text_dict.get("blocks", []):
+            if block.get("type") != 0:
+                continue
+            for line in block.get("lines", []):
+                for span in line.get("spans", []):
+                    color = span.get("color", 0)
+                    if color != 0:  # Exclude pure black
+                        r = (color >> 16) & 0xFF
+                        g = (color >> 8) & 0xFF
+                        b = color & 0xFF
+                        hex_color = f"#{r:02x}{g:02x}{b:02x}"
+                        colors.add(hex_color)
+        
+        # Check drawing colors
+        drawings = page.get_drawings()
+        for d in drawings:
+            if d.get("color"):
+                c = d["color"]
+                if isinstance(c, (tuple, list)) and len(c) >= 3:
+                    hex_color = f"#{int(c[0]*255):02x}{int(c[1]*255):02x}{int(c[2]*255):02x}"
+                    colors.add(hex_color)
+            if d.get("fill"):
+                c = d["fill"]
+                if isinstance(c, (tuple, list)) and len(c) >= 3:
+                    hex_color = f"#{int(c[0]*255):02x}{int(c[1]*255):02x}{int(c[2]*255):02x}"
+                    colors.add(hex_color)
+    
+    # Filter out near-black/white/gray colors
+    distinct_colors = []
+    for c in colors:
+        r = int(c[1:3], 16)
+        g = int(c[3:5], 16)
+        b = int(c[5:7], 16)
+        max_diff = max(abs(r-g), abs(g-b), abs(r-b))
+        if max_diff > 20:
+            distinct_colors.append(c)
+    
+    result.add_info(f"Total text colors: {len(colors)} (chromatic: {len(distinct_colors)})")
+    
+    if distinct_colors:
+        result.add_info(f"Chromatic colors: {', '.join(sorted(distinct_colors)[:10])}")
+
+
+def check_page_size_consistency(doc, result):
+    """Check whether all page sizes are consistent"""
+    if len(doc) < 2:
+        result.ok("Single-page document, size consistent ✓")
+        return
+    
+    sizes = set()
+    for i in range(len(doc)):
+        page = doc[i]
+        w = round(page.rect.width, 1)
+        h = round(page.rect.height, 1)
+        sizes.add((w, h))
+    
+    if len(sizes) > 1:
+        result.warn("Page size", f"Inconsistent page sizes: {sizes}")
+    else:
+        size = list(sizes)[0]
+        # Convert to mm
+        w_mm = size[0] * 25.4 / 72
+        h_mm = size[1] * 25.4 / 72
+        result.add_info(f"Page size: {w_mm:.0f}mm × {h_mm:.0f}mm ({len(doc)} pages)")
+        result.ok("Page size consistent ✓")
+
+
+def check_text_overflow(doc, result):
+    """Check whether text overflows page boundaries"""
+    overflow_pages = []
+    
+    for i in range(len(doc)):
+        page = doc[i]
+        rect = page.rect
+        blocks = page.get_text("blocks")
+        
+        for b in blocks:
+            # b = (x0, y0, x1, y1, text, block_no, block_type)
+            if b[2] > rect.width + 2 or b[3] > rect.height + 2:  # 2px tolerance
+                overflow_pages.append(i + 1)
+                break
+            if b[0] < -2 or b[1] < -2:
+                overflow_pages.append(i + 1)
+                break
+    
+    if overflow_pages:
+        result.warn("Content overflow", f"Pages {overflow_pages} may have content exceeding page boundaries")
+    else:
+        result.ok("No content overflow ✓")
+
+
+def check_content_fill_ratio(doc, result):
+    """Check content fill ratio per page — warns when content is crammed at top leaving large void below.
+    
+    Rules:
+    - Skip single-page documents (may be intentional design)
+    - Skip page 1 (usually cover with intentional whitespace)
+    - Middle pages: warn if fill ratio < 40%
+    - Last page: warn if fill ratio < 25% (naturally has less content)
+    """
+    if len(doc) < 2:
+        result.ok("Single-page document, skipping content fill ratio check ✓")
+        return
+    
+    low_fill_pages = []
+    
+    for i in range(len(doc)):
+        page = doc[i]
+        page_rect = page.rect
+        page_height = page_rect.height
+        
+        # Skip page 1 (cover)
+        if i == 0:
+            continue
+        
+        blocks = page.get_text("blocks")
+        images = page.get_images()
+        drawings = page.get_drawings()
+        
+        if not blocks and not images and not drawings:
+            continue  # Blank page check handles this
+        
+        # Calculate content bbox
+        max_y = 0
+        for b in blocks:
+            if b[4].strip():
+                max_y = max(max_y, b[3])
+        
+        # Include images in bbox
+        for img in images:
+            try:
+                img_rects = page.get_image_rects(img[0])
+                for r in img_rects:
+                    max_y = max(max_y, r.y1)
+            except Exception:
+                pass
+        
+        if max_y == 0:
+            continue
+        
+        fill_ratio = max_y / page_height
+        is_last = (i == len(doc) - 1)
+        threshold = 0.25 if is_last else 0.40
+        
+        if fill_ratio < threshold:
+            low_fill_pages.append((i + 1, fill_ratio, threshold))
+    
+    if low_fill_pages:
+        for pg, ratio, thresh in low_fill_pages:
+            result.warn(
+                "Content fill ratio",
+                f"Page {pg} content only fills {ratio:.0%} of page height "
+                f"(threshold: {thresh:.0%}). Content may be crammed at the top "
+                f"with a large blank area below."
+            )
+    else:
+        result.ok("Content fill ratio adequate on all pages ✓")
+
+
+def check_cover_bleed(doc, result, poster=False):
+    """Check if the cover page (page 1) fills the entire page area (full-bleed).
+
+    A properly designed cover should have background color/graphics extending
+    to the page edges. If the content bbox has significant margins on all sides,
+    the cover likely wasn't rendered full-bleed (e.g. ReportLab with default margins).
+
+    For poster mode: checks ALL pages (not just the cover) since every page of a
+    seamlessly-paginated poster should have consistent background fill.
+
+    Strategy: combine bounding boxes of drawings (rects, paths), images, and colored
+    backgrounds. If the union bbox leaves > 5% margin on any side, warn.
+    """
+    if not poster and len(doc) < 2:
+        # Single page doc (non-poster) — not necessarily a cover scenario
+        return
+
+    pages_to_check = range(len(doc)) if poster else [0]
+    
+    for page_idx in pages_to_check:
+        page = doc[page_idx]
+        page_rect = page.rect
+        pw, ph = page_rect.width, page_rect.height
+
+        # Collect all content bounding boxes
+        min_x, min_y = pw, ph
+        max_x, max_y = 0.0, 0.0
+        has_content = False
+
+        # 1. Drawings (vector paths, rectangles — typical for colored backgrounds)
+        for d in page.get_drawings():
+            r = d.get("rect")
+            if r:
+                min_x = min(min_x, r.x0)
+                min_y = min(min_y, r.y0)
+                max_x = max(max_x, r.x1)
+                max_y = max(max_y, r.y1)
+                has_content = True
+
+        # 2. Images
+        for img in page.get_images():
+            try:
+                for r in page.get_image_rects(img[0]):
+                    min_x = min(min_x, r.x0)
+                    min_y = min(min_y, r.y0)
+                    max_x = max(max_x, r.x1)
+                    max_y = max(max_y, r.y1)
+                    has_content = True
+            except Exception:
+                pass
+
+        page_label = f"Page {page_idx + 1}" if poster else "Cover page (p1)"
+
+        if not has_content:
+            blocks = page.get_text("blocks")
+            if blocks:
+                result.warn(
+                    f"{page_label} not full-bleed",
+                    f"{page_label} has no background graphics (no filled rectangles or images). "
+                    "A proper cover/poster page should have a full-page background color or image "
+                    "extending to all edges."
+                )
+            continue
+
+        # Calculate margin ratios (how far content is from page edges)
+        margin_left = max(0, min_x) / pw
+        margin_top = max(0, min_y) / ph
+        margin_right = max(0, pw - max_x) / pw
+        margin_bottom = max(0, ph - max_y) / ph
+
+        threshold = 0.05
+        margins_ok = (margin_left <= threshold and margin_top <= threshold and
+                      margin_right <= threshold and margin_bottom <= threshold)
+
+        if margins_ok:
+            result.ok(f"{page_label} content extends to page edges (full-bleed) ✓")
+        else:
+            sides = []
+            if margin_left > threshold:
+                sides.append(f"left {margin_left:.0%}")
+            if margin_top > threshold:
+                sides.append(f"top {margin_top:.0%}")
+            if margin_right > threshold:
+                sides.append(f"right {margin_right:.0%}")
+            if margin_bottom > threshold:
+                sides.append(f"bottom {margin_bottom:.0%}")
+            result.warn(
+                f"{page_label} not full-bleed",
+                f"{page_label} has visible margins: {', '.join(sides)}. "
+                f"Background/graphics should extend to page edges."
+            )
+
+
+def check_margin_symmetry(doc, result, skip_cover=False):
+    """Check left/right margin symmetry using text block bounds."""
+    warn_pages = []
+
+    for page_num in range(len(doc)):
+        if skip_cover and page_num == 0:
+            continue
+
+        page = doc[page_num]
+        blocks = page.get_text("blocks")
+        text_blocks = [b for b in blocks if b[4].strip()]
+
+        if len(text_blocks) < 3:
+            continue  # Skip decorative/cover-like pages
+
+        left_margin = min(b[0] for b in text_blocks)
+        right_margin = page.rect.width - max(b[2] for b in text_blocks)
+        diff = abs(left_margin - right_margin)
+
+        if diff > page.rect.width * 0.05:
+            warn_pages.append((page_num + 1, left_margin, right_margin, diff))
+
+    if warn_pages:
+        for pg, left, right, diff in warn_pages:
+            result.warn(
+                "Margin symmetry",
+                f"Page {pg} left/right margins differ by {diff:.0f}pt "
+                f"(L {left:.0f}pt, R {right:.0f}pt)"
+            )
+    else:
+        result.ok("Left/right margins appear symmetric \u2713")
+
+
+def check_table_centering(doc, result):
+    """Check if detected table regions are centered.
+
+    Tables are inferred from vector drawings: line ("l") and rectangle ("re")
+    items are broken into straight segments, long horizontal/vertical segments
+    are grouped into clusters of touching bounding boxes, and a cluster with
+    at least two horizontal and two vertical segments is treated as a table.
+    A table is reported when its left and right distances to the page edges
+    differ by more than 5% of the page width.
+
+    Args:
+        doc: Sized, indexable collection of pages.
+        result: Collector exposing ``ok(msg)`` and ``warn(category, msg)``.
+    """
+    def _bbox_intersects(a, b, tol=6):
+        # True when boxes a and b (x0, y0, x1, y1) overlap or lie within
+        # `tol` units of each other on both axes.
+        return not (a[2] < b[0] - tol or a[0] > b[2] + tol or
+                    a[3] < b[1] - tol or a[1] > b[3] + tol)
+
+    def _rect_tuple(r):
+        # Accept either an object with x0/y0/x1/y1 attributes or a 4-sequence.
+        if hasattr(r, "x0"):
+            return (r.x0, r.y0, r.x1, r.y1)
+        return (r[0], r[1], r[2], r[3])
+
+    any_tables = False
+
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        drawings = page.get_drawings()
+        segments = []
+
+        # Flatten drawing items into (x0, y0, x1, y1) segments.
+        for d in drawings:
+            for item in d.get("items", []):
+                if not item:
+                    continue
+                op = item[0]
+                if op == "l" and len(item) >= 3:
+                    p0, p1 = item[1], item[2]
+                    segments.append((p0[0], p0[1], p1[0], p1[1]))
+                elif op == "re" and len(item) >= 2:
+                    # A rectangle contributes its four edges.
+                    x0, y0, x1, y1 = _rect_tuple(item[1])
+                    segments.extend([
+                        (x0, y0, x1, y0),
+                        (x0, y1, x1, y1),
+                        (x0, y0, x0, y1),
+                        (x1, y0, x1, y1),
+                    ])
+
+        if not segments:
+            continue
+
+        # Greedy clustering: each segment joins the FIRST existing cluster it
+        # touches, so the outcome depends on segment order and clusters that
+        # later become adjacent are not merged.
+        cluster_list = []
+        for x0, y0, x1, y1 in segments:
+            min_x, max_x = min(x0, x1), max(x0, x1)
+            min_y, max_y = min(y0, y1), max(y0, y1)
+            bbox = (min_x, min_y, max_x, max_y)
+            # Only (near-)axis-aligned segments longer than 20 units count.
+            is_h = abs(y0 - y1) < 1 and (max_x - min_x) > 20
+            is_v = abs(x0 - x1) < 1 and (max_y - min_y) > 20
+            if not is_h and not is_v:
+                continue
+
+            placed = False
+            for cl in cluster_list:
+                if _bbox_intersects(bbox, cl["bbox"]):
+                    cl["segments"].append((x0, y0, x1, y1, is_h, is_v))
+                    # Grow the cluster box to include the new segment.
+                    cl["bbox"] = (
+                        min(cl["bbox"][0], bbox[0]),
+                        min(cl["bbox"][1], bbox[1]),
+                        max(cl["bbox"][2], bbox[2]),
+                        max(cl["bbox"][3], bbox[3]),
+                    )
+                    if is_h:
+                        cl["h"] += 1
+                    if is_v:
+                        cl["v"] += 1
+                    placed = True
+                    break
+            if not placed:
+                cluster_list.append({
+                    "bbox": bbox,
+                    "segments": [(x0, y0, x1, y1, is_h, is_v)],
+                    "h": 1 if is_h else 0,
+                    "v": 1 if is_v else 0,
+                })
+
+        for cl in cluster_list:
+            # Fewer than two rules in either direction: not a table grid.
+            if cl["h"] < 2 or cl["v"] < 2:
+                continue
+            any_tables = True
+            bbox = cl["bbox"]
+            page_width = page.rect.width
+            left_margin = bbox[0]
+            right_margin = page_width - bbox[2]
+            if abs(left_margin - right_margin) > page_width * 0.05:
+                result.warn(
+                    "Table centering",
+                    f"Page {page_num + 1}: Table not centered "
+                    f"(L {left_margin:.0f}pt, R {right_margin:.0f}pt)"
+                )
+
+    # Recorded whenever at least one table was found, including when some of
+    # those tables produced warnings above.
+    if any_tables:
+        result.ok("Table centering check complete \u2713")
+
+
+def check_font_embedding(doc, result):
+    """Check font embedding status using PyMuPDF font list."""
+    fonts_used = set()
+    non_embedded = set()
+
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        for font in page.get_fonts():
+            basefont = font[3] if len(font) > 3 else "unknown"
+            ext = font[1] if len(font) > 1 else ""
+            fonts_used.add(basefont)
+            if not ext:
+                non_embedded.add(basefont)
+
+    if fonts_used:
+        result.add_info(f"Fonts used: {', '.join(sorted(fonts_used))}")
+    else:
+        result.add_info("Fonts used: (none detected)")
+
+    if non_embedded:
+        for basefont in sorted(non_embedded):
+            result.warn(
+                "Font embedding",
+                f"Font {basefont} is not embedded. May display differently on other systems."
+            )
+    else:
+        result.ok("All fonts are embedded \u2713")
+
+
+def check_helvetica_in_cjk(doc, result):
+    """Detect Helvetica rendering visible text in documents containing CJK text.
+
+    Helvetica is a Latin-only built-in PDF font. When it appears rendering
+    actual text content in a CJK document, it almost always means a raw string
+    was passed to a ReportLab Table or flowable without wrapping it in
+    Paragraph() with a CJK font. The CJK characters rendered via Helvetica
+    become garbled (fall back to ZapfDingbats symbols).
+
+    We only check Helvetica (not ZapfDingbats) because ZapfDingbats is
+    legitimately used for bullet symbols in list items.
+
+    We check actual rendered text spans (not just font presence in font list)
+    because ReportLab internally registers Helvetica on every page even when
+    only CJK fonts are used in visible content.
+    """
+    has_cjk = False
+    helvetica_pages = []
+
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        text = page.get_text("text") or ""
+
+        # Check if document contains CJK characters
+        if not has_cjk:
+            for ch in text:
+                if '\u4e00' <= ch <= '\u9fff' or '\u3400' <= ch <= '\u4dbf':
+                    has_cjk = True
+                    break
+
+        # Check if Helvetica is actually used to render visible text on this page
+        blocks = page.get_text("dict", sort=True).get("blocks", [])
+        found_on_page = False
+        for block in blocks:
+            if found_on_page:
+                break
+            for line in block.get("lines", []):
+                if found_on_page:
+                    break
+                for span in line.get("spans", []):
+                    font = span.get("font", "")
+                    txt = span.get("text", "").strip()
+                    if "Helvetica" in font and len(txt) > 0:
+                        helvetica_pages.append(page_num + 1)
+                        found_on_page = True
+                        break
+
+    if has_cjk and helvetica_pages:
+        pages_str = ', '.join(str(p) for p in helvetica_pages[:5])
+        if len(helvetica_pages) > 5:
+            pages_str += f' ...and {len(helvetica_pages) - 5} more'
+        result.warn(
+            "Helvetica in CJK document",
+            f"Helvetica font detected rendering text on page(s) {pages_str} in a CJK document. "
+            f"This usually means a raw string was passed to a ReportLab Table or flowable "
+            f"without wrapping in Paragraph(text, style) with a CJK-capable font. "
+            f"CJK characters rendered via Helvetica will appear as garbled symbols."
+        )
+
+
+def check_metadata(doc, result):
+    """Check PDF metadata presence for title, author, creator."""
+    meta = doc.metadata or {}
+
+    def _missing(v):
+        if v is None:
+            return True
+        if not str(v).strip():
+            return True
+        return False
+
+    title = meta.get("title")
+    author = meta.get("author")
+    creator = meta.get("creator")
+
+    if _missing(title) or str(title).strip().lower() in ("untitled", "(anonymous)"):
+        result.warn("Metadata", "Missing/invalid title metadata")
+    else:
+        result.ok("Title metadata present \u2713")
+
+    if _missing(author):
+        result.warn("Metadata", "Missing author metadata")
+    else:
+        result.ok("Author metadata present \u2713")
+
+    if _missing(creator):
+        result.warn("Metadata", "Missing creator metadata")
+    else:
+        result.ok("Creator metadata present \u2713")
+
+
+def check_toc_without_cover(doc, result):
+    """Detect TOC on page 1 without a preceding cover page.
+    
+    If the first page contains Table of Contents / 目录, it means the document
+    has a TOC but no cover page. This is a structural issue — documents with
+    TOC should have: Cover (p1) → TOC (p2) → Content (p3+).
+    """
+    if len(doc) < 2:
+        # Single-page docs don't need TOC/cover checks
+        return
+    
+    page1 = doc[0]
+    text = page1.get_text("text", sort=True).strip()
+    
+    # Normalize for matching
+    text_lower = text.lower()
+    first_300 = text_lower[:300]
+    
+    toc_keywords = [
+        "table of contents", "contents",
+        "目录", "目 录",
+    ]
+    
+    has_toc = any(kw in first_300 for kw in toc_keywords)
+    
+    if has_toc:
+        result.warn(
+            "TOC without cover",
+            "Page 1 appears to be a Table of Contents with no preceding cover page. "
+            "Documents with TOC should have: Cover (p1) → TOC (p2) → Content (p3+)."
+        )
+
+
+def check_formula_overflow(doc, result):
+    """Detect likely formula overflow past right content margin."""
+    math_re = re.compile(r"[=+\-*/<>\u2264\u2265\u2211\u222b\u221a\u03c0\u00b5\u221e\u2202\u2206\u2248\u2260\u00b1\u00d7\u00f7]")
+
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+        blocks = page.get_text("blocks")
+        text_blocks = [b for b in blocks if b[4].strip()]
+
+        if len(text_blocks) < 3:
+            continue
+
+        right_edges = sorted(b[2] for b in text_blocks)
+        mid = len(right_edges) // 2
+        content_right = right_edges[mid] if right_edges else 0
+
+        for b in text_blocks:
+            x0, x1, text = b[0], b[2], b[4]
+            if x1 <= content_right + 10:
+                continue
+
+            is_single_line = "\n" not in text.strip()
+            is_wide = (x1 - x0) > page.rect.width * 0.5
+            has_math = bool(math_re.search(text))
+
+            if (is_single_line and is_wide) or has_math:
+                delta = x1 - content_right
+                result.warn(
+                    "Formula overflow",
+                    f"Page {page_num + 1}: Content extends {delta:.0f}pt beyond right content margin "
+                    "(possible formula overflow)"
+                )
+                break
+
+
+# ============================================================
+# Main
+# ============================================================
+
+def run_qa(pdf_path, poster=False, skip_cover=False, check_tables=True, check_formulas=False):
+    result = QAResult()
+    
+    if not os.path.exists(pdf_path):
+        result.error("File", f"File not found: {pdf_path}")
+        return result
+    
+    doc = pymupdf.open(pdf_path)
+    
+    result.add_info(f"File: {os.path.basename(pdf_path)}")
+    result.add_info(f"Size: {os.path.getsize(pdf_path) / 1024:.1f} KB")
+    if poster:
+        result.add_info("Mode: poster (creative)")
+    
+    # Run all checks
+    check_metadata(doc, result)
+    check_page_size_consistency(doc, result)
+    check_blank_pages(doc, result)
+    check_punctuation(doc, result)
+    check_colors(doc, result)
+    check_font_embedding(doc, result)
+    check_helvetica_in_cjk(doc, result)
+    check_text_overflow(doc, result)
+    if not poster:
+        # Content fill ratio is not meaningful for posters — the last page
+        # of a seamlessly-paginated poster naturally has less content.
+        check_content_fill_ratio(doc, result)
+    check_cover_bleed(doc, result, poster=poster)
+    check_margin_symmetry(doc, result, skip_cover=skip_cover)
+    if check_tables:
+        check_table_centering(doc, result)
+    if check_formulas:
+        check_formula_overflow(doc, result)
+    if not poster:
+        check_toc_without_cover(doc, result)
+    
+    doc.close()
+    return result
+
+
+def format_report(result):
+    lines = []
+    lines.append("=" * 56)
+    lines.append("  PDF Quality Assurance Report")
+    lines.append("=" * 56)
+    
+    # Info
+    if result.info:
+        lines.append("")
+        lines.append("ℹ️  Info:")
+        for msg in result.info:
+            lines.append(f"   {msg}")
+    
+    # Passes
+    if result.passes:
+        lines.append("")
+        lines.append(f"✅ Passed ({len(result.passes)}):")
+        for msg in result.passes:
+            lines.append(f"   {msg}")
+    
+    # Issues
+    errors = [(s, c, m) for s, c, m in result.issues if s == 'ERROR']
+    warns = [(s, c, m) for s, c, m in result.issues if s == 'WARN']
+    
+    if errors:
+        lines.append("")
+        lines.append(f"❌ Errors ({len(errors)}):")
+        for _, cat, msg in errors:
+            lines.append(f"   [{cat}] {msg}")
+    
+    if warns:
+        lines.append("")
+        lines.append(f"⚠️  Warnings ({len(warns)}):")
+        for _, cat, msg in warns:
+            lines.append(f"   [{cat}] {msg}")
+    
+    # Summary
+    lines.append("")
+    lines.append("-" * 56)
+    total_issues = len(result.issues)
+    if total_issues == 0:
+        lines.append("🎉 PASS — All checks passed!")
+    elif errors:
+        lines.append(f"💀 FAIL — {len(errors)} error(s), {len(warns)} warning(s)")
+    else:
+        lines.append(f"⚠️  WARN — {len(warns)} warning(s), optimization recommended")
+    lines.append("-" * 56)
+    
+    return "\n".join(lines)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python3 pdf_qa.py <pdf_path>")
+        print("       python3 pdf_qa.py *.pdf  (batch check)")
+        print("Options:")
+        print("  --poster      Poster mode (creative)")
+        print("  --skip-cover  Skip page 1 margin symmetry check")
+        print("  --no-tables   Disable table centering check")
+        print("  --formulas    Enable formula overflow check")
+        sys.exit(1)
+    
+    import glob
+    files = []
+    poster = False
+    skip_cover = False
+    check_tables = True
+    check_formulas = False
+    args = sys.argv[1:]
+    if '--poster' in args:
+        poster = True
+        args.remove('--poster')
+    if '--skip-cover' in args:
+        skip_cover = True
+        args.remove('--skip-cover')
+    if '--no-tables' in args:
+        check_tables = False
+        args.remove('--no-tables')
+    if '--formulas' in args:
+        check_formulas = True
+        args.remove('--formulas')
+    for arg in args:
+        files.extend(glob.glob(arg))
+    
+    if not files:
+        print(f"File not found: {args}")
+        sys.exit(1)
+    
+    for pdf_path in files:
+        result = run_qa(
+            pdf_path,
+            poster=poster,
+            skip_cover=skip_cover,
+            check_tables=check_tables,
+            check_formulas=check_formulas
+        )
+        print(format_report(result))
+        if len(files) > 1:
+            print("\n")
--- a/skills/pdf/scripts/poster_validate.py
+++ b/skills/pdf/scripts/poster_validate.py
--- a/skills/pdf/scripts/setup.sh
+++ b/skills/pdf/scripts/setup.sh
@@ -0,0 +1,269 @@
+#!/usr/bin/env bash
+# ---
+# name: pdf-setup
+# author: Z.AI
+# version: "1.0"
+# description: Environment setup for the PDF skill. Checks and installs all required dependencies.
+# ---
+#
+# Installs only dependencies required by the PDF skill.
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'
+ok() { echo -e "  ${GREEN}✓${NC} $1"; }
+fail() { echo -e "  ${RED}✗${NC} $1"; }
+warn() { echo -e "  ${YELLOW}○${NC} $1"; }
+info() { echo -e "  ${BLUE}→${NC} $1"; }
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+echo "============================================"
+echo "  PDF Skill — Environment Setup"
+echo "============================================"
+echo ""
+
+# ── Detect platform ──
+OS="$(uname -s)"
+ARCH="$(uname -m)"
+echo "Platform: $OS $ARCH"
+echo ""
+
+# ── 0. macOS: Homebrew ──
+# Only reported on macOS; other platforms skip this section entirely.
+if [[ "$OS" == "Darwin" ]]; then
+    echo "--- Homebrew (macOS package manager) ---"
+    if ! command -v brew >/dev/null 2>&1; then
+        fail "brew not found — most dependencies below need Homebrew on macOS"
+        info "Install: /bin/bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""
+    else
+        BREW_VER=$(brew --version 2>/dev/null | head -1)
+        ok "brew ($BREW_VER)"
+    fi
+    echo ""
+fi
+
+# ── 1. Python 3 ──
+# Report the interpreter version, or print a platform-specific install hint.
+echo "--- Python ---"
+if ! command -v python3 >/dev/null 2>&1; then
+    fail "python3 not found"
+    case "$OS" in
+        Darwin)
+            info "Install: brew install python3"
+            ;;
+        Linux)
+            info "Install: sudo apt install python3 python3-pip  (Debian/Ubuntu)"
+            info "         sudo dnf install python3 python3-pip  (Fedora/RHEL)"
+            ;;
+        *)
+            info "Install: https://www.python.org/downloads/"
+            ;;
+    esac
+else
+    PY_VER=$(python3 --version 2>&1)
+    ok "python3 ($PY_VER)"
+    # macOS: warn if using system Python
+    if [[ "$OS" == "Darwin" ]]; then
+        PY_PATH=$(which python3 2>/dev/null)
+        if [ "$PY_PATH" = "/usr/bin/python3" ]; then
+            warn "Using macOS system Python (limited). Recommend: brew install python3"
+        fi
+    fi
+fi
+
+# ── 2. pip ──
+echo ""
+echo "--- pip ---"
+if python3 -m pip --version &>/dev/null 2>&1; then
+    PIP_VER=$(python3 -m pip --version 2>/dev/null | head -1)
+    ok "pip ($PIP_VER)"
+else
+    fail "pip not found"
+    case "$OS" in
+        Darwin) info "Install: python3 -m ensurepip --upgrade"
+                info "     or: brew install python3 (includes pip)" ;;
+        Linux)  info "Install: sudo apt install python3-pip  (Debian/Ubuntu)" ;;
+        *)      info "Install: python3 -m ensurepip --upgrade" ;;
+    esac
+fi
+
+# ── 3. Python packages (pip) ──
+# Each entry is "<import name>:<pip package name>". A package counts as
+# present when `import <name>` succeeds in python3.
+echo ""
+echo "--- Python Packages ---"
+PY_PKGS=(
+    "pikepdf:pikepdf"
+    "pdfplumber:pdfplumber"
+    "pypdf:pypdf"
+    "reportlab:reportlab"
+    "pymupdf:PyMuPDF"
+)
+
+MISSING_PY=()
+for entry in "${PY_PKGS[@]}"; do
+    mod="${entry%%:*}"
+    pkg="${entry##*:}"
+    if python3 -c "import $mod" 2>/dev/null; then
+        # The version lookup is informational; never let it abort the script
+        # under `set -e`.
+        ver=$(python3 -c "import $mod; print(getattr($mod, '__version__', 'installed'))" 2>/dev/null) \
+            || ver="installed"
+        ok "$pkg ($ver)"
+    else
+        fail "$pkg not installed"
+        MISSING_PY+=("$pkg")
+    fi
+done
+
+if [ ${#MISSING_PY[@]} -gt 0 ]; then
+    echo ""
+    if [ -t 0 ]; then
+        read -p "  Install missing Python packages? [Y/n] " -n 1 -r REPLY
+        echo ""
+        REPLY=${REPLY:-Y}
+    else
+        warn "Non-interactive mode — skipping auto-install. Run interactively or install manually."
+        REPLY=N
+    fi
+    if [[ ! $REPLY =~ ^[Nn]$ ]]; then
+        # Try a plain install first, then a per-user install, then the
+        # override for externally-managed environments. Success is reported
+        # only when one of the attempts actually succeeded.
+        if python3 -m pip install -q "${MISSING_PY[@]}" 2>/dev/null \
+            || python3 -m pip install -q --user "${MISSING_PY[@]}" 2>/dev/null \
+            || python3 -m pip install -q --break-system-packages "${MISSING_PY[@]}" 2>/dev/null; then
+            ok "Installed: ${MISSING_PY[*]}"
+        else
+            fail "pip install failed. Try manually: pip install ${MISSING_PY[*]}"
+        fi
+    fi
+fi
+
+# ── 4. Node.js ──
+# Report the Node.js version, or print a platform-specific install hint.
+echo ""
+echo "--- Node.js ---"
+if ! command -v node >/dev/null 2>&1; then
+    fail "node not found"
+    case "$OS" in
+        Darwin)
+            info "Install: brew install node"
+            ;;
+        Linux)
+            info "Install: curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -"
+            info "         sudo apt install -y nodejs"
+            ;;
+        *)
+            info "Install: https://nodejs.org/"
+            ;;
+    esac
+else
+    NODE_VER=$(node --version)
+    ok "node ($NODE_VER)"
+fi
+
+# ── 5. npm ──
+# Report the npm version, or print a platform-specific install hint.
+echo ""
+echo "--- npm ---"
+if ! command -v npm >/dev/null 2>&1; then
+    fail "npm not found"
+    case "$OS" in
+        Darwin)
+            info "Install: brew install node (includes npm)"
+            ;;
+        Linux)
+            info "Install: comes with nodejs"
+            ;;
+        *)
+            info "Install: https://nodejs.org/"
+            ;;
+    esac
+else
+    NPM_VER=$(npm --version 2>/dev/null)
+    ok "npm ($NPM_VER)"
+fi
+
+# ── 6. Playwright + Chromium ──
+# Playwright counts as installed when Node can resolve the `playwright` module.
+echo ""
+echo "--- Playwright (HTML→PDF engine) ---"
+if ! node -e "require('playwright')" 2>/dev/null; then
+    fail "playwright not installed"
+    info "Install: npm install -g playwright"
+else
+    PW_VER=$(node -e "console.log(require('playwright/package.json').version)" 2>/dev/null)
+    ok "playwright ($PW_VER)"
+fi
+
+# Check Chromium: look for a chromium-* directory in the per-user Playwright
+# browser cache, whose location depends on the platform.
+case "$OS" in
+    Darwin) PW_CACHE="$HOME/Library/Caches/ms-playwright" ;;
+    *)      PW_CACHE="$HOME/.cache/ms-playwright" ;;
+esac
+if ! ls "$PW_CACHE"/chromium-* >/dev/null 2>&1; then
+    fail "chromium not installed"
+    info "Install: npx playwright install chromium"
+    if [[ "$OS" == "Linux" ]]; then
+        info "         npx playwright install-deps  (system libs, needs sudo)"
+    fi
+else
+    # Several builds may be cached; report the last one in `ls` order.
+    CR_DIR=$(ls -d "$PW_CACHE"/chromium-* 2>/dev/null | tail -1)
+    ok "chromium ($(basename "$CR_DIR"))"
+fi
+
+# ── 7. Tectonic (LaTeX engine, optional) ──
+# Lookup order: binary bundled next to this script, then PATH, then ~/tectonic.
+echo ""
+echo "--- Tectonic (LaTeX→PDF, optional) ---"
+
+# Print the platform-specific install hint for tectonic (nothing on
+# platforms without a known package source).
+tectonic_install_hint() {
+    case "$OS" in
+        Darwin)
+            info "Install: brew install tectonic"
+            ;;
+        Linux)
+            info "Install: conda install -c conda-forge tectonic"
+            info "     or: curl -fsSL https://drop-sh.fullyjustified.net | sh"
+            ;;
+        MINGW*|MSYS*|CYGWIN*)
+            info "Install: scoop install tectonic  /  choco install tectonic"
+            ;;
+    esac
+}
+
+BUNDLED="$SCRIPT_DIR/tectonic"
+if [ -x "$BUNDLED" ]; then
+    if [[ "$OS" == "Darwin" && "$ARCH" == "arm64" ]]; then
+        ok "tectonic (bundled, macOS arm64)"
+    else
+        # The bundled binary cannot run here; fall back to a system install.
+        warn "bundled tectonic is macOS arm64 only — cannot run on $OS $ARCH"
+        if ! command -v tectonic >/dev/null 2>&1; then
+            fail "tectonic not in PATH"
+            tectonic_install_hint
+        else
+            TEC_VER=$(tectonic --version 2>&1 | head -1)
+            ok "tectonic (system: $TEC_VER)"
+        fi
+    fi
+elif command -v tectonic >/dev/null 2>&1; then
+    TEC_VER=$(tectonic --version 2>&1 | head -1)
+    ok "tectonic ($TEC_VER)"
+elif [ -x "$HOME/tectonic" ]; then
+    ok "tectonic (~/tectonic)"
+else
+    warn "tectonic not installed (needed only for LaTeX/academic PDFs)"
+    tectonic_install_hint
+fi
+
+# ── 8. LibreOffice (optional, for Office→PDF conversion) ──
+# Detected through the `soffice` launcher on PATH.
+echo ""
+echo "--- LibreOffice (optional, Office→PDF) ---"
+if ! command -v soffice >/dev/null 2>&1; then
+    warn "libreoffice not installed (needed only for .docx/.xlsx→PDF conversion)"
+    case "$OS" in
+        Darwin)
+            info "Install: brew install --cask libreoffice"
+            ;;
+        Linux)
+            info "Install: sudo apt install libreoffice-core  (Debian/Ubuntu)"
+            ;;
+    esac
+else
+    LO_VER=$(soffice --version 2>/dev/null | head -1)
+    ok "libreoffice ($LO_VER)"
+fi
+
+# ── 9. CJK Fonts ──
+# Reports the fonts/ directory that sits next to the scripts directory, then
+# probes for CJK fonts installed system-wide.
+echo ""
+echo "--- CJK Fonts ---"
+FONT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)/fonts"
+if [ -d "$FONT_DIR" ]; then
+    # Count every .ttf/.otf file. `find` is guarded with `|| true` so that a
+    # non-zero exit (e.g. an unreadable sub-directory) cannot abort the script
+    # through `set -e` + `pipefail`; no `head` is used, so the count is not
+    # capped and `find` cannot be killed by SIGPIPE.
+    FONT_COUNT=$( { find "$FONT_DIR" \( -name "*.ttf" -o -name "*.otf" \) 2>/dev/null || true; } | wc -l | tr -d ' ')
+    ok "fonts directory ($FONT_COUNT font files in $FONT_DIR)"
+else
+    warn "no fonts/ directory found — CJK PDFs may have missing glyphs"
+    info "Expected at: $FONT_DIR"
+fi
+# Check system CJK fonts
+if [ "$OS" = "Darwin" ]; then
+    if ls /System/Library/Fonts/PingFang.ttc >/dev/null 2>&1 \
+       || ls /System/Library/Fonts/STHeiti*.ttc >/dev/null 2>&1 \
+       || ls "$HOME/Library/Fonts/"*SimHei* >/dev/null 2>&1; then
+        ok "macOS CJK system fonts available"
+    else
+        warn "no common CJK system fonts found"
+    fi
+elif [ "$OS" = "Linux" ]; then
+    # Test the captured output instead of the pipeline status: under
+    # `pipefail` an early-exiting reader can make a successful fc-list look
+    # like a failure. A missing fc-list yields empty output and the warning.
+    if [ -n "$(fc-list :lang=zh 2>/dev/null | head -1)" ]; then
+        ok "system CJK fonts available (fc-list)"
+    else
+        warn "no CJK fonts found. Install: sudo apt install fonts-noto-cjk"
+    fi
+fi
+
+# ── Summary ──
+# Closing banner pointing at the follow-up commands.
+cat <<'EOF'
+
+============================================
+  Setup complete.
+  Run 'python3 pdf.py env.check' for detailed status.
+  Run 'python3 pdf.py env.fix'   to auto-install Python deps.
+============================================
+EOF
--- a/skills/pdf/scripts/toc_validate.py
+++ b/skills/pdf/scripts/toc_validate.py