175 lines
5.6 KiB
JavaScript
175 lines
5.6 KiB
JavaScript
/**
|
|
* Text Formatter Utilities - "Pro" Protocol
|
|
* Sanitizes text before rendering to remove debug noise and HTML entities
|
|
*
|
|
* @module ui/utils/textFormatter
|
|
*/
|
|
|
|
import he from 'he';
|
|
|
|
// ═══════════════════════════════════════════════════════════════
|
|
// SANITIZATION PATTERNS
|
|
// ═══════════════════════════════════════════════════════════════
|
|
|
|
// Debug log patterns to strip
|
|
const DEBUG_PATTERNS = [
|
|
/\d+\s+[A-Z]:\\[^\n]+/g, // Windows paths: "xx E:\path\to\file"
|
|
/\[\d{4}-\d{2}-\d{2}[^\]]+\]/g, // Timestamps: "[2024-01-01 12:00:00]"
|
|
/DEBUG:\s*[^\n]+/gi, // DEBUG: messages
|
|
/^>\s*undefined$/gm, // Stray undefined
|
|
/^\s*at\s+[^\n]+$/gm, // Stack trace lines
|
|
];
|
|
|
|
// HTML entities that commonly appear in AI output
|
|
const ENTITY_MAP = {
|
|
''': "'",
|
|
'"': '"',
|
|
'&': '&',
|
|
'<': '<',
|
|
'>': '>',
|
|
' ': ' ',
|
|
''': "'",
|
|
'/': '/',
|
|
};
|
|
|
|
// ═══════════════════════════════════════════════════════════════
|
|
// CORE SANITIZERS
|
|
// ═══════════════════════════════════════════════════════════════
|
|
|
|
/**
|
|
* Decode HTML entities to clean text
|
|
* @param {string} text - Raw text with possible HTML entities
|
|
* @returns {string} Clean text
|
|
*/
|
|
export function decodeEntities(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
// First pass: common entities via map
|
|
let result = text;
|
|
for (const [entity, char] of Object.entries(ENTITY_MAP)) {
|
|
result = result.replace(new RegExp(entity, 'g'), char);
|
|
}
|
|
|
|
// Second pass: use 'he' library for comprehensive decoding
|
|
try {
|
|
result = he.decode(result);
|
|
} catch (e) {
|
|
// Fallback if he fails
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Strip debug noise from text
|
|
* @param {string} text - Text with possible debug output
|
|
* @returns {string} Clean text without debug noise
|
|
*/
|
|
export function stripDebugNoise(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
let result = text;
|
|
for (const pattern of DEBUG_PATTERNS) {
|
|
result = result.replace(pattern, '');
|
|
}
|
|
|
|
// Clean up resulting empty lines
|
|
result = result.replace(/\n{3,}/g, '\n\n');
|
|
|
|
return result.trim();
|
|
}
|
|
|
|
/**
|
|
* Fix broken list formatting
|
|
* @param {string} text - Text with potentially broken lists
|
|
* @returns {string} Text with fixed list formatting
|
|
*/
|
|
export function fixListFormatting(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
// Fix bullet points that got mangled
|
|
let result = text
|
|
.replace(/•\s*([a-z])/g, '• $1') // Fix stuck bullets
|
|
.replace(/(\d+)\.\s*([a-z])/g, '$1. $2') // Fix numbered lists
|
|
.replace(/:\s*\n\s*•/g, ':\n\n•') // Add spacing before lists
|
|
.replace(/([.!?])\s*•/g, '$1\n\n•'); // Add line break before bullets
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Ensure proper paragraph spacing
|
|
* @param {string} text - Text to process
|
|
* @returns {string} Text with proper paragraph breaks
|
|
*/
|
|
export function ensureParagraphSpacing(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
// Ensure sentences starting new topics get proper breaks
|
|
let result = text
|
|
.replace(/([.!?])\s*([A-Z][a-z])/g, '$1\n\n$2') // New sentence, new paragraph
|
|
.replace(/\n{4,}/g, '\n\n\n'); // Max 3 newlines
|
|
|
|
return result;
|
|
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════
|
|
// MAIN PIPELINE
|
|
// ═══════════════════════════════════════════════════════════════
|
|
|
|
/**
|
|
* Full sanitization pipeline for content before rendering
|
|
* @param {string} text - Raw text from AI or system
|
|
* @returns {string} Clean, formatted text ready for display
|
|
*/
|
|
export function cleanContent(text) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
let result = text;
|
|
|
|
// Step 1: Decode HTML entities
|
|
result = decodeEntities(result);
|
|
|
|
// Step 2: Strip debug noise
|
|
result = stripDebugNoise(result);
|
|
|
|
// Step 3: Fix list formatting
|
|
result = fixListFormatting(result);
|
|
|
|
// Step 4: Normalize whitespace
|
|
result = result.replace(/\r\n/g, '\n').trim();
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Format for single-line display (status messages, etc)
|
|
* @param {string} text - Text to format
|
|
* @param {number} maxLength - Maximum length before truncation
|
|
* @returns {string} Single-line formatted text
|
|
*/
|
|
export function formatSingleLine(text, maxLength = 80) {
|
|
if (!text || typeof text !== 'string') return '';
|
|
|
|
let result = cleanContent(text);
|
|
|
|
// Collapse to single line
|
|
result = result.replace(/\n+/g, ' ').replace(/\s+/g, ' ').trim();
|
|
|
|
// Truncate if needed
|
|
if (result.length > maxLength) {
|
|
result = result.slice(0, maxLength - 3) + '...';
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
export default {
|
|
cleanContent,
|
|
decodeEntities,
|
|
stripDebugNoise,
|
|
fixListFormatting,
|
|
ensureParagraphSpacing,
|
|
formatSingleLine
|
|
};
|