- Add full Telegram bot functionality with Z.AI API integration
- Implement 4 tools: Bash, FileEdit, WebSearch, Git
- Add 3 agents: Code Reviewer, Architect, DevOps Engineer
- Add 6 skills for common coding tasks
- Add systemd service file for 24/7 operation
- Add nginx configuration for HTTPS webhook
- Add comprehensive documentation
- Implement WebSocket server for real-time updates
- Add logging system with Winston
- Add environment validation
🤖 zCode CLI X - Agentic coder with Z.AI + Telegram integration
194 lines
7.3 KiB
JavaScript
194 lines
7.3 KiB
JavaScript
import isFullwidthCodePoint from "is-fullwidth-code-point";
import { getEndCode } from "./ansiCodes.js";
import {
    CC_0,
    CC_9,
    CC_BEL,
    CC_BACKSLASH,
    CC_C1_ST,
    CC_ESC,
    CC_M,
    CC_CSI,
    CC_OSC,
    CC_SEMI,
    ESCAPES,
    linkCodePrefix,
    linkCodePrefixCharCodes,
} from "./consts.js";

// Module-wide grapheme segmenter, created once and reused by every tokenize() call.
const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
/**
 * Decides whether a grapheme cluster is counted as two columns wide.
 *
 * @param {string} grapheme - The complete grapheme cluster.
 * @param {number} baseCodePoint - Code point of the first character of the cluster.
 * @returns {boolean} `true` if the base code point is full-width according to
 *   `isFullwidthCodePoint`, if the cluster contains U+FE0F, or if the base code
 *   point is a regional indicator (U+1F1E6..U+1F1FF); `false` otherwise.
 */
function isFullwidthGrapheme(grapheme, baseCodePoint) {
    if (isFullwidthCodePoint(baseCodePoint)) {
        return true;
    }
    // Variation Selector 16 forces emoji presentation (2 columns wide)
    if (grapheme.includes("\uFE0F")) {
        return true;
    }
    // Regional indicator pairs form flag emoji (2 columns wide)
    return baseCodePoint >= 0x1f1e6 && baseCodePoint <= 0x1f1ff;
}
// HOT PATH: Use only basic string/char code operations for maximum performance
/**
 * Tries to read a link code (a sequence starting with `linkCodePrefix`, presumably
 * the OSC 8 hyperlink introducer - confirm against consts.js) at `offset`.
 *
 * @param {string} string - The full input string.
 * @param {number} offset - Index of the escape character that starts the sequence.
 * @returns {string | undefined} The complete sequence, from the escape character
 *   through the last character of its terminator, or `undefined` if the text at
 *   `offset` is not a well-formed link code.
 */
function parseLinkCode(string, offset) {
    // The comparison starts at 1: the character at `offset` itself is not re-checked here.
    for (let index = 1; index < linkCodePrefixCharCodes.length; index++) {
        if (string.charCodeAt(offset + index) !== linkCodePrefixCharCodes[index]) {
            return undefined;
        }
    }
    const bodyStart = offset + linkCodePrefix.length;
    // This looks like a link code (with or without the URL part). Find the end of it.
    const endIndex = findOSCTerminatorIndex(string, bodyStart);
    if (endIndex === -1) {
        return undefined;
    }
    // The semicolon that ends the params must belong to THIS sequence. Without the
    // upper bound, a malformed code lacking that semicolon would pick up a ";" from
    // unrelated text further along and swallow everything up to a later terminator.
    const paramsEndIndex = string.indexOf(";", bodyStart);
    if (paramsEndIndex === -1 || paramsEndIndex > endIndex) {
        return undefined;
    }
    return string.slice(offset, endIndex + 1);
}
// HOT PATH: Generic fallback for non-link OSC sequences (window title, notifications, etc.)
/**
 * Reads an OSC sequence that begins at `offset`.
 *
 * @param {string} string - The full input string.
 * @param {number} offset - Index at which the sequence starts.
 * @returns {string | undefined} The sequence from `offset` through the last
 *   character of its terminator, or `undefined` when no terminator follows.
 */
function parseOSCSequence(string, offset) {
    // Skip the two-character introducer ("ESC ]") before looking for the terminator
    const endIndex = findOSCTerminatorIndex(string, offset + 2);
    if (endIndex === -1) {
        return undefined;
    }
    return string.slice(offset, endIndex + 1);
}
/**
 * Finds the index of the last character of the first OSC terminator at or after startIndex.
 * Recognizes BEL (\x07), C1 ST (\x9C), and ESC+backslash (\x1B\x5C).
 *
 * @param {string} string - The string to scan.
 * @param {number} startIndex - Index at which scanning begins.
 * @returns {number} Index of the terminator's last character, or -1 if no terminator is found.
 */
function findOSCTerminatorIndex(string, startIndex) {
    const length = string.length;
    for (let i = startIndex; i < length; i++) {
        const ch = string.charCodeAt(i);
        // Single-character terminators
        if (ch === CC_BEL || ch === CC_C1_ST) {
            return i;
        }
        // Two-character terminator: report the position of the backslash
        if (ch === CC_ESC && i + 1 < length && string.charCodeAt(i + 1) === CC_BACKSLASH) {
            return i + 1;
        }
    }
    return -1;
}
/**
 * Scans through the given string and finds the index of the last character of an SGR sequence
 * like `\x1B[38;2;123;123;123m`. This assumes that the string has been checked to start with `\x1B[`.
 *
 * @param {string} str - String whose first two characters are the sequence introducer.
 * @returns {number} Index of the terminating `m`, or -1 if no valid SGR sequence is found.
 */
function findSGRSequenceEndIndex(str) {
    // Start after the two-character introducer
    for (let index = 2; index < str.length; index++) {
        const charCode = str.charCodeAt(index);
        // m marks the end of the SGR sequence
        if (charCode === CC_M) {
            return index;
        }
        // Only digits and semicolons may appear before the final m
        const isParameterChar = charCode === CC_SEMI || (charCode >= CC_0 && charCode <= CC_9);
        if (!isParameterChar) {
            return -1;
        }
    }
    return -1;
}
// HOT PATH: Use only basic string/char code operations for maximum performance
/**
 * Reads an SGR sequence that begins at `offset`.
 *
 * @param {string} string - The full input string.
 * @param {number} offset - Index at which the sequence starts.
 * @returns {string | undefined} The sequence up to and including its final `m`,
 *   or `undefined` if no valid SGR sequence starts at `offset`.
 */
function parseSGRSequence(string, offset) {
    // findSGRSequenceEndIndex expects the sequence to start at index 0
    const remainder = string.slice(offset);
    const endIndex = findSGRSequenceEndIndex(remainder);
    if (endIndex === -1) {
        return undefined;
    }
    return remainder.slice(0, endIndex + 1);
}
/**
 * Splits compound SGR sequences like `\x1B[1;3;31m` into individual components.
 *
 * @param {string} code - A complete SGR sequence (introducer, parameters, final `m`).
 * @returns {string[]} One complete SGR sequence per component. 8-bit (`38;5;n` /
 *   `48;5;n`) and 24-bit (`38;2;r;g;b` / `48;2;r;g;b`) color codes stay in one piece.
 */
function splitCompoundSGRSequences(code) {
    if (!code.includes(";")) {
        // Not a compound code
        return [code];
    }
    // Strip off the escape sequences \x1B[ and m
    const params = code.slice(2, -1).split(";");
    const groups = [];
    let position = 0;
    while (position < params.length) {
        // Number of parameters that belong to the component starting here
        let span = 1;
        const param = params[position];
        if (param === "38" || param === "48") {
            const mode = params[position + 1];
            if (mode === "5" && position + 2 < params.length) {
                // 8-bit color, followed by another number
                span = 3;
            }
            else if (mode === "2" && position + 4 < params.length) {
                // 24-bit color, followed by three numbers
                span = 5;
            }
            // Otherwise not a (valid) 8/24-bit color code: emitted on its own
        }
        groups.push(params.slice(position, position + span).join(";"));
        position += span;
    }
    return groups.map((part) => `\x1b[${part}m`);
}
/**
 * Splits a string into a flat list of tokens: visible characters (grapheme
 * clusters), paired ANSI codes, and self-contained control sequences.
 *
 * Token shapes:
 * - `{ type: "char", value, fullWidth }` for each grapheme cluster
 * - `{ type: "ansi", code, endCode }` for link codes and (split) SGR sequences
 * - `{ type: "control", code }` for other OSC sequences
 *
 * An escape character that does not start a recognized sequence is emitted as a
 * regular "char" token.
 *
 * @param {string} str - The string to tokenize.
 * @param {number} [endChar=Infinity] - Stop once the accumulated visible width
 *   (2 per full-width cluster, 1 otherwise) reaches this value. The check runs
 *   after a character token is pushed, so that character is still included.
 * @returns {Array<object>} The tokens in input order.
 */
export function tokenize(str, endChar = Number.POSITIVE_INFINITY) {
    const tokens = [];
    let visible = 0;
    // Index just past the most recently consumed ANSI sequence
    let codeEndIndex = 0;
    for (const { segment, index } of segmenter.segment(str)) {
        // Skip segments consumed as part of an ANSI sequence
        if (index < codeEndIndex) {
            continue;
        }
        const codePoint = segment.codePointAt(0);
        if (ESCAPES.has(codePoint)) {
            let code;
            // Peek the next code point to determine the type of ANSI sequence
            const nextCodePoint = str.codePointAt(index + 1);
            if (nextCodePoint === CC_OSC) {
                // ] = operating system commands
                code = parseLinkCode(str, index);
                if (code) {
                    // OSC 8 hyperlinks are paired codes with an endCode
                    tokens.push({ type: "ansi", code, endCode: getEndCode(code) });
                }
                else {
                    // Other OSC sequences (window title, etc.) are self-contained
                    // control codes with no endCode.
                    code = parseOSCSequence(str, index);
                    if (code) {
                        tokens.push({ type: "control", code });
                    }
                }
            }
            else if (nextCodePoint === CC_CSI) {
                // [ = control sequence introducer, like SGR sequences [...m
                code = parseSGRSequence(str, index);
                if (code) {
                    // Split compound codes into individual tokens
                    for (const individualCode of splitCompoundSGRSequences(code)) {
                        tokens.push({
                            type: "ansi",
                            code: individualCode,
                            endCode: getEndCode(individualCode),
                        });
                    }
                }
            }
            if (code) {
                // Everything up to the end of the sequence has been handled
                codeEndIndex = index + code.length;
                continue;
            }
            // Unrecognized sequence: fall through and treat the escape as a character
        }
        const fullWidth = isFullwidthGrapheme(segment, codePoint);
        tokens.push({ type: "char", value: segment, fullWidth });
        visible += fullWidth ? 2 : 1;
        if (visible >= endChar) {
            break;
        }
    }
    return tokens;
}
//# sourceMappingURL=tokenize.js.map