feat: Integrated Vision & Robust Translation Layer, Secured Repo (removed keys)

This commit is contained in:
Gemini AI
2025-12-15 04:53:51 +04:00
Unverified
parent a8436c91a3
commit 2407c42eb9
38 changed files with 7786 additions and 3776 deletions

View File

@@ -0,0 +1,4 @@
{
"wsEndpoint": "http://127.0.0.1:9222",
"launchTime": 1765752544769
}

File diff suppressed because it is too large Load Diff

192
bin/input.ps1.backup Normal file
View File

@@ -0,0 +1,192 @@
# Desktop input helper: moves/clicks the mouse, sends keystrokes, captures the
# screen and locates UI Automation elements by name.
#
# Parameters:
#   -Command  Name of the action to run (first positional argument, required).
#   -Params   Every remaining argument, handed to the selected action.
param(
    [Parameter(Position=0, Mandatory=$true)]
    [string]$Command,
    [Parameter(Position=1, ValueFromRemainingArguments=$true)]
    [string[]]$Params
)

# Framework assemblies used below: cursor/SendKeys/screen metrics, bitmaps,
# and the UI Automation client API.
foreach ($assemblyName in 'System.Windows.Forms', 'System.Drawing', 'UIAutomationClient', 'UIAutomationTypes') {
    Add-Type -AssemblyName $assemblyName
}

# C# P/Invoke wrapper around user32.dll so the script can synthesize raw mouse
# button and keyboard events; compiled into the session as the [Win32] type.
$win32Source = @"
using System;
using System.Runtime.InteropServices;
public class Win32 {
[DllImport("user32.dll")]
public static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, int dwExtraInfo);
[DllImport("user32.dll")]
public static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, uint dwExtraInfo);
public const uint MOUSEEVENTF_LEFTDOWN = 0x02;
public const uint MOUSEEVENTF_LEFTUP = 0x04;
public const uint MOUSEEVENTF_RIGHTDOWN = 0x08;
public const uint MOUSEEVENTF_RIGHTUP = 0x10;
public const uint KEYEVENTF_KEYUP = 0x02;
}
"@
Add-Type -TypeDefinition $win32Source -Language CSharp
# Returns the screen-space centre (System.Drawing.Point) of the first on-screen
# UI Automation element whose Name property equals $Name, or $null when no
# visible element with a non-empty bounding rectangle is found.
function Get-VisibleElementCenter {
    param([string]$Name)

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $nameCondition = New-Object System.Windows.Automation.PropertyCondition([System.Windows.Automation.AutomationElement]::NameProperty, $Name)
    # Fetch ALL matches, then filter for visibility (to avoid phantom offscreen elements)
    $candidates = $root.FindAll([System.Windows.Automation.TreeScope]::Descendants, $nameCondition)
    foreach ($element in $candidates) {
        try {
            if ($element.Current.IsOffscreen) { continue }
            $rect = $element.Current.BoundingRectangle
            # Both dimensions must be positive; a zero-width or zero-height
            # rectangle has no clickable centre.
            if ($rect.Width -gt 0 -and $rect.Height -gt 0) {
                $centerX = [int]($rect.X + ($rect.Width / 2))
                $centerY = [int]($rect.Y + ($rect.Height / 2))
                return New-Object System.Drawing.Point($centerX, $centerY)
            }
        } catch {
            # Best effort: reading properties of an element can fail; skip it
            # and keep scanning the remaining matches.
        }
    }
    return $null
}

# Sends a button-down event followed by the matching button-up event at the
# current cursor position.
function Invoke-MouseButton {
    param([uint32]$DownFlag, [uint32]$UpFlag)

    [Win32]::mouse_event($DownFlag, 0, 0, 0, 0)
    [Win32]::mouse_event($UpFlag, 0, 0, 0, 0)
}

# Dispatch on the requested command. Usage errors and a failed "open" exit with
# code 1; "find"/"uiclick" misses are only reported on the host stream.
switch ($Command.ToLower()) {
    "mouse" {
        # mouse x y : move the cursor to absolute screen coordinates.
        if ($Params.Count -lt 2) { Write-Error "Usage: mouse x y"; exit 1 }
        [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point([int]$Params[0], [int]$Params[1])
        Write-Host "Moved mouse to $($Params[0]), $($Params[1])"
    }
    "click" {
        # Left click at the current cursor position.
        Invoke-MouseButton -DownFlag ([Win32]::MOUSEEVENTF_LEFTDOWN) -UpFlag ([Win32]::MOUSEEVENTF_LEFTUP)
        Write-Host "Clicked"
    }
    "rightclick" {
        # Right click at the current cursor position.
        Invoke-MouseButton -DownFlag ([Win32]::MOUSEEVENTF_RIGHTDOWN) -UpFlag ([Win32]::MOUSEEVENTF_RIGHTUP)
        Write-Host "Right Clicked"
    }
    "type" {
        # type 'text' : all remaining arguments are joined with single spaces.
        if ($Params.Count -lt 1) { Write-Error "Usage: type 'text'"; exit 1 }
        $text = $Params -join " "
        # NOTE(review): the text goes to SendKeys unescaped, so SendKeys
        # metacharacters such as + ^ % ~ ( ) { } are interpreted rather than
        # typed literally. Left as-is because callers may rely on it - confirm.
        [System.Windows.Forms.SendKeys]::SendWait($text)
        Write-Host "Typed: $text"
    }
    "key" {
        # key KEYNAME : press a single named key (name is upper-cased first).
        if ($Params.Count -lt 1) { Write-Error "Usage: key KEYNAME"; exit 1 }
        $k = $Params[0].ToUpper()
        if ($k -eq "LWIN" -or $k -eq "WIN") {
            # The Windows key is sent as a raw key-down/key-up pair for
            # virtual-key code 0x5B instead of going through SendKeys.
            [Win32]::keybd_event(0x5B, 0, 0, 0)
            [Win32]::keybd_event(0x5B, 0, [Win32]::KEYEVENTF_KEYUP, 0)
        } else {
            # Every other name (ENTER, TAB, F5, ...) is wrapped in braces and
            # handed to SendKeys.
            [System.Windows.Forms.SendKeys]::SendWait("{$k}")
        }
        Write-Host "Pressed: $k"
    }
    "screen" {
        # Report the size of the virtual screen (all monitors combined).
        $w = [System.Windows.Forms.SystemInformation]::VirtualScreen.Width
        $h = [System.Windows.Forms.SystemInformation]::VirtualScreen.Height
        Write-Host "Screen Resolution: $w x $h"
    }
    "screenshot" {
        # screenshot [file] : capture the screen into a file (default screenshot.png).
        if ($Params.Count -lt 1) { $file = "screenshot.png" } else { $file = $Params[0] }
        $fullPath = [System.IO.Path]::GetFullPath($file)
        $virtualScreen = [System.Windows.Forms.SystemInformation]::VirtualScreen
        $bmp = New-Object System.Drawing.Bitmap ($virtualScreen.Width, $virtualScreen.Height)
        try {
            $g = [System.Drawing.Graphics]::FromImage($bmp)
            try {
                # NOTE(review): capture starts at screen (0,0) while the bitmap
                # is sized to the whole virtual screen. If a monitor sits left
                # of or above the primary one, that area is not captured. Kept
                # unchanged so screenshot pixel coordinates stay equal to the
                # screen coordinates used by "mouse" - confirm before changing.
                $g.CopyFromScreen(0, 0, 0, 0, $bmp.Size)
            } finally {
                $g.Dispose()
            }
            $bmp.Save($fullPath)
        } finally {
            # Release the GDI bitmap even when capturing or saving fails.
            $bmp.Dispose()
        }
        Write-Host "Screenshot saved to $fullPath"
    }
    "find" {
        # find 'Name' : print the centre of the first visible element with that name.
        if ($Params.Count -lt 1) { Write-Error "Usage: find 'Name'"; exit 1 }
        $targetName = $Params -join " "
        Write-Host "Searching for VISIBLE UI Element: '$targetName'..."
        $center = Get-VisibleElementCenter -Name $targetName
        if ($null -ne $center) {
            $centerX = $center.X
            $centerY = $center.Y
            Write-Host "Found Visible '$targetName' at ($centerX, $centerY)"
            # Machine-readable line for callers that parse the output.
            Write-Host "COORD:$centerX,$centerY"
        } else {
            Write-Host "Element '$targetName' not found visible on desktop."
        }
    }
    "uiclick" {
        # uiclick 'Name' : locate the element like "find" does, then left-click its centre.
        if ($Params.Count -lt 1) { Write-Error "Usage: uiclick 'Name'"; exit 1 }
        $targetName = $Params -join " "
        Write-Host "Searching & Clicking: '$targetName'..."
        $center = Get-VisibleElementCenter -Name $targetName
        if ($null -ne $center) {
            $centerX = $center.X
            $centerY = $center.Y
            # Move & Click; the short pause lets the cursor move land first.
            [System.Windows.Forms.Cursor]::Position = $center
            Start-Sleep -Milliseconds 100
            Invoke-MouseButton -DownFlag ([Win32]::MOUSEEVENTF_LEFTDOWN) -UpFlag ([Win32]::MOUSEEVENTF_LEFTUP)
            Write-Host "Clicked '$targetName' at ($centerX, $centerY)"
        } else {
            Write-Host "Could not find visible '$targetName' to click."
        }
    }
    "open" {
        # open 'Path or URL' : hand the target to Start-Process.
        if ($Params.Count -lt 1) { Write-Error "Usage: open 'Path or URL'"; exit 1 }
        $target = $Params -join " "
        try {
            Start-Process $target
            Write-Host "Opened '$target'"
        } catch {
            Write-Error "Failed to open '$target': $_"
            # Signal the failure to the caller, like the usage errors do.
            exit 1
        }
    }
    "apps" {
        # List processes that own a titled main window.
        $apps = Get-Process | Where-Object { $_.MainWindowTitle -ne "" } | Select-Object Id, MainWindowTitle
        if ($apps) {
            $apps | Format-Table -AutoSize | Out-String | Write-Host
        } else {
            Write-Host "No visible applications found."
        }
    }
    default {
        Write-Host "Commands: mouse, click, rightclick, type, key, screen, screenshot, find, uiclick, open, apps"
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

310
bin/playwright-bridge.js Normal file
View File

@@ -0,0 +1,310 @@
#!/usr/bin/env node
/**
* Playwright Bridge for OpenQode TUI - Persistent Session Version
* Uses CDP to maintain browser session across multiple command invocations
*
* Credit: Inspired by browser-use/browser-use (https://github.com/browser-use/browser-use)
* License: MIT
*/
const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
const net = require('net');
// State file to persist CDP endpoint between calls (stored next to this script)
const STATE_FILE = path.join(__dirname, '.playwright-session.json');
// Port handed to Chromium via --remote-debugging-port and used to build the
// http://127.0.0.1:<port> CDP endpoint URL.
const CDP_PORT = 9222;
// Module-level handles: assigned by ensureBrowser(), read by executeCommand(),
// and reset to null by the 'close' command.
let browser = null;
let page = null;
/**
 * Check if a port is in use on 127.0.0.1.
 *
 * Probes by trying to bind a throwaway TCP server to the port. Any listen
 * error is treated as "in use".
 *
 * @param {number} port - TCP port to probe.
 * @returns {Promise<boolean>} true when binding failed, false when the probe
 *   could listen. The promise never rejects.
 */
function isPortInUse(port) {
  return new Promise((resolve) => {
    const probe = net.createServer();
    probe.once('error', () => resolve(true));
    probe.once('listening', () => {
      // Report "free" only once the probe socket has actually been released,
      // so a caller that binds the port right away does not race with us.
      probe.close(() => resolve(false));
    });
    probe.listen(port, '127.0.0.1');
  });
}
/**
 * Load saved session state.
 *
 * @returns {*} the parsed JSON content of STATE_FILE, or null when the file
 *   does not exist or cannot be read or parsed.
 */
function loadState() {
  if (!fs.existsSync(STATE_FILE)) {
    return null;
  }
  try {
    const raw = fs.readFileSync(STATE_FILE, 'utf8');
    return JSON.parse(raw);
  } catch (e) {
    // Unreadable or corrupt state is treated the same as no state at all.
    return null;
  }
}
/**
 * Save session state.
 *
 * @param {*} state - value written to STATE_FILE as JSON with 2-space
 *   indentation; any existing file content is overwritten.
 */
function saveState(state) {
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(STATE_FILE, serialized);
}
/**
 * Clear session state by deleting STATE_FILE; does nothing when the file is
 * not there.
 */
function clearState() {
  const hasStateFile = fs.existsSync(STATE_FILE);
  if (!hasStateFile) {
    return;
  }
  fs.unlinkSync(STATE_FILE);
}
/**
 * Pick the page to drive on an already connected browser: the first page of
 * the first context, creating the page (and the context) when missing.
 *
 * @param {object} connectedBrowser - Playwright Browser instance.
 * @returns {Promise<object>} the Playwright Page to use.
 */
async function attachPage(connectedBrowser) {
  const contexts = connectedBrowser.contexts();
  if (contexts.length > 0) {
    const pages = contexts[0].pages();
    return pages.length > 0 ? pages[0] : contexts[0].newPage();
  }
  const context = await connectedBrowser.newContext({ viewport: null });
  return context.newPage();
}

/**
 * Launch browser with CDP enabled or connect to existing one.
 *
 * Order of attempts:
 *   1. the endpoint remembered in the state file;
 *   2. whatever is already listening on CDP_PORT;
 *   3. a freshly launched, headed Chromium with remote debugging on CDP_PORT.
 *
 * Sets the module-level `browser` and `page`. The endpoint is written to the
 * state file only after a page has been attached, so a half-working session
 * is never persisted. The state key is named `wsEndpoint` but holds the
 * http://127.0.0.1:<port> CDP URL.
 *
 * @returns {Promise<{browser: object, page: object}>}
 * @throws when the final launch attempt (or creating its page) fails.
 */
async function ensureBrowser() {
  const cdpEndpoint = `http://127.0.0.1:${CDP_PORT}`;
  const state = loadState();

  // Try to connect to existing browser first
  if (state && state.wsEndpoint) {
    try {
      browser = await chromium.connectOverCDP(state.wsEndpoint);
      page = await attachPage(browser);
      return { browser, page };
    } catch (e) {
      // Connection failed, browser might have closed
      clearState();
    }
  }

  // Check if CDP port is already in use
  if (await isPortInUse(CDP_PORT)) {
    // Try to connect to existing browser on that port
    try {
      browser = await chromium.connectOverCDP(cdpEndpoint);
      page = await attachPage(browser);
      saveState({ wsEndpoint: cdpEndpoint });
      return { browser, page };
    } catch (e) {
      console.log('Could not connect to existing browser, launching new one...');
    }
  }

  // Launch new browser with CDP enabled
  browser = await chromium.launch({
    headless: false,
    args: [
      '--start-maximized',
      `--remote-debugging-port=${CDP_PORT}`
    ]
  });
  const context = await browser.newContext({ viewport: null });
  page = await context.newPage();
  saveState({ wsEndpoint: cdpEndpoint, launchTime: Date.now() });
  console.log('Browser launched with persistent session');
  return { browser, page };
}
/**
 * Run multiple commands in sequence (for batch execution).
 *
 * A failing command does not stop the batch; its error message is recorded
 * and the next entry runs.
 *
 * @param {Iterable<{command: string, args?: (string[]|string)}>} commands -
 *   entries to execute in order. `args` may be omitted (treated as no
 *   arguments) or given as a single value (treated as one argument). Command
 *   names are matched case-insensitively, like on the command line.
 * @returns {Promise<Array<object>>} one result per entry:
 *   `{ success: true, command, result }` or `{ success: false, command, error }`,
 *   where `command` is the name exactly as given in the entry.
 * @throws when the browser cannot be made available.
 */
async function runBatch(commands) {
  await ensureBrowser();
  const results = [];
  for (const entry of commands) {
    // Read the name defensively so a null/garbage entry is reported as a
    // failed command instead of aborting the whole batch.
    const givenName = entry ? entry.command : undefined;
    try {
      const name = typeof givenName === 'string' ? givenName.toLowerCase() : givenName;
      // Missing args -> []; a lone value -> [value]; arrays pass through.
      const args = entry == null || entry.args == null ? [] : [].concat(entry.args);
      const result = await executeCommand(name, args);
      results.push({ success: true, command: givenName, result });
    } catch (e) {
      results.push({ success: false, command: givenName, error: e.message });
    }
  }
  return results;
}
/**
 * Execute a single command against the module-level `page` / `browser`
 * (these must have been set up by ensureBrowser() beforehand).
 *
 * Supported commands and their arguments:
 *   navigate <url>            -> { navigated, title }
 *   fill <selector> <text...> -> { filled, text }   (falls back to placeholder, then label)
 *   click <selector...>       -> { clicked }        (falls back to exact text, button role, any text)
 *   press <key>               -> { pressed }
 *   type <text...>            -> { typed }
 *   screenshot [file]         -> { screenshot }     (full page, default screenshot.png)
 *   content                   -> { content }        (body text, first 5000 characters)
 *   title                     -> { title }
 *   url                       -> { url }
 *   elements                  -> { elements }       (from the first 30 interactive elements)
 *   wait <selector> [ms]      -> { waited }         (default timeout 10000 ms)
 *   close                     -> { closed: true }
 *
 * @param {string} command - command name (already lower-cased by the caller).
 * @param {string[]} [args=[]] - positional arguments for the command.
 * @returns {Promise<object>} command-specific result object, see above.
 * @throws {Error} for an unknown command, a missing required argument, or
 *   when the underlying Playwright call fails.
 */
async function executeCommand(command, args = []) {
  switch (command) {
    case 'navigate': {
      const url = args[0];
      if (!url) throw new Error('URL required');
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
      const title = await page.title();
      return { navigated: url, title };
    }
    case 'fill': {
      const selector = args[0];
      const text = args.slice(1).join(' ');
      if (!selector || !text) throw new Error('Selector and text required');
      // Try the argument as a selector first, then as placeholder text, then
      // as a label; the last failure is the one that propagates.
      try {
        await page.fill(selector, text, { timeout: 5000 });
      } catch (e) {
        try {
          await page.getByPlaceholder(selector).fill(text, { timeout: 5000 });
        } catch (e2) {
          await page.getByLabel(selector).fill(text, { timeout: 5000 });
        }
      }
      return { filled: selector, text };
    }
    case 'click': {
      const selector = args.join(' ');
      if (!selector) throw new Error('Selector required');
      // Progressive fallbacks: selector -> exact text -> button by name ->
      // first element containing the text. Only the last failure propagates.
      try {
        await page.click(selector, { timeout: 5000 });
      } catch (e) {
        try {
          await page.click(`text="${selector}"`, { timeout: 5000 });
        } catch (e2) {
          try {
            await page.getByRole('button', { name: selector }).click({ timeout: 5000 });
          } catch (e3) {
            await page.getByText(selector).first().click({ timeout: 5000 });
          }
        }
      }
      return { clicked: selector };
    }
    case 'press': {
      const key = args[0];
      if (!key) throw new Error('Key required');
      await page.keyboard.press(key);
      return { pressed: key };
    }
    case 'type': {
      const text = args.join(' ');
      if (!text) throw new Error('Text required');
      await page.keyboard.type(text);
      return { typed: text };
    }
    case 'screenshot': {
      const filename = args[0] || 'screenshot.png';
      const fullPath = path.resolve(filename);
      await page.screenshot({ path: fullPath, fullPage: true });
      return { screenshot: fullPath };
    }
    case 'content': {
      const content = await page.textContent('body');
      return { content: content?.substring(0, 5000) };
    }
    case 'title': {
      return { title: await page.title() };
    }
    case 'url': {
      return { url: page.url() };
    }
    case 'elements': {
      const elements = await page.evaluate(() => {
        const els = document.querySelectorAll('button, a, input, textarea, select, [role="button"]');
        // The index `i` is the position among the first 30 matches, assigned
        // before entries without text or name are dropped.
        return Array.from(els).slice(0, 30).map((el, i) => ({
          i,
          tag: el.tagName.toLowerCase(),
          text: el.textContent?.trim().substring(0, 40) || '',
          name: el.name || el.id || ''
        })).filter(e => e.text || e.name);
      });
      return { elements };
    }
    case 'wait': {
      const selector = args[0];
      // Fail early with a clear message, like the other selector commands.
      if (!selector) throw new Error('Selector required');
      const timeout = parseInt(args[1], 10) || 10000;
      await page.waitForSelector(selector, { timeout });
      return { waited: selector };
    }
    case 'close': {
      // NOTE(review): for a browser attached via connectOverCDP, Playwright
      // documents close() as disconnecting from it - confirm this really ends
      // the persistent browser process.
      if (browser) {
        await browser.close();
        browser = null;
        page = null;
      }
      clearState();
      return { closed: true };
    }
    default:
      throw new Error(`Unknown command: ${command}`);
  }
}
/**
 * Command-line entry point.
 *
 *   node playwright-bridge.js <command> [args...]
 *   node playwright-bridge.js batch <file.json>
 *
 * Single commands print `RESULT:<json>` on success. Batch mode prints the
 * JSON array returned by runBatch(). Every failure prints `ERROR:<message>`
 * on stderr and exits with code 1; with no command the usage text is printed.
 */
async function main() {
  const args = process.argv.slice(2);
  const command = args[0]?.toLowerCase();
  if (!command) {
    console.log('Playwright Bridge - Persistent Session');
    console.log('Commands: navigate, fill, click, press, type, screenshot, content, title, url, elements, wait, close');
    console.log('Batch:    batch <file.json>');
    console.log('');
    console.log('Example: node playwright-bridge.js navigate https://google.com');
    return;
  }

  // Special batch mode for multiple commands
  if (command === 'batch') {
    const batchFile = args[1];
    if (!batchFile || !fs.existsSync(batchFile)) {
      // Previously this case exited silently with code 0.
      console.error(`ERROR:Batch file not found: ${batchFile || '(none given)'}`);
      process.exit(1);
    }
    try {
      const commands = JSON.parse(fs.readFileSync(batchFile, 'utf8'));
      const results = await runBatch(commands);
      console.log(JSON.stringify(results, null, 2));
    } catch (error) {
      // Covers unreadable/malformed batch files and browser start-up failures.
      console.error(`ERROR:${error.message}`);
      process.exit(1);
    }
    return;
  }

  try {
    await ensureBrowser();
    const result = await executeCommand(command, args.slice(1));
    console.log(`RESULT:${JSON.stringify(result)}`);
  } catch (error) {
    console.error(`ERROR:${error.message}`);
    process.exit(1);
  }
}
// Deliberately empty: nothing is torn down when the process is about to exit.
process.on('beforeExit', () => {
  // Don't close browser on exit - keep it persistent!
});
// Run the CLI. Anything main() did not handle itself is printed and reflected
// in the exit code so callers can detect the failure.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});