feat: add Input Bridge (input.ps1) and fix execution path resolution

This commit is contained in:
Gemini AI
2025-12-14 02:12:10 +04:00
Unverified
parent 47dfbddabb
commit f12615ea56
2 changed files with 134 additions and 3 deletions

107
bin/input.ps1 Normal file
View File

@@ -0,0 +1,107 @@
<#
.SYNOPSIS
OpenQode Input Bridge - Basic Computer Use
.DESCRIPTION
Provides mouse, keyboard, and screen capabilities for the AI Agent.
Usage: input.ps1 <command> [args...]
.PARAMETER Action
Command to run: mouse, click, rightclick, type, key, screen or screenshot.
.PARAMETER Args
Every value that follows the command on the command line, in order.
.EXAMPLE
input.ps1 mouse 100 200
.EXAMPLE
input.ps1 key LWIN
#>
param(
    [Parameter(Position = 0)]
    [string]$Action,

    # A plain positional [string[]] parameter binds only ONE command-line value,
    # so "mouse 100 200" never delivered both coordinates. Collecting the
    # remaining arguments gives the script the whole tail of the command line
    # while keeping the -Args name usable for callers that pass it explicitly.
    [Parameter(Position = 1, ValueFromRemainingArguments = $true)]
    [string[]]$Args
)
# Load the assemblies the commands below rely on:
#   System.Windows.Forms -> Cursor, SendKeys, SystemInformation
#   System.Drawing       -> Point, Bitmap, Graphics
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
# C# P/Invoke declarations for user32.dll's mouse_event and keybd_event.
# They are used for mouse clicks and for the Windows key; all other key and
# text input goes through SendKeys.
$code = @"
using System;
using System.Runtime.InteropServices;
/// <summary>
/// Minimal P/Invoke surface over user32.dll for synthesizing mouse button and
/// keyboard events. Member names mirror the native API because the calling
/// script refers to them by these exact names.
/// </summary>
public class Win32 {
    // Flag values passed as dwFlags to mouse_event.
    public const uint MOUSEEVENTF_LEFTDOWN  = 0x0002;
    public const uint MOUSEEVENTF_LEFTUP    = 0x0004;
    public const uint MOUSEEVENTF_RIGHTDOWN = 0x0008;
    public const uint MOUSEEVENTF_RIGHTUP   = 0x0010;

    // Flag value passed as dwFlags to keybd_event.
    public const uint KEYEVENTF_KEYUP = 0x0002;

    // NOTE(review): the native dwExtraInfo argument is documented as ULONG_PTR;
    // the declared types below are kept unchanged - confirm before changing.
    [DllImport("user32.dll", EntryPoint = "mouse_event")]
    public static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, int dwExtraInfo);

    [DllImport("user32.dll", EntryPoint = "keybd_event")]
    public static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, uint dwExtraInfo);
}
"@
# Compile the embedded C# at run time so its members can be called as [Win32]::<member>.
Add-Type -TypeDefinition $code -Language CSharp
# Dispatch on the requested command. Each branch validates its own arguments,
# performs the action and prints a one-line confirmation.
switch ($Action.ToLower()) {
    "mouse" {
        # Move the cursor to the given screen coordinates.
        if ($Args.Count -lt 2) { Write-Error "Usage: mouse x y"; exit 1 }
        [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point([int]$Args[0], [int]$Args[1])
        Write-Host "Moved mouse to $($Args[0]), $($Args[1])"
    }
    "click" {
        # Simple left click at current position (button down, then up).
        [Win32]::mouse_event([Win32]::MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0)
        [Win32]::mouse_event([Win32]::MOUSEEVENTF_LEFTUP, 0, 0, 0, 0)
        Write-Host "Clicked"
    }
    "rightclick" {
        # Right click at current position (button down, then up).
        [Win32]::mouse_event([Win32]::MOUSEEVENTF_RIGHTDOWN, 0, 0, 0, 0)
        [Win32]::mouse_event([Win32]::MOUSEEVENTF_RIGHTUP, 0, 0, 0, 0)
        Write-Host "Right Clicked"
    }
    "type" {
        if ($Args.Count -lt 1) { Write-Error "Usage: type 'text'"; exit 1 }
        $text = $Args -join " "
        # SendKeys interprets + ^ % ~ ( ) { } [ ] as modifier/grouping syntax.
        # Wrap each of them in braces so the text is typed literally instead of
        # being mangled or rejected. Special keys belong to the "key" command.
        $literal = [regex]::Replace($text, '[+^%~(){}\[\]]', '{$0}')
        [System.Windows.Forms.SendKeys]::SendWait($literal)
        Write-Host "Typed: $text"
    }
    "key" {
        # Usage: key ENTER, key LWIN, key TAB
        if ($Args.Count -lt 1) { Write-Error "Usage: key KEYNAME"; exit 1 }
        $k = $Args[0].ToUpper()
        if ($k -eq "LWIN" -or $k -eq "WIN") {
            # The Windows key is sent through keybd_event using virtual-key
            # code 0x5B: one call for key down, one for key up.
            [Win32]::keybd_event(0x5B, 0, 0, 0)
            [Win32]::keybd_event(0x5B, 0, [Win32]::KEYEVENTF_KEYUP, 0)
        } else {
            # Every other key name is wrapped in SendKeys brace syntax,
            # e.g. ENTER -> {ENTER}, TAB -> {TAB}.
            [System.Windows.Forms.SendKeys]::SendWait("{$k}")
        }
        Write-Host "Pressed: $k"
    }
    "screen" {
        # Report the size of the virtual screen.
        $w = [System.Windows.Forms.SystemInformation]::VirtualScreen.Width
        $h = [System.Windows.Forms.SystemInformation]::VirtualScreen.Height
        Write-Host "Screen Resolution: $w x $h"
    }
    "screenshot" {
        # Capture the virtual screen to a file (default: screenshot.png).
        if ($Args.Count -lt 1) { $file = "screenshot.png" } else { $file = $Args[0] }
        $fullPath = [System.IO.Path]::GetFullPath($file)
        $bounds = [System.Windows.Forms.SystemInformation]::VirtualScreen
        $bmp = New-Object System.Drawing.Bitmap($bounds.Width, $bounds.Height)
        try {
            $g = [System.Drawing.Graphics]::FromImage($bmp)
            try {
                # Copy from the virtual screen's own origin rather than (0,0):
                # the bitmap is sized to the virtual screen, whose Left/Top are
                # not necessarily zero, so (0,0) would capture a shifted region.
                $g.CopyFromScreen($bounds.Left, $bounds.Top, 0, 0, $bmp.Size)
            } finally {
                $g.Dispose()
            }
            $bmp.Save($fullPath)
        } finally {
            # Release the bitmap even when the capture or the save fails.
            $bmp.Dispose()
        }
        Write-Host "Screenshot saved to $fullPath"
    }
    default {
        # Unknown or missing command: list what is available.
        Write-Host "Commands: mouse, click, rightclick, type, key, screen, screenshot"
    }
}

View File

@@ -484,6 +484,22 @@ Now, respond ONLY as TERMINUS. Never break character.
- You are not a helper; you are the lead developer. - You are not a helper; you are the lead developer.
- Do not wait for inputs. Go get them. - Do not wait for inputs. Go get them.
- Use: \`tree -L 2\`, \`cat\`, \`head\`, \`ls\`, \`find\` to explore the codebase yourself. - Use: \`tree -L 2\`, \`cat\`, \`head\`, \`ls\`, \`find\` to explore the codebase yourself.
# COMPUTER USE & INPUT CONTROL
You have access to a "Hands" script: \`bin/input.ps1\`.
Use it to control the mouse and keyboard when requested.
## Usage:
- \`powershell bin/input.ps1 mouse <x> <y>\` (Move mouse)
- \`powershell bin/input.ps1 click\` (Left click)
- \`powershell bin/input.ps1 type "text"\` (Type text)
- \`powershell bin/input.ps1 key <KEY>\` (Press key: LWIN, ENTER, TAB, etc)
- \`powershell bin/input.ps1 screenshot <path.png>\` (Take screenshot)
## Example: "Open Start Menu"
\`\`\`powershell
powershell bin/input.ps1 key LWIN
\`\`\`
`; `;
const defaultPrompts = { const defaultPrompts = {
@@ -3226,10 +3242,18 @@ This gives the user a chance to refine requirements before implementation.
const results = []; const results = [];
for (const cmd of detectedCommands) { for (const cmd of detectedCommands) {
setMessages(prev => [...prev, { role: 'system', content: `▶ Running: ${cmd}` }]); // FIX: Replace relative 'bin/input.ps1' with absolute path to allow running from any project folder
const inputScriptAbs = path.join(__dirname, 'input.ps1');
const safeInputmd = `"${inputScriptAbs}"`;
const res = await runShellCommand(cmd, project || process.cwd()); let finalCmd = cmd
results.push({ cmd, ...res }); .replace(/bin\/input\.ps1/g, safeInputmd)
.replace(/bin\\input\.ps1/g, safeInputmd);
setMessages(prev => [...prev, { role: 'system', content: `▶ Running: ${finalCmd}` }]);
const res = await runShellCommand(finalCmd, project || process.cwd());
results.push({ cmd: finalCmd, ...res });
if (res.success) { if (res.success) {
setMessages(prev => [...prev, { role: 'system', content: `✅ Output:\n${res.output}` }]); setMessages(prev => [...prev, { role: 'system', content: `✅ Output:\n${res.output}` }]);