diff --git a/bin/input.ps1 b/bin/input.ps1 new file mode 100644 index 0000000..5a799c3 --- /dev/null +++ b/bin/input.ps1 @@ -0,0 +1,107 @@ +<# +.SYNOPSIS + OpenQode Input Bridge - Basic Computer Use +.DESCRIPTION + Provides mouse, keyboard, and screen capabilities for the AI Agent. + Usage: input.ps1 [args...] +#> +param( + [string]$Action, + [string[]]$Args +) + +# Load required assemblies +Add-Type -AssemblyName System.Windows.Forms +Add-Type -AssemblyName System.Drawing + +# C# P/Invoke for advanced Input (SendInput is more reliable than SendKeys) +$code = @" +using System; +using System.Runtime.InteropServices; + +public class Win32 { + [DllImport("user32.dll")] + public static extern void mouse_event(uint dwFlags, uint dx, uint dy, uint dwData, int dwExtraInfo); + + [DllImport("user32.dll")] + public static extern void keybd_event(byte bVk, byte bScan, uint dwFlags, uint dwExtraInfo); + + public const uint MOUSEEVENTF_LEFTDOWN = 0x02; + public const uint MOUSEEVENTF_LEFTUP = 0x04; + public const uint MOUSEEVENTF_RIGHTDOWN = 0x08; + public const uint MOUSEEVENTF_RIGHTUP = 0x10; + public const uint KEYEVENTF_KEYUP = 0x02; +} +"@ +Add-Type -TypeDefinition $code -Language CSharp + +switch ($Action.ToLower()) { + "mouse" { + if ($Args.Count -lt 2) { Write-Error "Usage: mouse x y"; exit 1 } + [System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point([int]$Args[0], [int]$Args[1]) + Write-Host "Moved mouse to $($Args[0]), $($Args[1])" + } + + "click" { + # Simple left click at current position + [Win32]::mouse_event([Win32]::MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0) + [Win32]::mouse_event([Win32]::MOUSEEVENTF_LEFTUP, 0, 0, 0, 0) + Write-Host "Clicked" + } + + "rightclick" { + [Win32]::mouse_event([Win32]::MOUSEEVENTF_RIGHTDOWN, 0, 0, 0, 0) + [Win32]::mouse_event([Win32]::MOUSEEVENTF_RIGHTUP, 0, 0, 0, 0) + Write-Host "Right Clicked" + } + + "type" { + if ($Args.Count -lt 1) { Write-Error "Usage: type 'text'"; exit 1 } + $text = $Args -join " " + [System.Windows.Forms.SendKeys]::SendWait($text) + Write-Host "Typed: $text" + } + + "key" { + # Usage: key ENTER, key LWIN, key TAB + if ($Args.Count -lt 1) { Write-Error "Usage: key KEYNAME"; exit 1 } + $k = $Args[0].ToUpper() + + # Handle Windows Key specifically (common request) + if ($k -eq "LWIN" -or $k -eq "WIN") { + [Win32]::keybd_event(0x5B, 0, 0, 0) # LWin Down + [Win32]::keybd_event(0x5B, 0, 0x02, 0) # LWin Up + } elseif ($k -eq "ENTER") { + [System.Windows.Forms.SendKeys]::SendWait("{ENTER}") + } elseif ($k -eq "TAB") { + [System.Windows.Forms.SendKeys]::SendWait("{TAB}") + } else { + # Fallback to SendKeys format + [System.Windows.Forms.SendKeys]::SendWait("{$k}") + } + Write-Host "Pressed: $k" + } + + "screen" { + $w = [System.Windows.Forms.SystemInformation]::VirtualScreen.Width + $h = [System.Windows.Forms.SystemInformation]::VirtualScreen.Height + Write-Host "Screen Resolution: $w x $h" + } + + "screenshot" { + if ($Args.Count -lt 1) { $file = "screenshot.png" } else { $file = $Args[0] } + $fullPath = [System.IO.Path]::GetFullPath($file) + + $bmp = New-Object System.Drawing.Bitmap ([System.Windows.Forms.SystemInformation]::VirtualScreen.Width, [System.Windows.Forms.SystemInformation]::VirtualScreen.Height) + $g = [System.Drawing.Graphics]::FromImage($bmp) + $g.CopyFromScreen(0, 0, 0, 0, $bmp.Size) + $bmp.Save($fullPath) + $g.Dispose() + $bmp.Dispose() + Write-Host "Screenshot saved to $fullPath" + } + + default { + Write-Host "Commands: mouse, click, rightclick, type, key, screen, screenshot" + } +} diff --git a/bin/opencode-ink.mjs b/bin/opencode-ink.mjs index a1f0bbf..83601a3 100644 --- a/bin/opencode-ink.mjs +++ b/bin/opencode-ink.mjs @@ -484,6 +484,22 @@ Now, respond ONLY as TERMINUS. Never break character. - You are not a helper; you are the lead developer. - Do not wait for inputs. Go get them. - Use: \`tree -L 2\`, \`cat\`, \`head\`, \`ls\`, \`find\` to explore the codebase yourself. + +# COMPUTER USE & INPUT CONTROL +You have access to a "Hands" script: \`bin/input.ps1\`. +Use it to control the mouse and keyboard when requested. + +## Usage: +- \`powershell bin/input.ps1 mouse \` (Move mouse) +- \`powershell bin/input.ps1 click\` (Left click) +- \`powershell bin/input.ps1 type "text"\` (Type test) +- \`powershell bin/input.ps1 key \` (Press key: LWIN, ENTER, TAB, etc) +- \`powershell bin/input.ps1 screenshot \` (Take screenshot) + +## Example: "Open Start Menu" +\`\`\`powershell +powershell bin/input.ps1 key LWIN +\`\`\` `; const defaultPrompts = { @@ -3226,10 +3242,18 @@ This gives the user a chance to refine requirements before implementation. const results = []; for (const cmd of detectedCommands) { - setMessages(prev => [...prev, { role: 'system', content: `▶ Running: ${cmd}` }]); + // FIX: Replace relative 'bin/input.ps1' with absolute path to allow running from any project folder + const inputScriptAbs = path.join(__dirname, 'input.ps1'); + const safeInputmd = `"${inputScriptAbs}"`; - const res = await runShellCommand(cmd, project || process.cwd()); - results.push({ cmd, ...res }); + let finalCmd = cmd + .replace(/bin\/input\.ps1/g, safeInputmd) + .replace(/bin\\input\.ps1/g, safeInputmd); + + setMessages(prev => [...prev, { role: 'system', content: `▶ Running: ${finalCmd}` }]); + + const res = await runShellCommand(finalCmd, project || process.cwd()); + results.push({ cmd: finalCmd, ...res }); if (res.success) { setMessages(prev => [...prev, { role: 'system', content: `✅ Output:\n${res.output}` }]);