feat: add UI Automation 'find' command for true textual vision
This commit is contained in:
@@ -9,6 +9,39 @@ param(
|
|||||||
# Load required assemblies
|
# Load required assemblies
|
||||||
Add-Type -AssemblyName System.Windows.Forms
|
Add-Type -AssemblyName System.Windows.Forms
|
||||||
Add-Type -AssemblyName System.Drawing
|
Add-Type -AssemblyName System.Drawing
|
||||||
|
Add-Type -AssemblyName UIAutomationClient
|
||||||
|
Add-Type -AssemblyName UIAutomationTypes
|
||||||
|
|
||||||
|
# ... (Previous code remains) ...
|
||||||
|
|
||||||
|
switch ($Command.ToLower()) {
|
||||||
|
# ... (Previous cases remain) ...
|
||||||
|
|
||||||
|
"find" {
    # Looks up a UI element whose UI Automation Name equals the joined
    # arguments and reports the centre point of its bounding rectangle.
    #   Success output : "Found '<name>' at (x, y)" then "Action: mouse x y"
    #   Failure output : "Element '<name>' not found visible on desktop."
    if ($Params.Count -lt 1) { Write-Error "Usage: find 'Name'"; exit 1 }

    # All arguments form one name, so multi-word names work without quoting.
    $targetName = $Params -join " "

    Write-Host "Searching for UI Element: '$targetName'..."

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $cond = New-Object System.Windows.Automation.PropertyCondition([System.Windows.Automation.AutomationElement]::NameProperty, $targetName)

    # Single exact-name search over every descendant of the desktop root.
    # NOTE(review): a Descendants search from RootElement walks the whole
    # desktop tree and may be slow; consider scoping to a window.
    $element = $root.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $cond)

    $rect = $null
    if ($element) {
        $rect = $element.Current.BoundingRectangle
    }

    # An element that exists but is not displayed reports an empty rectangle,
    # whose coordinates cannot be converted to [int]; treat it as not visible.
    if ($element -and -not $rect.IsEmpty) {
        $centerX = [int]($rect.X + ($rect.Width / 2))
        $centerY = [int]($rect.Y + ($rect.Height / 2))
        Write-Host "Found '$targetName' at ($centerX, $centerY)"
        Write-Host "Action: mouse $centerX $centerY"
    } else {
        Write-Host "Element '$targetName' not found visible on desktop."
    }
}
|
||||||
|
|
||||||
|
"apps" {
|
||||||
|
# ... (Rest remains) ...
|
||||||
|
|
||||||
# C# P/Invoke for advanced Input (SendInput is more reliable than SendKeys)
|
# C# P/Invoke for advanced Input (SendInput is more reliable than SendKeys)
|
||||||
$code = @"
|
$code = @"
|
||||||
@@ -97,6 +130,31 @@ switch ($Command.ToLower()) {
|
|||||||
Write-Host "Screenshot saved to $fullPath"
|
Write-Host "Screenshot saved to $fullPath"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
"find" {
    # Looks up a UI element whose UI Automation Name equals the joined
    # arguments and reports the centre point of its bounding rectangle.
    #   Success output : "Found '<name>' at (x, y)" then "COORD:x,y"
    #   Failure output : "Element '<name>' not found visible on desktop."
    if ($Params.Count -lt 1) { Write-Error "Usage: find 'Name'"; exit 1 }

    # All arguments form one name, so multi-word names work without quoting.
    $targetName = $Params -join " "

    Write-Host "Searching for UI Element: '$targetName'..."

    $root = [System.Windows.Automation.AutomationElement]::RootElement
    $cond = New-Object System.Windows.Automation.PropertyCondition([System.Windows.Automation.AutomationElement]::NameProperty, $targetName)

    # Single exact-name search over every descendant of the desktop root.
    # NOTE(review): a Descendants search from RootElement walks the whole
    # desktop tree and may be slow; consider scoping to a window.
    $element = $root.FindFirst([System.Windows.Automation.TreeScope]::Descendants, $cond)

    $rect = $null
    if ($element) {
        $rect = $element.Current.BoundingRectangle
    }

    # An element that exists but is not displayed reports an empty rectangle,
    # whose coordinates cannot be converted to [int]; treat it as not visible.
    if ($element -and -not $rect.IsEmpty) {
        $centerX = [int]($rect.X + ($rect.Width / 2))
        $centerY = [int]($rect.Y + ($rect.Height / 2))
        Write-Host "Found '$targetName' at ($centerX, $centerY)"

        # Auto-selection support return format
        Write-Host "COORD:$centerX,$centerY"
    } else {
        Write-Host "Element '$targetName' not found visible on desktop."
    }
}
|
||||||
|
|
||||||
"apps" {
|
"apps" {
|
||||||
# List all processes with a window title (visible apps)
|
# List all processes with a window title (visible apps)
|
||||||
$apps = Get-Process | Where-Object { $_.MainWindowTitle -ne "" } | Select-Object Id, MainWindowTitle
|
$apps = Get-Process | Where-Object { $_.MainWindowTitle -ne "" } | Select-Object Id, MainWindowTitle
|
||||||
|
|||||||
@@ -491,14 +491,15 @@ Use it to control the mouse, keyboard, and "see" the system.
|
|||||||
|
|
||||||
## 👁️ VISION & BLINDNESS PROTOCOL:
|
## 👁️ VISION & BLINDNESS PROTOCOL:
|
||||||
You are a TEXT-BASED intelligence. You CANNOT see images/screenshots you take.
|
You are a TEXT-BASED intelligence. You CANNOT see images/screenshots you take.
|
||||||
- **\`input.ps1 screenshot\`**: Creates an image for the **USER** to see. You learn NOTHING from this.
|
- **\`input.ps1 find "Name"\`**: **TRUE VISION**. Finds a UI element (button/window) by text and tells you where it is.
|
||||||
- **\`input.ps1 apps\`**: Your "Eyes" for windows. Returns TEXT list of open apps.
|
- **\`input.ps1 apps\`**: Your "Eyes" for windows. Returns TEXT list of open apps.
|
||||||
- **\`input.ps1 screen\`**: Your "Eyes" for geometry. Returns TEXT resolution (e.g. 1920x1080).
|
- **\`input.ps1 screen\`**: Your "Eyes" for geometry. Returns TEXT resolution.
|
||||||
|
|
||||||
### 📐 THE LAW OF COORDINATES:
|
### 📐 THE LAW OF ACCURACY:
|
||||||
Since you cannot see buttons, you MUST calculate them using \`screen\` dimensions.
|
1. **FIND FIRST**: If you need to click a button, SEARCH FOR IT.
|
||||||
1. Run \`powershell bin/input.ps1 screen\`.
|
- \`powershell bin/input.ps1 find "Start"\` -> Returns "Found 'Start' at (30, 1190)" followed by "COORD:30,1190".
|
||||||
2. Get Output: \`Width x Height\` (e.g. 1920 x 1200).
|
- **THEN** use those coordinates to click.
|
||||||
|
2. **FALLBACK**: Only calculate coordinates manually if \`find\` fails.
|
||||||
3. **Start Menu Logic:** Bottom-Left corner.
|
3. **Start Menu Logic:** Bottom-Left corner.
|
||||||
- X = 0 to 50
|
- X = 0 to 50
|
||||||
- Y = Height - 10 (e.g. 1190).
|
- Y = Height - 10 (e.g. 1190).
|
||||||
|
|||||||
Reference in New Issue
Block a user