v1.5.0: Add Full GUI Automation with Playwright

This commit is contained in:
admin
2026-02-26 16:20:27 +04:00
Unverified
parent 07b47fa952
commit c700d84f4f
5 changed files with 1056 additions and 7 deletions

536
src/tools/gui-automation.ts Normal file
View File

@@ -0,0 +1,536 @@
/**
* GUI Automation Tool for QwenClaw
*
* Provides full GUI automation capabilities using Playwright
* - Web browser automation
* - Screenshot capture
* - Element interaction (click, type, select)
* - Form filling
* - Navigation
* - Data extraction
*/
import { chromium, firefox, webkit, type Browser, type Page } from 'playwright';
import { writeFile, mkdir } from 'fs/promises';
import { join } from 'path';
/**
 * Directory that screenshots are written into:
 * `<current working directory>/.qwen/qwenclaw/screenshots`.
 * The working directory is read once, when this module is evaluated.
 */
const SCREENSHOTS_DIR = join(process.cwd(), '.qwen', 'qwenclaw', 'screenshots');
/**
 * Settings used by `GUIAutomation` when it launches a browser and opens its page.
 */
export interface GUIAutomationConfig {
/** Which Playwright engine `launch()` starts. */
browserType: 'chromium' | 'firefox' | 'webkit';
/** Passed straight through as the `headless` launch option. */
headless: boolean;
/** Viewport handed to `browser.newPage()`; optional. */
viewport?: { width: number; height: number };
/** User-agent string handed to `browser.newPage()`; optional. */
userAgent?: string;
}
/**
 * Baseline configuration; the `GUIAutomation` constructor spreads caller
 * overrides on top of these values.
 *
 * NOTE(review): the default user agent is a Chrome-on-Windows string and is
 * passed to `newPage()` whatever `browserType` is selected, so Firefox and
 * WebKit sessions also present themselves as Chrome unless the caller
 * overrides `userAgent` — confirm this is intentional.
 */
const DEFAULT_CONFIG: GUIAutomationConfig = {
browserType: 'chromium',
headless: true,
viewport: { width: 1920, height: 1080 },
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
};
/**
 * Stateful wrapper around one Playwright browser and one page.
 *
 * `launch()` starts the browser and opens the page, `close()` shuts both
 * down, and `goto()` launches on demand. Every other method needs an open
 * page and rejects with `Error('Browser not launched')` when there is none.
 */
export class GUIAutomation {
  private browser: Browser | null = null;
  private page: Page | null = null;
  private config: GUIAutomationConfig;

  /**
   * @param config Overrides merged on top of `DEFAULT_CONFIG`.
   */
  constructor(config: Partial<GUIAutomationConfig> = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  /**
   * Return the open page, or throw the shared "not launched" error.
   * Centralises the guard every page-level method needs.
   */
  private requirePage(): Page {
    if (!this.page) {
      throw new Error('Browser not launched');
    }
    return this.page;
  }

  /**
   * Launch the configured browser and open a page in it.
   *
   * Any browser already owned by this instance is closed first, so calling
   * `launch()` repeatedly never leaves an orphaned browser process behind.
   *
   * @throws Error when `browserType` is not one of the supported engines.
   */
  async launch(): Promise<void> {
    await this.close();

    const { browserType, headless, viewport, userAgent } = this.config;
    // Built per call (not as a static field) so the engine bindings are only
    // read when a launch is actually requested.
    const launchers = { chromium, firefox, webkit };
    const launcher = launchers[browserType];
    if (!launcher) {
      throw new Error(`Unsupported browser type: ${String(browserType)}`);
    }

    const browser = await launcher.launch({ headless });
    try {
      this.page = await browser.newPage({ viewport, userAgent });
    } catch (err) {
      // Do not leak the freshly started browser when the page cannot be
      // opened; the original failure is the one worth reporting.
      await browser.close().catch(() => undefined);
      throw err;
    }
    this.browser = browser;
    console.log(`[GUI] Browser launched: ${browserType}`);
  }

  /**
   * Close the browser, if one is open. Safe to call repeatedly.
   */
  async close(): Promise<void> {
    const browser = this.browser;
    if (!browser) {
      return;
    }
    // Drop the handles first so a failing close() cannot leave this instance
    // holding references to a dead browser/page.
    this.browser = null;
    this.page = null;
    await browser.close();
    console.log('[GUI] Browser closed');
  }

  /**
   * Navigate to `url`, launching the browser first if no page is open.
   *
   * @param url Address to load.
   * @param options Forwarded to Playwright's `page.goto`.
   */
  async goto(url: string, options?: { waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit' }): Promise<void> {
    if (!this.page) {
      await this.launch();
    }
    await this.requirePage().goto(url, options);
    console.log(`[GUI] Navigated to: ${url}`);
  }

  /**
   * Capture a full-page screenshot into `SCREENSHOTS_DIR`.
   *
   * @param name File name to use; defaults to `screenshot-<timestamp>.png`.
   *   A name without a `.png`/`.jpg`/`.jpeg` extension gets `.png` appended,
   *   because Playwright infers the image type from the path's extension.
   * @returns Path of the written file.
   */
  async screenshot(name?: string): Promise<string> {
    const page = this.requirePage();
    await mkdir(SCREENSHOTS_DIR, { recursive: true });
    const requested = name || `screenshot-${Date.now()}.png`;
    const filename = /\.(png|jpe?g)$/i.test(requested) ? requested : `${requested}.png`;
    const filepath = join(SCREENSHOTS_DIR, filename);
    await page.screenshot({ path: filepath, fullPage: true });
    console.log(`[GUI] Screenshot saved: ${filepath}`);
    return filepath;
  }

  /** Click the element matching `selector`. */
  async click(selector: string): Promise<void> {
    await this.requirePage().click(selector);
    console.log(`[GUI] Clicked: ${selector}`);
  }

  /**
   * Type `text` into the element matching `selector`.
   *
   * @param options Forwarded to Playwright's `page.type` (e.g. per-key delay).
   */
  async type(selector: string, text: string, options?: { delay?: number }): Promise<void> {
    await this.requirePage().type(selector, text, options);
    console.log(`[GUI] Typed into: ${selector}`);
  }

  /** Set the value of the form field matching `selector`. */
  async fill(selector: string, value: string): Promise<void> {
    await this.requirePage().fill(selector, value);
    console.log(`[GUI] Filled: ${selector}`);
  }

  /** Choose `value` in the dropdown matching `selector`. */
  async select(selector: string, value: string): Promise<void> {
    await this.requirePage().selectOption(selector, value);
    console.log(`[GUI] Selected: ${selector} = ${value}`);
  }

  /** Tick the checkbox matching `selector`. */
  async check(selector: string): Promise<void> {
    await this.requirePage().check(selector);
    console.log(`[GUI] Checked: ${selector}`);
  }

  /**
   * Read the text content of the element matching `selector`.
   *
   * @returns The text, or `''` when Playwright reports no text content.
   */
  async getText(selector: string): Promise<string> {
    const text = await this.requirePage().textContent(selector);
    return text ?? '';
  }

  /**
   * Read an attribute of the element matching `selector`.
   *
   * @returns The attribute value, or `''` when it is absent or empty.
   */
  async getAttribute(selector: string, attribute: string): Promise<string> {
    const value = await this.requirePage().getAttribute(selector, attribute);
    return value ?? '';
  }

  /**
   * Wait until an element matching `selector` is present.
   *
   * @param options Forwarded to Playwright's `page.waitForSelector`.
   */
  async waitFor(selector: string, options?: { timeout?: number }): Promise<void> {
    await this.requirePage().waitForSelector(selector, options);
    console.log(`[GUI] Waited for: ${selector}`);
  }

  /**
   * Wait for the page to navigate.
   *
   * NOTE(review): Playwright documents `page.waitForNavigation` as deprecated
   * in favour of `page.waitForURL`; the call is kept so this method's
   * contract does not change.
   */
  async waitForNavigation(options?: { timeout?: number, waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit' }): Promise<void> {
    await this.requirePage().waitForNavigation(options);
    console.log('[GUI] Navigation completed');
  }

  /**
   * Evaluate a JavaScript expression in the page and return its result.
   *
   * SECURITY: `script` runs verbatim inside the page; do not pass untrusted
   * text here.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- the result shape depends entirely on the caller's script
  async evaluate(script: string): Promise<any> {
    return await this.requirePage().evaluate(script);
  }

  /** Scroll the element matching `selector` into view if it is not already. */
  async scrollTo(selector: string): Promise<void> {
    await this.requirePage().locator(selector).scrollIntoViewIfNeeded();
    console.log(`[GUI] Scrolled to: ${selector}`);
  }

  /** Move the pointer over the element matching `selector`. */
  async hover(selector: string): Promise<void> {
    await this.requirePage().hover(selector);
    console.log(`[GUI] Hovered: ${selector}`);
  }

  /** Press a keyboard key (Playwright key name, e.g. `Enter`). */
  async press(key: string): Promise<void> {
    await this.requirePage().keyboard.press(key);
    console.log(`[GUI] Pressed: ${key}`);
  }

  /**
   * Click `selector` and save the download it triggers to `savePath`.
   *
   * @returns `savePath`.
   */
  async downloadFile(selector: string, savePath: string): Promise<string> {
    const page = this.requirePage();
    // Start listening before clicking so the download event cannot be missed.
    const [download] = await Promise.all([
      page.waitForEvent('download'),
      page.click(selector),
    ]);
    await download.saveAs(savePath);
    console.log(`[GUI] Downloaded: ${savePath}`);
    return savePath;
  }

  /** Return the page's current HTML. */
  async getHTML(): Promise<string> {
    return await this.requirePage().content();
  }

  /** Return the page title. */
  async getTitle(): Promise<string> {
    return await this.requirePage().title();
  }

  /** Return the page's current URL. */
  async getUrl(): Promise<string> {
    return this.requirePage().url();
  }

  /**
   * Collect the text and inner HTML of every element matching `selector`.
   *
   * @returns One entry per match, in document order; `text` is `''` when the
   *   element has no text content.
   */
  async findAll(selector: string): Promise<Array<{ text: string; html: string }>> {
    const handles = await this.requirePage().$$(selector);
    const results: Array<{ text: string; html: string }> = [];
    for (const handle of handles) {
      // The two reads are independent, so issue them together.
      const [text, html] = await Promise.all([handle.textContent(), handle.innerHTML()]);
      results.push({ text: text ?? '', html });
    }
    return results;
  }

  /**
   * Read the text of several elements at once.
   *
   * @param selectors Map of result key to selector.
   * @returns Map of the same keys to each element's text (see `getText`).
   */
  async extractData(selectors: Record<string, string>): Promise<Record<string, string>> {
    const result: Record<string, string> = {};
    for (const [key, selector] of Object.entries(selectors)) {
      result[key] = await this.getText(selector);
    }
    return result;
  }
}
/**
 * Run one free-text GUI task in a throwaway browser session.
 *
 * The task is matched against a small set of phrasings, checked in this
 * order; the first that applies wins:
 *
 * 1. `screenshot … <url>`            → full-page screenshot
 * 2. `navigate|go to … <url>`        → report the page title
 * 3. `click [on] <selector>`         → click the element
 * 4. `type|enter "<text>" into|in <selector>` → type into the element
 * 5. `extract|get [the] [text] [from|of] <selector>` → read the element text
 *
 * Matching is case-insensitive. A selector may be bare (`#id`) or quoted
 * (`"div .btn"`) when it contains spaces. If the task contains an
 * `http(s)://` URL, that page is loaded before the action runs, so
 * click/type/extract operate on it rather than on the blank start page.
 *
 * @param task Free-text instruction.
 * @param config Optional browser configuration overrides.
 * @returns A human-readable result line. Failures are not thrown; they are
 *   returned as a string starting with `[ERROR] GUI automation failed:`.
 */
export async function automateGUI(
  task: string,
  config?: Partial<GUIAutomationConfig>
): Promise<string> {
  const automation = new GUIAutomation(config);
  try {
    await automation.launch();

    const { url, instruction } = splitTaskUrl(task);
    // Keywords are looked up in the instruction with the URL removed, so a
    // URL such as https://example.com/screenshot cannot trigger an action.
    const instructionLower = instruction.toLowerCase();

    if (url !== undefined) {
      await automation.goto(url);

      if (instructionLower.includes('screenshot')) {
        const path = await automation.screenshot();
        return `Screenshot saved to: ${path}`;
      }
      if (instructionLower.includes('navigate') || instructionLower.includes('go to')) {
        const title = await automation.getTitle();
        return `Navigated to: ${url}\nPage title: ${title}`;
      }
    }

    const clickMatch = TASK_CLICK_PATTERN.exec(instruction);
    const clickTarget = clickMatch
      ? firstDefinedGroup(clickMatch[1], clickMatch[2], clickMatch[3])
      : undefined;
    if (clickTarget !== undefined) {
      await automation.click(clickTarget);
      return `Clicked: ${clickTarget}`;
    }

    const typeMatch = TASK_TYPE_PATTERN.exec(instruction);
    if (typeMatch) {
      const text = firstDefinedGroup(typeMatch[1], typeMatch[2]);
      const target = firstDefinedGroup(typeMatch[3], typeMatch[4], typeMatch[5]);
      if (text !== undefined && target !== undefined) {
        await automation.type(target, text);
        return `Typed "${text}" into: ${target}`;
      }
    }

    const extractMatch = TASK_EXTRACT_PATTERN.exec(instruction);
    const extractTarget = extractMatch
      ? firstDefinedGroup(extractMatch[1], extractMatch[2], extractMatch[3])
      : undefined;
    if (extractTarget !== undefined) {
      const text = await automation.getText(extractTarget);
      return `Extracted from ${extractTarget}: ${text}`;
    }

    return `Task executed. Current URL: ${await automation.getUrl()}`;
  } catch (err) {
    return `[ERROR] GUI automation failed: ${err instanceof Error ? err.message : String(err)}`;
  } finally {
    await automation.close();
  }
}

/** First `http(s)://` URL in a task; stops at whitespace, quotes and angle brackets. */
const TASK_URL_PATTERN = /https?:\/\/[^\s"'<>]+/i;

/** `click [on] <selector>` — groups 1-3: double-quoted, single-quoted, bare selector. */
const TASK_CLICK_PATTERN = /\bclick\s+(?:on\s+)?(?:"([^"]+)"|'([^']+)'|(\S+))/i;

/**
 * `type|enter "<text>" into|in <selector>` — groups 1-2: quoted text,
 * groups 3-5: double-quoted, single-quoted, bare selector.
 */
const TASK_TYPE_PATTERN =
  /\b(?:type|enter)\s+(?:"([^"]*)"|'([^']*)')\s+(?:into|in)\s+(?:"([^"]+)"|'([^']+)'|(\S+))/i;

/**
 * `extract|get [the] [text] [from|of] <selector>` — groups 1-3:
 * double-quoted, single-quoted, bare selector.
 */
const TASK_EXTRACT_PATTERN =
  /\b(?:extract|get)\s+(?:the\s+)?(?:text\s+)?(?:(?:from|of)\s+)?(?:"([^"]+)"|'([^']+)'|(\S+))/i;

/** Return the first capture group that actually participated in the match. */
function firstDefinedGroup(...groups: Array<string | undefined>): string | undefined {
  return groups.find((group) => group !== undefined);
}

/**
 * Separate a task into its target URL (if any) and the remaining instruction.
 * Trailing sentence punctuation is not treated as part of the URL.
 */
function splitTaskUrl(task: string): { url: string | undefined; instruction: string } {
  const found = TASK_URL_PATTERN.exec(task);
  if (!found) {
    return { url: undefined, instruction: task };
  }
  const raw = found[0];
  return {
    url: raw.replace(/[.,;:!?]+$/, ''),
    instruction: task.replace(raw, ' '),
  };
}
/**
 * Command-line interface for GUI automation.
 *
 * `args[0]` is the command and the rest are its operands. The command and
 * its required operands are validated before any browser is started, so a
 * typo or a missing argument fails immediately instead of launching a
 * browser just to report the error.
 *
 * `click`, `type`, `extract`, `html` and `title` accept an optional trailing
 * URL; when given, that page is loaded before the action runs. Without it
 * they act on the blank page a fresh browser starts with.
 *
 * All outcomes, including errors, are reported with `console.log`; nothing
 * is thrown for invalid input or for failures while the command runs.
 *
 * @param args Command followed by its operands; empty prints the usage text.
 */
export async function guiCommand(args: string[]): Promise<void> {
  console.log('🖥️ QwenClaw GUI Automation\n');
  if (args.length === 0) {
    printGuiUsage();
    return;
  }

  const [command = '', ...operands] = args;
  const plan = planGuiCommand(command, operands);
  if ('error' in plan) {
    console.log(`[ERROR] ${plan.error}`);
    return;
  }

  const automation = new GUIAutomation();
  try {
    await automation.launch();
    if (plan.url !== undefined) {
      await automation.goto(plan.url);
    }
    await plan.run(automation);
  } catch (err) {
    console.log(`[ERROR] ${err instanceof Error ? err.message : String(err)}`);
  } finally {
    await automation.close();
  }
}

/** A validated CLI request: the page to open first (if any) and the action to run on it. */
interface GuiCliPlan {
  url: string | undefined;
  run: (automation: GUIAutomation) => Promise<void>;
}

/** Print the CLI usage text. */
function printGuiUsage(): void {
  console.log('Usage: qwenclaw gui <command> [options]');
  console.log('');
  console.log('Commands:');
  console.log(' screenshot <url> Take screenshot of webpage');
  console.log(' navigate <url> Navigate to webpage');
  console.log(' click <selector> [url] Click element');
  console.log(' type <text> <sel> [url] Type text into element');
  console.log(' extract <selector> [url] Extract text from element');
  console.log(' html [url] Get page HTML');
  console.log(' title [url] Get page title');
  console.log('');
  console.log('Examples:');
  console.log(' qwenclaw gui screenshot https://example.com');
  console.log(' qwenclaw gui navigate https://github.com');
  console.log(' qwenclaw gui click "#login-button"');
  console.log(' qwenclaw gui type "hello" "#search-input"');
  console.log(' qwenclaw gui extract "h1" https://example.com');
}

/**
 * Validate a command and its operands without touching the browser.
 * Returns the action to run, or the error message to show the user.
 */
function planGuiCommand(command: string, operands: string[]): GuiCliPlan | { error: string } {
  switch (command) {
    case 'screenshot': {
      const [url] = operands;
      if (!url) {
        return { error: 'URL required' };
      }
      return {
        url,
        run: async (automation) => {
          const path = await automation.screenshot();
          console.log(`✅ Screenshot saved: ${path}`);
        },
      };
    }
    case 'navigate': {
      const [url] = operands;
      if (!url) {
        return { error: 'URL required' };
      }
      return {
        url,
        run: async (automation) => {
          const title = await automation.getTitle();
          console.log(`✅ Navigated to: ${url}`);
          console.log(` Title: ${title}`);
        },
      };
    }
    case 'click': {
      const [selector, url] = operands;
      if (!selector) {
        return { error: 'Selector required' };
      }
      return {
        url,
        run: async (automation) => {
          await automation.click(selector);
          console.log(`✅ Clicked: ${selector}`);
        },
      };
    }
    case 'type': {
      const [text, selector, url] = operands;
      if (!text || !selector) {
        return { error: 'Text and selector required' };
      }
      return {
        url,
        run: async (automation) => {
          await automation.type(selector, text);
          console.log(`✅ Typed "${text}" into: ${selector}`);
        },
      };
    }
    case 'extract': {
      const [selector, url] = operands;
      if (!selector) {
        return { error: 'Selector required' };
      }
      return {
        url,
        run: async (automation) => {
          const text = await automation.getText(selector);
          console.log(`✅ Extracted from ${selector}:`);
          console.log(` ${text}`);
        },
      };
    }
    case 'html': {
      const [url] = operands;
      return {
        url,
        run: async (automation) => {
          const html = await automation.getHTML();
          // Only mark the output as truncated when something was cut off.
          console.log(html.length > 1000 ? `${html.substring(0, 1000)}...` : html);
        },
      };
    }
    case 'title': {
      const [url] = operands;
      return {
        url,
        run: async (automation) => {
          const title = await automation.getTitle();
          console.log(`Page title: ${title}`);
        },
      };
    }
    default:
      return { error: `Unknown command: ${command}` };
  }
}