From 49a6e2442135a55dc5ad5e083caf54e92dcd0cda Mon Sep 17 00:00:00 2001
From: Oleh Luchkiv <olesho@gmail.com>
Date: Thu, 25 Dec 2025 18:54:18 -0800
Subject: [PATCH 1/6] Added tracing metadata for LiteLLM provider

---
 front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
index 4e313892ae..66a5e10655 100644
--- a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
+++ b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
@@ -464,6 +464,7 @@ export class EvaluationAgent {
       logger.info('DevTools Langfuse tracing configured successfully from request');
     }
 
+
     // Create a trace for this evaluation - use tracing from request if available
     const traceId = requestTracing.trace_id || `eval-${params.evaluationId}-${Date.now()}`;
     const sessionId = requestTracing.session_id || `eval-session-${Date.now()}`;

From 356fb31a2afcc91240261aaa2f6c22105a5b9ad7 Mon Sep 17 00:00:00 2001
From: Daedaelius <oleh@browseroperator.io>
Date: Sat, 27 Dec 2025 10:09:17 -0800
Subject: [PATCH 2/6] Added lightweight deployment: dockerized devtools panel +
 external browser

---
 docker/Makefile                       | 54 +++++++++++++++++++++++++++
 docker/docker-compose.lightweight.yml | 46 +++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 docker/Makefile
 create mode 100644 docker/docker-compose.lightweight.yml

diff --git a/docker/Makefile b/docker/Makefile
new file mode 100644
index 0000000000..a7af2f468c
--- /dev/null
+++ b/docker/Makefile
@@ -0,0 +1,54 @@
+# Makefile for Browser Operator Core
+# Provides DevTools frontend + Agent Server deployments
+
+.PHONY: help build devtools-up up down logs status chrome
+
+help: ## Show this help
+	@echo "Browser Operator Core - Docker Deployments"
+	@echo "==========================================="
+	@echo ""
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  %-12s %s\n", $$1, $$2}'
+	@echo ""
+	@echo "Deployment options:"
+	@echo "  Type 1: make devtools-up  # DevTools only (AUTOMATED_MODE=true by default)"
+	@echo "  Type 2: make up           # DevTools + Agent Server (lightweight)"
+	@echo ""
+	@echo "Full build (includes agent-server in image):"
+	@echo "  make build && docker-compose up -d"
+	@echo ""
+	@echo "For manual debugging mode (Type 1 without API key bypass):"
+	@echo "  docker-compose build --build-arg AUTOMATED_MODE=false && make devtools-up"
+
+build: ## Build full image (DevTools + Agent Server baked in)
+	docker-compose build
+
+devtools-up: ## Start DevTools only (Type 1)
+	docker-compose up -d
+	@echo ""
+	@echo "DevTools UI: http://localhost:8000"
+
+up: ## Start DevTools + Agent Server (Type 2 - lightweight)
+	docker-compose -f docker-compose.lightweight.yml up -d
+	@echo ""
+	@echo "Services:"
+	@echo "  DevTools:   http://localhost:8000"
+	@echo "  Agent API:  http://localhost:8080"
+	@echo "  Agent WS:   ws://localhost:8082"
+
+down: ## Stop all containers
+	docker-compose -f docker-compose.lightweight.yml down 2>/dev/null || true
+	docker-compose down 2>/dev/null || true
+
+logs: ## Show logs
+	docker-compose -f docker-compose.lightweight.yml logs -f 2>/dev/null || docker-compose logs -f
+
+status: ## Show container status
+	@docker ps --filter "name=browser-operator"
+
+chrome: ## Launch Chrome Canary with custom DevTools
+	"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary" \
+		--remote-debugging-port=9222 \
+		--remote-allow-origins="*" \
+		--auto-open-devtools-for-tabs \
+		--user-data-dir=/tmp/chrome-debug-profile \
+		--custom-devtools-frontend=http://localhost:8000/
diff --git a/docker/docker-compose.lightweight.yml b/docker/docker-compose.lightweight.yml
new file mode 100644
index 0000000000..3fc5e9fe84
--- /dev/null
+++ b/docker/docker-compose.lightweight.yml
@@ -0,0 +1,50 @@
+# Lightweight deployment: DevTools + Agent Server (no full build required)
+# Uses pre-built DevTools image + separate Agent Server container
+#
+# Usage:
+#   docker-compose -f docker-compose.lightweight.yml up -d
+
+services:
+  devtools:
+    image: browser-operator-devtools:latest
+    container_name: browser-operator-devtools
+    ports:
+      - "8000:8000"
+    volumes:
+      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
+    restart: unless-stopped
+    networks:
+      - devtools-network
+
+  agent-server:
+    image: node:18-alpine
+    container_name: browser-operator-agent
+    working_dir: /app
+    volumes:
+      - ../agent-server/nodejs:/app:ro
+      - agent-data:/app/clients
+    command: ["node", "start.js"]
+    ports:
+      - "8080:8080"
+      - "8082:8082"
+    environment:
+      - NODE_ENV=production
+      - HOST=0.0.0.0
+      - PORT=8082
+      - API_PORT=8080
+      - CDP_HOST=host.docker.internal
+      - CDP_PORT=9222
+    # host.docker.internal is only predefined on Docker Desktop; map it to the
+    # host gateway so the CDP_HOST name also resolves on Linux engines (20.10+).
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: unless-stopped
+    networks:
+      - devtools-network
+
+volumes:
+  agent-data:
+
+networks:
+  devtools-network:
+    driver: bridge

From 742a6b60b6dbe4c67bd77b876a75350681f22f69 Mon Sep 17 00:00:00 2001
From: Daedaelius <oleh@browseroperator.io>
Date: Mon, 29 Dec 2025 23:00:32 -0600
Subject: [PATCH 3/6] Expose tool calls as Agent Server API

---
 agent-server/nodejs/src/api-server.js         | 421 ++++++++++++++++++
 .../nodejs/src/lib/BrowserAgentServer.js      |  49 ++
 .../ai_chat/evaluation/EvaluationAgent.ts     |  96 +++-
 .../ai_chat/evaluation/EvaluationProtocol.ts  |  81 ++++
 .../evaluation/remote/EvaluationAgent.ts      |  92 +++-
 .../evaluation/remote/EvaluationProtocol.ts   |  85 ++++
 6 files changed, 822 insertions(+), 2 deletions(-)

diff --git a/agent-server/nodejs/src/api-server.js b/agent-server/nodejs/src/api-server.js
index 8bdf0b3277..9a1d440511 100644
--- a/agent-server/nodejs/src/api-server.js
+++ b/agent-server/nodejs/src/api-server.js
@@ -163,6 +163,71 @@ class APIServer {
             result = await this.getDOMSnapshot(JSON.parse(body));
             break;
 
+          case '/v1/tools/execute':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.executeToolDirect(JSON.parse(body));
+            break;
+
+          case '/v1/tools':
+            if (method !== 'GET') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = this.getAvailableTools();
+            break;
+
+          // Convenience endpoints for common actions
+          case '/actions/click':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.handleActionClick(JSON.parse(body));
+            break;
+
+          case '/actions/type':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.handleActionType(JSON.parse(body));
+            break;
+
+          case '/actions/scroll':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.handleActionScroll(JSON.parse(body));
+            break;
+
+          case '/actions/navigate':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.handleActionNavigate(JSON.parse(body));
+            break;
+
+          case '/actions/hover':
+            if (method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.handleActionHover(JSON.parse(body));
+            break;
+
+          case '/page/accessibility-tree':
+            if (method !== 'GET' && method !== 'POST') {
+              this.sendError(res, 405, 'Method not allowed');
+              return;
+            }
+            result = await this.getAccessibilityTree(method === 'POST' ? JSON.parse(body) : parsedUrl.query);
+            break;
+
           default:
             this.sendError(res, 404, 'Not found');
             return;
@@ -432,6 +497,362 @@ class APIServer {
     };
   }
 
+  /**
+   * Execute a tool directly without LLM orchestration
+   * POST /v1/tools/execute
+   *
+   * Request format:
+   * {
+   *   "clientId": "xxx",
+   *   "tabId": "yyy",
+   *   "tool": "perform_action",
+   *   "args": { "method": "click", "nodeId": 123 },
+   *   "timeout": 30000
+   * }
+   */
+  async executeToolDirect(payload) {
+    const { clientId, tabId, tool, args, timeout = 30000 } = payload;
+
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    if (!tool) {
+      throw new Error('Tool name is required');
+    }
+
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+
+    logger.info('Executing tool directly', {
+      baseClientId,
+      tabId,
+      tool,
+      timeout,
+      hasArgs: !!args
+    });
+
+    // Find the connection for this tab
+    const connection = this.browserAgentServer.connectedClients.get(compositeClientId);
+
+    if (!connection) {
+      throw new Error(`No DevTools connection found for tab ${tabId}. Ensure the tab's DevTools is connected.`);
+    }
+
+    if (!connection.ready) {
+      throw new Error(`DevTools connection for tab ${tabId} is not ready.`);
+    }
+
+    // Execute the tool via RPC
+    const result = await this.browserAgentServer.executeToolDirect(
+      connection,
+      tool,
+      args,
+      timeout
+    );
+
+    return {
+      clientId: baseClientId,
+      tabId,
+      tool,
+      success: result?.result?.success ?? false,
+      output: result?.result?.output,
+      executionTime: result?.result?.executionTime,
+      error: result?.error?.message,
+      timestamp: Date.now()
+    };
+  }
+
+  /**
+   * Get list of available tools
+   * GET /v1/tools
+   *
+   * Note: This returns a static list of known tools since the tool registry
+   * lives in the DevTools frontend. For dynamic tool discovery, use the
+   * DevTools connection.
+   */
+  getAvailableTools() {
+    // Static list of commonly available tools
+    // The actual registry is in the DevTools frontend (TypeScript)
+    const tools = [
+      {
+        name: 'perform_action',
+        description: 'Execute DOM actions on elements (click, type, hover, etc.)',
+        schema: {
+          type: 'object',
+          properties: {
+            method: {
+              type: 'string',
+              enum: ['click', 'fill', 'type', 'press', 'hover', 'scrollIntoView', 'selectOption', 'check', 'uncheck', 'setChecked'],
+              description: 'Action method to execute'
+            },
+            nodeId: {
+              type: ['number', 'string'],
+              description: 'Accessibility tree node ID'
+            },
+            args: {
+              type: 'array',
+              description: 'Arguments for the action (e.g., text to type)'
+            },
+            reasoning: {
+              type: 'string',
+              description: 'Reasoning for the action'
+            }
+          },
+          required: ['method', 'nodeId']
+        }
+      },
+      {
+        name: 'navigate_url',
+        description: 'Navigate to a URL',
+        schema: {
+          type: 'object',
+          properties: {
+            url: {
+              type: 'string',
+              description: 'URL to navigate to'
+            },
+            reasoning: {
+              type: 'string',
+              description: 'Reasoning for navigation'
+            }
+          },
+          required: ['url']
+        }
+      },
+      {
+        name: 'scroll_page',
+        description: 'Scroll the page',
+        schema: {
+          type: 'object',
+          properties: {
+            direction: {
+              type: 'string',
+              enum: ['up', 'down', 'left', 'right'],
+              description: 'Scroll direction'
+            },
+            amount: {
+              type: 'number',
+              description: 'Scroll amount in pixels'
+            },
+            pages: {
+              type: 'number',
+              description: 'Number of pages to scroll'
+            }
+          }
+        }
+      },
+      {
+        name: 'get_accessibility_tree',
+        description: 'Get the accessibility tree for the current page',
+        schema: {
+          type: 'object',
+          properties: {
+            maxDepth: {
+              type: 'number',
+              description: 'Maximum depth to traverse'
+            }
+          }
+        }
+      },
+      {
+        name: 'take_screenshot',
+        description: 'Capture a screenshot of the page',
+        schema: {
+          type: 'object',
+          properties: {
+            fullPage: {
+              type: 'boolean',
+              description: 'Capture full page or viewport only'
+            }
+          }
+        }
+      },
+      {
+        name: 'get_page_html',
+        description: 'Get the HTML content of the page',
+        schema: {
+          type: 'object',
+          properties: {}
+        }
+      },
+      {
+        name: 'execute_javascript',
+        description: 'Execute JavaScript in the page context',
+        schema: {
+          type: 'object',
+          properties: {
+            code: {
+              type: 'string',
+              description: 'JavaScript code to execute'
+            }
+          },
+          required: ['code']
+        }
+      }
+    ];
+
+    return {
+      tools,
+      count: tools.length,
+      timestamp: Date.now()
+    };
+  }
+
+  // ============================================
+  // Convenience Action Endpoints
+  // ============================================
+
+  /**
+   * Click on an element
+   * POST /actions/click
+   *
+   * Request: { clientId, tabId, nodeId, timeout? }
+   */
+  async handleActionClick(payload) {
+    const { clientId, tabId, nodeId, timeout = 30000 } = payload;
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'perform_action',
+      args: {
+        method: 'click',
+        nodeId,
+        reasoning: 'Click action via API'
+      },
+      timeout
+    });
+  }
+
+  /**
+   * Type text into an element; forwarded as a `perform_action` call with method `fill`.
+   * POST /actions/type
+   *
+   * Request: { clientId, tabId, nodeId, text, timeout? } - throws when `text` is absent.
+   */
+  async handleActionType(payload) {
+    const { clientId, tabId, nodeId, text, timeout = 30000 } = payload;
+
+    if (text === undefined || text === null) { // '' is forwarded as-is; only absence is rejected
+      throw new Error('Text is required for type action');
+    }
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'perform_action',
+      args: {
+        method: 'fill',
+        nodeId,
+        args: [text],
+        reasoning: 'Type action via API'
+      },
+      timeout
+    });
+  }
+
+  /**
+   * Scroll the page
+   * POST /actions/scroll
+   *
+   * Request: { clientId, tabId, direction?, amount?, pages?, timeout? }
+   */
+  async handleActionScroll(payload) {
+    const { clientId, tabId, direction = 'down', amount, pages, timeout = 30000 } = payload;
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'scroll_page',
+      args: {
+        direction,
+        amount,
+        pages
+      },
+      timeout
+    });
+  }
+
+  /**
+   * Navigate to a URL
+   * POST /actions/navigate
+   *
+   * Request: { clientId, tabId, url, timeout? }
+   */
+  async handleActionNavigate(payload) {
+    const { clientId, tabId, url, timeout = 30000 } = payload;
+
+    if (!url) {
+      throw new Error('URL is required for navigate action');
+    }
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'navigate_url',
+      args: {
+        url,
+        reasoning: 'Navigate action via API'
+      },
+      timeout
+    });
+  }
+
+  /**
+   * Hover over an element
+   * POST /actions/hover
+   *
+   * Request: { clientId, tabId, nodeId, timeout? }
+   */
+  async handleActionHover(payload) {
+    const { clientId, tabId, nodeId, timeout = 30000 } = payload;
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'perform_action',
+      args: {
+        method: 'hover',
+        nodeId,
+        reasoning: 'Hover action via API'
+      },
+      timeout
+    });
+  }
+
+  /**
+   * Get the accessibility tree for a page
+   * GET/POST /page/accessibility-tree
+   *
+   * Request: { clientId, tabId, maxDepth? }
+   */
+  async getAccessibilityTree(payload) {
+    const { clientId, tabId, maxDepth } = payload;
+
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    return this.executeToolDirect({
+      clientId,
+      tabId,
+      tool: 'get_accessibility_tree',
+      args: {
+        maxDepth
+      },
+      timeout: 30000
+    });
+  }
+
   /**
    * Handle OpenAI Responses API compatible requests with nested model format
    */
diff --git a/agent-server/nodejs/src/lib/BrowserAgentServer.js b/agent-server/nodejs/src/lib/BrowserAgentServer.js
index 2d0c7a7458..a55e427c68 100644
--- a/agent-server/nodejs/src/lib/BrowserAgentServer.js
+++ b/agent-server/nodejs/src/lib/BrowserAgentServer.js
@@ -1418,6 +1418,55 @@ export class BrowserAgentServer extends EventEmitter {
     }
   }
 
+  /**
+   * Execute a tool directly on a connected DevTools client, bypassing LLM orchestration.
+   * @param {Object} connection - DevTools connection; its `rpcClient`, `ws` and `clientId` are used
+   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
+   * @param {Object} args - Tool-specific arguments, forwarded unchanged
+   * @param {number} timeout - Tool timeout in ms; the RPC call itself waits `timeout + 5000`
+   * @returns {Promise<Object>} Raw response of the `execute_tool` RPC call
+   * @throws Rethrows any error raised by the RPC call after logging it
+   */
+  async executeToolDirect(connection, tool, args, timeout = 30000) {
+    // No request id is built here: only the method name, params and timeout go to callMethod.
+
+    logger.info('Executing tool directly', {
+      clientId: connection.clientId,
+      tool,
+      timeout
+    });
+
+    try {
+      // Prepare RPC request for execute_tool method
+      const response = await connection.rpcClient.callMethod(
+        connection.ws,
+        'execute_tool',
+        {
+          tool,
+          args,
+          timeout
+        },
+        timeout + 5000 // Add buffer for network overhead
+      );
+
+      logger.info('Tool execution completed', {
+        clientId: connection.clientId,
+        tool,
+        success: response?.result?.success
+      });
+
+      return response;
+
+    } catch (error) {
+      logger.error('Tool execution failed', {
+        clientId: connection.clientId,
+        tool,
+        error: error.message
+      });
+      throw error;
+    }
+  }
+
   /**
    * Execute JavaScript in a browser tab
    * @param {string} tabId - Tab ID (target ID)
diff --git a/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts b/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts
index 395d8a8141..3ac3e4a14b 100644
--- a/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts
+++ b/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts
@@ -20,17 +20,23 @@ import {
   EvaluationRequest,
   EvaluationSuccessResponse,
   EvaluationErrorResponse,
+  ToolExecutionRequest,
+  ToolExecutionSuccessResponse,
+  ToolExecutionErrorResponse,
   ErrorCodes,
   isWelcomeMessage,
   isRegistrationAckMessage,
   isEvaluationRequest,
+  isToolExecutionRequest,
   isPongMessage,
   createRegisterMessage,
   createReadyMessage,
   createAuthVerifyMessage,
   createStatusMessage,
   createSuccessResponse,
-  createErrorResponse
+  createErrorResponse,
+  createToolExecutionSuccessResponse,
+  createToolExecutionErrorResponse
 } from './EvaluationProtocol.js';
 
 const logger = createLogger('EvaluationAgent');
@@ -171,6 +177,9 @@ export class EvaluationAgent {
       else if (isEvaluationRequest(message)) {
         await this.handleEvaluationRequest(message);
       }
+      else if (isToolExecutionRequest(message)) {
+        await this.handleToolExecutionRequest(message);
+      }
       else if (isPongMessage(message)) {
         logger.debug('Received pong');
       }
@@ -599,6 +608,91 @@ export class EvaluationAgent {
     }
   }
 
+  /**
+   * Handle a direct `execute_tool` request: look the tool up in ToolRegistry, run it with a
+   * timeout (30000 ms when none is given) and send a success or error response to the client.
+   */
+  private async handleToolExecutionRequest(request: ToolExecutionRequest): Promise<void> {
+    const { params, id } = request;
+    const startTime = Date.now();
+
+    logger.info('Received tool execution request', {
+      tool: params.tool,
+      hasArgs: !!params.args,
+      timeout: params.timeout
+    });
+
+    try {
+      // Get the tool from registry
+      const tool = ToolRegistry.getRegisteredTool(params.tool);
+      if (!tool) {
+        const errorResponse = createToolExecutionErrorResponse(
+          id,
+          ErrorCodes.INVALID_TOOL,
+          `Tool not found: ${params.tool}`,
+          params.tool,
+          `Tool '${params.tool}' is not registered in the ToolRegistry`
+        );
+        if (this.client) {
+          this.client.send(errorResponse);
+        }
+        return;
+      }
+
+      // Execute the tool directly (no LLM, no navigation, no retries)
+      const timeout = params.timeout || 30000;
+      const result = await this.executeToolWithTimeout(
+        tool,
+        params.args || {}, // args may be absent; hand the tool an empty object instead
+        timeout,
+        undefined, // No tracing context for direct tool calls
+        params.tool
+      );
+
+      const executionTime = Date.now() - startTime;
+
+      // Send success response
+      const successResponse = createToolExecutionSuccessResponse(
+        id,
+        params.tool,
+        result,
+        executionTime
+      );
+
+      if (this.client) {
+        this.client.send(successResponse);
+      }
+
+      logger.info('Tool execution completed', {
+        tool: params.tool,
+        executionTime,
+        success: true
+      });
+
+    } catch (error) {
+      const executionTime = Date.now() - startTime;
+      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+
+      logger.error(`Tool execution failed: ${errorMessage}`, {
+        tool: params.tool,
+        executionTime
+      });
+
+      // Send error response
+      const errorResponse = createToolExecutionErrorResponse(
+        id,
+        ErrorCodes.TOOL_EXECUTION_ERROR,
+        'Tool execution failed',
+        params.tool,
+        errorMessage
+      );
+
+      if (this.client) {
+        this.client.send(errorResponse);
+      }
+    }
+  }
+
   private async executeToolWithTimeout(
     tool: any,
     input: any,
diff --git a/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts b/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts
index 40ea10078e..886fdf883a 100644
--- a/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts
+++ b/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts
@@ -79,6 +79,44 @@ export interface EvaluationRequest {
   id: string;
 }
 
+// Direct tool execution request (no LLM orchestration)
+export interface ToolExecutionRequest {
+  jsonrpc: '2.0';
+  method: 'execute_tool';
+  params: ToolExecutionParams;
+  id: string;
+}
+
+export interface ToolExecutionParams {
+  tool: string;           // Tool name (e.g., 'perform_action', 'navigate_url')
+  args: any;              // Tool-specific arguments
+  timeout?: number;       // Optional timeout (default 30000ms)
+}
+
+export interface ToolExecutionSuccessResponse {
+  jsonrpc: '2.0';
+  result: {
+    success: true;
+    output: any;
+    executionTime: number;
+    tool: string;
+  };
+  id: string;
+}
+
+export interface ToolExecutionErrorResponse {
+  jsonrpc: '2.0';
+  error: {
+    code: number;
+    message: string;
+    data?: {
+      tool: string;
+      error: string;
+    };
+  };
+  id: string;
+}
+
 export interface EvaluationParams {
   evaluationId: string;
   name: string;
@@ -170,6 +208,10 @@ export function isEvaluationRequest(msg: any): msg is EvaluationRequest {
   return msg?.jsonrpc === '2.0' && msg?.method === 'evaluate';
 }
 
+export function isToolExecutionRequest(msg: any): msg is ToolExecutionRequest {
+  return msg?.jsonrpc === '2.0' && msg?.method === 'execute_tool' && Boolean(msg?.params); // handler reads params.tool
+}
+
 export function isPongMessage(msg: any): msg is PongMessage {
   return msg?.type === 'pong';
 }
@@ -254,4 +296,43 @@ export function createErrorResponse(
     },
     id
   };
+}
+
+export function createToolExecutionSuccessResponse(
+  id: string,
+  tool: string,
+  output: any,
+  executionTime: number
+): ToolExecutionSuccessResponse {
+  return {
+    jsonrpc: '2.0',
+    result: {
+      success: true,
+      output,
+      executionTime,
+      tool
+    },
+    id
+  };
+}
+
+export function createToolExecutionErrorResponse(
+  id: string,
+  code: number,
+  message: string,
+  tool: string,
+  error: string
+): ToolExecutionErrorResponse {
+  return {
+    jsonrpc: '2.0',
+    error: {
+      code,
+      message,
+      data: {
+        tool,
+        error
+      }
+    },
+    id
+  };
 }
\ No newline at end of file
diff --git a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
index 66a5e10655..1aff675e57 100644
--- a/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
+++ b/front_end/panels/ai_chat/evaluation/remote/EvaluationAgent.ts
@@ -29,11 +29,13 @@ import {
   EvaluationErrorResponse,
   LLMConfigurationRequest,
   LLMConfigurationResponse,
+  ToolExecutionRequest,
   ErrorCodes,
   isWelcomeMessage,
   isRegistrationAckMessage,
   isEvaluationRequest,
   isLLMConfigurationRequest,
+  isToolExecutionRequest,
   isPongMessage,
   createRegisterMessage,
   createReadyMessage,
@@ -41,7 +43,9 @@ import {
   createStatusMessage,
   createSuccessResponse,
   createErrorResponse,
-  createLLMConfigurationResponse
+  createLLMConfigurationResponse,
+  createToolExecutionSuccessResponse,
+  createToolExecutionErrorResponse
 } from './EvaluationProtocol.js';
 
 const logger = createLogger('EvaluationAgent');
@@ -198,6 +202,9 @@ export class EvaluationAgent {
       else if (isLLMConfigurationRequest(message)) {
         await this.handleLLMConfigurationRequest(message);
       }
+      else if (isToolExecutionRequest(message)) {
+        await this.handleToolExecutionRequest(message);
+      }
       else if (isPongMessage(message)) {
         logger.debug('Received pong');
       }
@@ -1350,4 +1357,87 @@ export class EvaluationAgent {
       }
     }
   }
+
+  /**
+   * Handle direct tool execution requests without LLM orchestration
+   */
+  private async handleToolExecutionRequest(request: ToolExecutionRequest): Promise<void> {
+    const { params, id } = request;
+    const startTime = Date.now();
+
+    logger.info('Received tool execution request', {
+      tool: params.tool,
+      hasArgs: !!params.args,
+      timeout: params.timeout || 30000
+    });
+
+    try {
+      // Get the tool from registry
+      const tool = ToolRegistry.getRegisteredTool(params.tool);
+      if (!tool) {
+        const errorResponse = createToolExecutionErrorResponse(
+          id,
+          ErrorCodes.INVALID_PARAMS,
+          `Tool not found: ${params.tool}`,
+          params.tool,
+          `Tool '${params.tool}' is not registered`
+        );
+        if (this.client) {
+          this.client.send(errorResponse);
+        }
+        logger.error('Tool not found', { tool: params.tool });
+        return;
+      }
+
+      // Execute the tool with timeout
+      const timeout = params.timeout || 30000;
+      const result = await this.executeToolWithTimeout(
+        tool,
+        params.args || {},
+        timeout,
+        undefined,
+        params.tool
+      );
+
+      const executionTime = Date.now() - startTime;
+
+      // Send success response
+      const successResponse = createToolExecutionSuccessResponse(
+        id,
+        params.tool,
+        result,
+        executionTime
+      );
+
+      if (this.client) {
+        this.client.send(successResponse);
+      }
+
+      logger.info('Tool execution completed', {
+        tool: params.tool,
+        executionTime,
+        hasResult: !!result
+      });
+
+    } catch (error) {
+      const executionTime = Date.now() - startTime;
+      logger.error('Tool execution failed', {
+        tool: params.tool,
+        error: error instanceof Error ? error.message : String(error),
+        executionTime
+      });
+
+      const errorResponse = createToolExecutionErrorResponse(
+        id,
+        ErrorCodes.INTERNAL_ERROR,
+        'Tool execution failed',
+        params.tool,
+        error instanceof Error ? error.message : String(error)
+      );
+
+      if (this.client) {
+        this.client.send(errorResponse);
+      }
+    }
+  }
 }
diff --git a/front_end/panels/ai_chat/evaluation/remote/EvaluationProtocol.ts b/front_end/panels/ai_chat/evaluation/remote/EvaluationProtocol.ts
index 3bf90ed2b9..196545ddd8 100644
--- a/front_end/panels/ai_chat/evaluation/remote/EvaluationProtocol.ts
+++ b/front_end/panels/ai_chat/evaluation/remote/EvaluationProtocol.ts
@@ -351,4 +351,89 @@ export function createLLMConfigurationResponse(
     },
     id
   };
+}
+
+// Direct Tool Execution Messages (bypass LLM orchestration)
+
+export interface ToolExecutionRequest {
+  jsonrpc: '2.0';
+  method: 'execute_tool';
+  params: ToolExecutionParams;
+  id: string;
+}
+
+export interface ToolExecutionParams {
+  tool: string;
+  args?: Record<string, any>;
+  timeout?: number;
+}
+
+export interface ToolExecutionSuccessResponse {
+  jsonrpc: '2.0';
+  result: {
+    success: true;
+    output: any;
+    executionTime: number;
+    tool: string;
+  };
+  id: string;
+}
+
+export interface ToolExecutionErrorResponse {
+  jsonrpc: '2.0';
+  error: {
+    code: number;
+    message: string;
+    data: {
+      tool: string;
+      error: string;
+    };
+  };
+  id: string;
+}
+
+// Type guard for tool execution
+export function isToolExecutionRequest(msg: any): msg is ToolExecutionRequest {
+  return msg !== null &&
+         typeof msg === 'object' &&
+         msg.jsonrpc === '2.0' &&
+         msg.method === 'execute_tool' &&
+         msg.params !== undefined;
+}
+
+// Helper functions for tool execution
+export function createToolExecutionSuccessResponse(
+  id: string,
+  tool: string,
+  output: any,
+  executionTime: number
+): ToolExecutionSuccessResponse {
+  return {
+    jsonrpc: '2.0',
+    result: {
+      success: true,
+      output,
+      executionTime,
+      tool
+    },
+    id
+  };
+}
+
+export function createToolExecutionErrorResponse(
+  id: string,
+  code: number,
+  message: string,
+  tool: string,
+  error: string
+): ToolExecutionErrorResponse {
+  return {
+    jsonrpc: '2.0',
+    error: {
+      code,
+      message,
+      data: { tool, error }
+    },
+    id
+  };
 }
\ No newline at end of file

From 49216cfbbe37c3b354fd2aa22b9c57887f160f60 Mon Sep 17 00:00:00 2001
From: Daedaelius <oleh@browseroperator.io>
Date: Mon, 5 Jan 2026 20:10:21 -0600
Subject: [PATCH 4/6] Fix tool name

---
 agent-server/nodejs/src/api-server.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agent-server/nodejs/src/api-server.js b/agent-server/nodejs/src/api-server.js
index 9a1d440511..1d010f70cf 100644
--- a/agent-server/nodejs/src/api-server.js
+++ b/agent-server/nodejs/src/api-server.js
@@ -845,7 +845,7 @@ class APIServer {
     return this.executeToolDirect({
       clientId,
       tabId,
-      tool: 'get_accessibility_tree',
+      tool: 'get_page_content',
       args: {
         maxDepth
       },

From 5dd5464e2066002ff88dc01da1084244cd90a5f0 Mon Sep 17 00:00:00 2001
From: Daedaelius <oleh@browseroperator.io>
Date: Fri, 16 Jan 2026 12:45:01 -0600
Subject: [PATCH 5/6] Fix build error: fix ActionAgent references in GRD file

The GRD file had incorrect references:
- Removed: ActionAgentV0.js (file doesn't exist)
- Added: ActionAgentV1.js (file exists but was missing from list)

This fixes the GN build error:
  ERROR Input to target not generated by a dependency.
  The file: ActionAgentV0.js is listed but no targets generate that file.

And the assert_grd error:
  File ActionAgentV1.js is not listed in devtools_grd_files.gni.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 config/gni/devtools_grd_files.gni | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/gni/devtools_grd_files.gni b/config/gni/devtools_grd_files.gni
index 6d90ed50b2..a7186ea4c4 100644
--- a/config/gni/devtools_grd_files.gni
+++ b/config/gni/devtools_grd_files.gni
@@ -867,7 +867,7 @@ grd_files_bundled_sources = [
   "front_end/panels/ai_chat/agent_framework/implementation/agents/ScrollActionAgent.js",
   "front_end/panels/ai_chat/agent_framework/implementation/agents/WebTaskAgent.js",
   "front_end/panels/ai_chat/agent_framework/implementation/agents/SearchAgent.js",
-  "front_end/panels/ai_chat/agent_framework/implementation/agents/ActionAgentV0.js",
+  "front_end/panels/ai_chat/agent_framework/implementation/agents/ActionAgentV1.js",
   "front_end/panels/ai_chat/agent_framework/implementation/agents/ActionAgentV2.js",
   "front_end/panels/ai_chat/common/MarkdownViewerUtil.js",
   "front_end/panels/ai_chat/evaluation/runner/VisionAgentEvaluationRunner.js",

From 3f204a28e96f28c118e6d5c1eb0ce7dacea4512d Mon Sep 17 00:00:00 2001
From: Daedaelius <oleh@browseroperator.io>
Date: Wed, 21 Jan 2026 09:59:16 -0600
Subject: [PATCH 6/6] Fixed Docker image and agent server to support
 tool-calling through API for better debugging from Claude Code

---
 agent-server/nodejs/src/api-server.js | 94 +++++++++++++++++++++++++--
 docker/Dockerfile                     | 11 +++-
 2 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/agent-server/nodejs/src/api-server.js b/agent-server/nodejs/src/api-server.js
index 1d010f70cf..c7150bc6bd 100644
--- a/agent-server/nodejs/src/api-server.js
+++ b/agent-server/nodejs/src/api-server.js
@@ -555,14 +555,18 @@ class APIServer {
       timeout
     );
 
+    // RPC response wraps the tool result in a 'result' field
+    // e.g., { result: { success: true, output: {...} } }
+    const toolResult = result?.result ?? result;
+
     return {
       clientId: baseClientId,
       tabId,
       tool,
-      success: result?.result?.success ?? false,
-      output: result?.result?.output,
-      executionTime: result?.result?.executionTime,
-      error: result?.error?.message,
+      success: toolResult?.success ?? false,
+      output: toolResult?.output,
+      executionTime: toolResult?.executionTime,
+      error: toolResult?.error,
       timestamp: Date.now()
     };
   }
@@ -716,6 +720,23 @@ class APIServer {
   async handleActionClick(payload) {
     const { clientId, tabId, nodeId, timeout = 30000 } = payload;
 
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    if (nodeId === undefined || nodeId === null) {
+      throw new Error('Node ID is required for click action');
+    }
+
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
@@ -738,10 +759,27 @@ class APIServer {
   async handleActionType(payload) {
     const { clientId, tabId, nodeId, text, timeout = 30000 } = payload;
 
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    if (nodeId === undefined || nodeId === null) {
+      throw new Error('Node ID is required for type action');
+    }
+
     if (!text) {
       throw new Error('Text is required for type action');
     }
 
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
@@ -765,6 +803,19 @@ class APIServer {
   async handleActionScroll(payload) {
     const { clientId, tabId, direction = 'down', amount, pages, timeout = 30000 } = payload;
 
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
@@ -787,10 +838,23 @@ class APIServer {
   async handleActionNavigate(payload) {
     const { clientId, tabId, url, timeout = 30000 } = payload;
 
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
     if (!url) {
       throw new Error('URL is required for navigate action');
     }
 
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
@@ -812,6 +876,23 @@ class APIServer {
   async handleActionHover(payload) {
     const { clientId, tabId, nodeId, timeout = 30000 } = payload;
 
+    if (!clientId) {
+      throw new Error('Client ID is required');
+    }
+
+    if (!tabId) {
+      throw new Error('Tab ID is required');
+    }
+
+    if (nodeId === undefined || nodeId === null) {
+      throw new Error('Node ID is required for hover action');
+    }
+
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
@@ -842,6 +923,11 @@ class APIServer {
       throw new Error('Tab ID is required');
     }
 
+    // Ensure DevTools connection is ready before executing
+    const baseClientId = clientId.split(':')[0];
+    const compositeClientId = `${baseClientId}:${tabId}`;
+    await this.waitForClientConnection(compositeClientId);
+
     return this.executeToolDirect({
       clientId,
       tabId,
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 5305b7f2e3..8ba4f9b930 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -43,14 +43,19 @@ RUN /workspace/depot_tools/ensure_bootstrap
 RUN npm run build
 
 # Add Browser Operator fork and switch to it
+# Branch is configurable via build arg (default: main)
+ARG BROWSER_OPERATOR_BRANCH=main
 RUN git remote add upstream https://github.com/BrowserOperator/browser-operator-core.git
 RUN git fetch upstream
-RUN git checkout upstream/main
+RUN git checkout upstream/${BROWSER_OPERATOR_BRANCH}
 
 # Copy local LLM changes on top of the upstream code
-# This allows iterative development without breaking BUILD.gn compatibility
+# This allows iterative development of LLM providers without rebuilding everything
 COPY front_end/panels/ai_chat/LLM /workspace/devtools/devtools-frontend/front_end/panels/ai_chat/LLM
 
+# Regenerate GN build files
+RUN gn gen out/Default
+
 # AUTOMATED_MODE: Default true for API mode (Type 2/3). Override with --build-arg AUTOMATED_MODE=false for Type 1.
 ARG AUTOMATED_MODE=true
 RUN if [ "$AUTOMATED_MODE" = "true" ]; then \
@@ -58,7 +63,7 @@ RUN if [ "$AUTOMATED_MODE" = "true" ]; then \
       front_end/panels/ai_chat/core/BuildConfig.ts; \
     fi
 
-# Build Browser Operator version with local changes
+# Build Browser Operator version with local changes (fresh GN generation)
 RUN npm run build
 
 # ==============================================================================