Skip to main content

Basic Examples

Here are some practical examples of how to use the Computer Use API in different programming languages.

Using cURL

# Move the pointer onto the Firefox/Chrome icon in the dock...
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "move_mouse", "coordinates": {"x": 100, "y": 960}}'

# ...then click it once with the left mouse button.
curl --request POST http://localhost:9990/computer-use \
  --header "Content-Type: application/json" \
  --data '{"action": "click_mouse", "button": "left", "clickCount": 1}'

Python Examples

import base64
import json
import time
from io import BytesIO

import requests
from PIL import Image


def control_computer(action, **params):
    """Send a single action to the Computer Use endpoint.

    Args:
        action: Name of the action to perform (e.g. "move_mouse").
        **params: Extra fields merged into the JSON request body.

    Returns:
        The response body parsed as JSON.
    """
    payload = {"action": action}
    payload.update(params)
    reply = requests.post("http://localhost:9990/computer-use", json=payload)
    return reply.json()


# The walkthrough is a fixed sequence of (action, parameters) pairs that is
# sent to the endpoint in order.
steps = [
    # Open a web browser by clicking an icon
    ("move_mouse", {"coordinates": {"x": 100, "y": 960}}),
    ("click_mouse", {"button": "left"}),
    # Wait for the browser to open
    ("wait", {"duration": 2000}),
    # Type a URL
    ("type_text", {"text": "https://example.com"}),
    ("press_keys", {"key": "enter"}),
]

for step_action, step_params in steps:
    control_computer(step_action, **step_params)

JavaScript/Node.js Examples

const axios = require('axios');

/**
 * Send a single action to the Computer Use endpoint.
 *
 * Never throws: on a request error it logs the message and resolves to
 * `{ success: false, error }` instead, so callers must inspect the result.
 *
 * @param {string} action - Name of the action (e.g. "move_mouse").
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<object>} The response body, or a failure descriptor.
 */
async function controlComputer(action, params = {}) {
  const url = "http://localhost:9990/computer-use";
  const data = { action, ...params };

  try {
    const response = await axios.post(url, data);
    return response.data;
  } catch (error) {
    console.error('Error:', error.message);
    return { success: false, error: error.message };
  }
}

/**
 * Run one action and throw if it reported failure.
 *
 * controlComputer() converts request errors into a `{ success: false }`
 * result, so without this check a failed step would go unnoticed and the
 * remaining steps would still be sent.
 *
 * @param {string} action - Name of the action.
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<object>} The result of the action.
 * @throws {Error} If the result carries `success: false`.
 */
async function runStep(action, params = {}) {
  const result = await controlComputer(action, params);
  if (result && result.success === false) {
    throw new Error(`Action "${action}" failed: ${result.error || "unknown error"}`);
  }
  return result;
}

// Example: Automate opening an application and typing
async function automateTextEditor() {
  try {
    // Open text editor by clicking its icon
    await runStep("move_mouse", { coordinates: { x: 150, y: 960 } });
    await runStep("click_mouse", { button: "left" });

    // Wait for it to open
    await runStep("wait", { duration: 2000 });

    // Type some text
    await runStep("type_text", {
      text: "This is an automated test using Node.js and Bytebot",
      delay: 30
    });

    // Only reached when every step above succeeded.
    console.log("Automation completed successfully");
  } catch (error) {
    console.error("Automation failed:", error);
  }
}

automateTextEditor();

File Operations

Writing Files

This example shows how to write files to the desktop environment:
import base64

import requests


def write_file(path, content):
    """Write text to a file in the desktop environment.

    Args:
        path: Destination path passed to the "write_file" action.
        content: Text to write; it is UTF-8 encoded and then base64 encoded
            before being sent.

    Returns:
        The response body parsed as JSON.
    """
    # The endpoint receives the file contents as a base64 string.
    raw_bytes = content.encode('utf-8')
    payload_b64 = base64.b64encode(raw_bytes).decode('utf-8')

    reply = requests.post(
        "http://localhost:9990/computer-use",
        json={
            "action": "write_file",
            "path": path,
            "data": payload_b64,
        },
    )
    return reply.json()


# Write a text file
result = write_file("/home/user/hello.txt", "Hello, Bytebot!")
print(result)  # {'success': True, 'message': 'File written successfully...'}

# Write to desktop (relative path)
result = write_file("report.txt", "Daily report content")
print(result)  # File will be written to /home/user/Desktop/report.txt

Reading Files

This example shows how to read files from the desktop environment:
import base64

import requests


def read_file(path):
    """Read a text file from the desktop environment.

    Args:
        path: Path passed to the "read_file" action.

    Returns:
        On success, a dict with the decoded text under 'content' plus the
        'name', 'size' and 'mediaType' fields from the response. Otherwise
        the response body is returned unchanged, so callers can tell the two
        cases apart by checking for the 'content' key.

    Raises:
        UnicodeDecodeError: If the decoded file bytes are not valid UTF-8.
    """
    url = "http://localhost:9990/computer-use"

    data = {
        "action": "read_file",
        "path": path
    }

    response = requests.post(url, json=data)
    result = response.json()

    # .get() treats a response without a 'success' field as a failure
    # instead of raising KeyError.
    if not result.get('success'):
        return result

    # Decode the base64 content
    content = base64.b64decode(result['data']).decode('utf-8')
    return {
        'content': content,
        'name': result['name'],
        'size': result['size'],
        'mediaType': result['mediaType']
    }


# Read a text file
file_data = read_file("/home/user/hello.txt")
if 'content' in file_data:
    print(f"Content: {file_data['content']}")
    print(f"Size: {file_data['size']} bytes")
    print(f"Type: {file_data['mediaType']}")
else:
    # A failed read has no 'content' key; show the raw response instead of
    # crashing with KeyError.
    print(f"Could not read file: {file_data}")

Automation Recipes

Browser Automation

This example demonstrates how to automate browser interactions:
import time

import requests


def control_computer(action, **params):
    """Send a single action to the Computer Use endpoint.

    Args:
        action: Name of the action to perform (e.g. "move_mouse").
        **params: Extra fields merged into the JSON request body.

    Returns:
        The response body parsed as JSON.
    """
    url = "http://localhost:9990/computer-use"
    data = {"action": action, **params}
    response = requests.post(url, json=data)
    return response.json()


def automate_browser():
    """Open a browser, load a page, capture it, then click a link and scroll.

    All coordinates are examples and must be adjusted to the target screen.

    Returns:
        The parsed JSON response of the "screenshot" action, captured after
        the page-load wait and before the link click.
    """
    # Open browser (assuming browser icon is at position x=100, y=960)
    control_computer("move_mouse", coordinates={"x": 100, "y": 960})
    control_computer("click_mouse", button="left")
    time.sleep(3)  # Wait for browser to open

    # Type URL
    control_computer("type_text", text="https://example.com")
    control_computer("press_keys", key="enter")
    time.sleep(2)  # Wait for page to load

    # Take screenshot of the loaded page
    screenshot = control_computer("screenshot")

    # Click on a link (coordinates would need to be adjusted for your target)
    control_computer("move_mouse", coordinates={"x": 300, "y": 400})
    control_computer("click_mouse", button="left")
    time.sleep(2)

    # Scroll down
    control_computer("scroll", direction="down", scrollCount=5)

    # Hand the capture back to the caller instead of discarding it.
    return screenshot


automate_browser()

Form Filling Automation

This example shows how to automate filling out a form in a web application:
const axios = require("axios");

/**
 * Send a single action to the Computer Use endpoint.
 *
 * Request errors are not caught here; they reject the returned promise.
 *
 * @param {string} action - Name of the action (e.g. "type_text").
 * @param {object} [params={}] - Extra fields merged into the request body.
 * @returns {Promise<object>} The response body.
 */
async function controlComputer(action, params = {}) {
  const url = "http://localhost:9990/computer-use";
  const data = { action, ...params };
  const response = await axios.post(url, data);
  return response.data;
}

/**
 * Fill in a three-field form (name, email, message) and submit it.
 *
 * The first field is focused by clicking at example coordinates; the
 * remaining fields and the submit button are reached with the Tab key.
 *
 * @returns {Promise<void>}
 */
async function fillForm() {
  // Click first input field
  await controlComputer("move_mouse", { coordinates: { x: 400, y: 300 } });
  await controlComputer("click_mouse", { button: "left" });

  // Type name
  await controlComputer("type_text", { text: "John Doe" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type email (a syntactically valid address on the reserved example.com domain)
  await controlComputer("type_text", { text: "john.doe@example.com" });

  // Tab to next field
  await controlComputer("press_keys", { key: "tab" });

  // Type message
  await controlComputer("type_text", {
    text: "This is an automated message sent using Bytebot's Computer Use API",
    delay: 30,
  });

  // Tab to submit button
  await controlComputer("press_keys", { key: "tab" });

  // Press Enter to submit
  await controlComputer("press_keys", { key: "enter" });
}

fillForm().catch(console.error);

Integration with Testing Frameworks

The Computer Use API can be integrated with popular testing frameworks:

Selenium Alternative

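Bytebot can serve as an alternative to Selenium for web testing. Unlike Selenium, the wrapper below targets elements by screen coordinates rather than by DOM selectors: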
import json
import time

import requests


class BytebotWebDriver:
    """Small driver-style wrapper around the Computer Use endpoint.

    Elements are addressed by screen coordinates given as {"x": ..., "y": ...}
    dicts.
    """

    def __init__(self, base_url="http://localhost:9990"):
        """Remember the base URL that requests are sent to."""
        self.base_url = base_url

    def control_computer(self, action, **params):
        """Post one action with its parameters and return the parsed JSON reply."""
        endpoint = "{}/computer-use".format(self.base_url)
        payload = {"action": action}
        payload.update(params)
        return requests.post(endpoint, json=payload).json()

    def _left_click_at(self, coords):
        """Move the pointer to ``coords`` and press the left button."""
        self.control_computer("move_mouse", coordinates=coords)
        self.control_computer("click_mouse", button="left")

    def open_browser(self, browser_icon_coords):
        """Click the browser icon, then pause three seconds while it opens."""
        self._left_click_at(browser_icon_coords)
        time.sleep(3)

    def navigate_to(self, url):
        """Type ``url``, press Enter, then pause two seconds for the page load."""
        self.control_computer("type_text", text=url)
        self.control_computer("press_keys", key="enter")
        time.sleep(2)

    def click_element(self, coords):
        """Left-click at the given screen coordinates."""
        self._left_click_at(coords)

    def type_text(self, text):
        """Send ``text`` through the "type_text" action."""
        self.control_computer("type_text", text=text)

    def press_keys(self, key, modifiers=None):
        """Press ``key``; ``modifiers`` is included only when it is truthy."""
        extra = {"modifiers": modifiers} if modifiers else {}
        self.control_computer("press_keys", key=key, **extra)

    def take_screenshot(self):
        """Return the parsed JSON reply of the "screenshot" action."""
        return self.control_computer("screenshot")


# Usage example
driver = BytebotWebDriver()
driver.open_browser({"x": 100, "y": 960})
driver.navigate_to("https://example.com")
driver.click_element({"x": 300, "y": 400})
driver.type_text("Hello Bytebot!")
⌘I