Bsetzer1
diff --git a/operate/main.py b/operate/main.py
Lines changed: 29 additions & 14 deletions
diff --git a/requirements.txt b/requirements.txt
Lines changed: 3 additions & 5 deletions
@@ -10,6 +10,7 @@
 import subprocess
 import pyautogui
 import argparse
+import platform
 
 from prompt_toolkit import prompt
 from prompt_toolkit.shortcuts import message_dialog
@@ -155,7 +156,13 @@ def main(model):
  style=style,
  ).run()
 
- os.system("clear") # Clears the terminal screen
+ print("SYSTEM", platform.system())
+
+ if platform.system() == "Windows":
+ os.system("cls")
+ else:
+ os.system("clear")
+
 
  print(f"{ANSI_GREEN}[Self-Operating Computer]\n{ANSI_RESET}{USER_QUESTION}")
  print(f"{ANSI_YELLOW}[User]{ANSI_RESET}")
@@ -215,7 +222,7 @@ def main(model):
 
  function_response = ""
  if action_type == "SEARCH":
- function_response = mac_search(action_detail)
+ function_response = search(action_detail)
  elif action_type == "TYPE":
  function_response = keyboard_type(action_detail)
  elif action_type == "CLICK":
@@ -303,7 +310,7 @@ def get_next_action_from_openai(messages, objective):
  # Call the function to capture the screen with the cursor
  capture_screen_with_cursor(screenshot_filename)
 
- new_screenshot_filename = "screenshots/screenshot_with_grid.png"
+ new_screenshot_filename = os.path.join("screenshots", "screenshot_with_grid.png")
 
  add_grid_to_image(screenshot_filename, new_screenshot_filename, 500)
  # sleep for a second
@@ -477,9 +484,10 @@ def add_grid_to_image(original_image_path, new_image_path, grid_interval):
  # Filter for specific font name (e.g., 'Arial.ttf')
  font_path = next((path for path in font_paths if "Arial" in path), None)
  if not font_path:
- raise RuntimeError(
- "Specific TrueType font not found; install the font or check the font name."
- )
+ if len(font_paths) > 0:
+ font_path = font_paths[0]
+ else:
+ raise RuntimeError("No TrueType fonts found on the system.")
 
  # Reduce the font size a bit
  font_size = int(grid_interval / 10) # Reduced font size
@@ -535,11 +543,15 @@ def keyboard_type(text):
  return "Type: " + text
 
 
-def mac_search(text):
- # Press and release Command and Space separately
- pyautogui.keyDown("command")
- pyautogui.press("space")
- pyautogui.keyUp("command")
+def search(text):
+ if platform.system() == "Windows": 
+ pyautogui.press('win')
+ else: 
+ # Press and release Command and Space separately
+ pyautogui.keyDown("command")
+ pyautogui.press("space")
+ pyautogui.keyUp("command")
+
  # Now type the text
  for char in text:
  pyautogui.write(char)
@@ -548,10 +560,13 @@ def mac_search(text):
  return "Open program: " + text
 
 
-def capture_screen_with_cursor(file_path="screenshots/screenshot_with_cursor.png"):
+def capture_screen_with_cursor(file_path=os.path.join("screenshots", "screenshot.png")):
  # Use the screencapture utility to capture the screen with the cursor
- subprocess.run(["screencapture", "-C", file_path])
-
+ if platform.system() == "Windows":
+ screenshot = pyautogui.screenshot()
+ screenshot.save(file_path)
+ else:
+ subprocess.run(["screencapture", "-C", file_path])
 
 def extract_json_from_string(s):
  # print("extracting json from string", s)
 
@@ -26,12 +26,9 @@ Pillow==10.1.0
 prompt-toolkit==3.0.39
 PyAutoGUI==0.9.54
 pydantic==2.4.2
-pydantic-core==2.10.1
+pydantic_core==2.10.1
 PyGetWindow==0.0.9
 PyMsgBox==1.0.9
-pyobjc-core==10.0
-pyobjc-framework-Cocoa==10.0
-pyobjc-framework-Quartz==10.0
 pyparsing==3.1.1
 pyperclip==1.8.2
 PyRect==0.2.0
@@ -42,10 +39,11 @@ python-dotenv==1.0.0
 pytweening==1.0.7
 requests==2.31.0
 rubicon-objc==0.4.7
+self-operating-computer==1.0.0
 six==1.16.0
 sniffio==1.3.0
 tqdm==4.66.1
-typing-extensions==4.8.0
+typing_extensions==4.8.0
 urllib3==2.0.7
 wcwidth==0.2.9
 zipp==3.17.0