import asyncio
import base64
import logging
from typing import Any
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("axterminator")
try:
import axterminator as ax
AXTERMINATOR_AVAILABLE = True
except ImportError:
ax = None AXTERMINATOR_AVAILABLE = False
logger.warning("axterminator not installed - install with: pip install axterminator")
_connected_apps: dict[str, Any] = {}
server = Server("axterminator")
def _get_app(app_name: str) -> Any:
if app_name not in _connected_apps:
raise ValueError(f"App '{app_name}' not connected. Use ax_connect first.")
return _connected_apps[app_name]
@server.list_tools()
async def list_tools() -> list[Tool]:
return [
Tool(
name="ax_connect",
description="""Connect to a macOS application for GUI testing.
Connect by bundle ID (recommended), name, or PID.
The app must be running and accessibility must be enabled.
Returns: Connection status and app info.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App identifier: bundle ID (com.apple.Safari), name (Safari), or PID (12345)",
},
"alias": {
"type": "string",
"description": "Optional alias to reference this app later (defaults to app name)",
},
},
"required": ["app"],
},
),
Tool(
name="ax_find",
description="""Find UI elements in a connected app.
Search by text, role, or attributes. Uses 7-strategy self-healing locators.
Returns element info including role, title, position, and available actions.
Query syntax:
- Simple text: "Save" (finds element with text "Save")
- By role: "role:AXButton" (finds buttons)
- Combined: "role:AXButton title:Save" (finds Save button)
- XPath-like: "//AXButton[@AXTitle='OK']" """,
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Element query (text, role:X, title:X, or XPath-like)",
},
"timeout_ms": {
"type": "integer",
"description": "Max time to wait for element (default: 5000ms)",
"default": 5000,
},
},
"required": ["app", "query"],
},
),
Tool(
name="ax_click",
description="""Click a UI element - IN BACKGROUND (no focus stealing!).
WORLD FIRST: Click without stealing focus from user's active work.
Use mode=focus only if the element requires it (rare).
Returns: Click result and element state after click.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Element to click (text, role:X, or XPath-like)",
},
"mode": {
"type": "string",
"description": "Action mode: 'background' (default, no focus steal) or 'focus'",
"enum": ["background", "focus"],
"default": "background",
},
"click_type": {
"type": "string",
"description": "Click type: 'single' (default), 'double', or 'right'",
"enum": ["single", "double", "right"],
"default": "single",
},
},
"required": ["app", "query"],
},
),
Tool(
name="ax_type",
description="""Type text into an element.
Note: Text input typically requires focus mode.
For setting values directly without typing, use ax_set_value instead.
Returns: Typing result.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Element to type into (text field query)",
},
"text": {
"type": "string",
"description": "Text to type",
},
"mode": {
"type": "string",
"description": "Action mode: 'focus' (default for typing) or 'background'",
"enum": ["background", "focus"],
"default": "focus",
},
},
"required": ["app", "query", "text"],
},
),
Tool(
name="ax_set_value",
description="""Set the value of an element directly (no keystroke simulation).
Faster than ax_type and works in background mode.
Use for text fields, sliders, etc.
Returns: Set result and new value.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Element to set value on",
},
"value": {
"type": "string",
"description": "Value to set",
},
},
"required": ["app", "query", "value"],
},
),
Tool(
name="ax_get_value",
description="""Get the current value of an element.
Works for text fields, labels, checkboxes, sliders, etc.
Returns the accessibility value attribute.
Returns: Element value and additional attributes.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Element to get value from",
},
},
"required": ["app", "query"],
},
),
Tool(
name="ax_list_windows",
description="""List all windows of a connected app.
Returns window titles, positions, sizes, and states.
Returns: List of window information.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
},
"required": ["app"],
},
),
Tool(
name="ax_screenshot",
description="""Take a screenshot of an app or element.
Captures without stealing focus.
Returns base64-encoded PNG image.
Returns: Base64 screenshot data.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"query": {
"type": "string",
"description": "Optional: element to screenshot (entire app if not specified)",
},
},
"required": ["app"],
},
),
Tool(
name="ax_wait_idle",
description="""Wait for an app to become idle (no pending UI updates).
Useful before asserting state or taking screenshots.
Returns: Idle status and wait time.""",
inputSchema={
"type": "object",
"properties": {
"app": {
"type": "string",
"description": "App name/alias from ax_connect",
},
"timeout_ms": {
"type": "integer",
"description": "Max time to wait (default: 5000ms)",
"default": 5000,
},
},
"required": ["app"],
},
),
Tool(
name="ax_is_accessible",
description="""Check if accessibility is enabled and working.
Returns: Accessibility status and any setup instructions needed.""",
inputSchema={
"type": "object",
"properties": {},
},
),
]
@server.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
if not AXTERMINATOR_AVAILABLE:
return [
TextContent(
type="text",
text="Error: axterminator not installed. Install with: pip install axterminator",
)
]
try:
if name == "ax_is_accessible":
is_enabled = ax.is_accessibility_enabled()
if is_enabled:
return [
TextContent(
type="text",
text="✅ Accessibility is enabled and working.",
)
]
else:
return [
TextContent(
type="text",
text="❌ Accessibility not enabled.\n\nTo enable:\n1. Open System Settings > Privacy & Security > Accessibility\n2. Add and enable the terminal app (Terminal, iTerm2, etc.)\n3. Restart the terminal",
)
]
elif name == "ax_connect":
app_id = arguments["app"]
alias = arguments.get("alias")
if app_id.isdigit():
app_obj = ax.app(pid=int(app_id))
elif "." in app_id and app_id.count(".") >= 2:
app_obj = ax.app(bundle_id=app_id)
else:
app_obj = ax.app(name=app_id)
key = alias or app_id
_connected_apps[key] = app_obj
return [
TextContent(
type="text",
text=f"✅ Connected to '{key}'\nReady for GUI testing (background mode by default).",
)
]
elif name == "ax_find":
app = _get_app(arguments["app"])
query = arguments["query"]
timeout = arguments.get("timeout_ms", 5000)
element = app.wait_for_element(query, timeout_ms=timeout)
if element:
info = {
"found": True,
"role": getattr(element, "role", "unknown"),
"title": getattr(element, "title", ""),
"value": getattr(element, "value", ""),
"enabled": getattr(element, "enabled", True),
"position": getattr(element, "position", None),
"size": getattr(element, "size", None),
}
return [
TextContent(
type="text",
text=f"✅ Found element:\n{info}",
)
]
else:
return [
TextContent(
type="text",
text=f"❌ Element not found: '{query}' (timeout: {timeout}ms)",
)
]
elif name == "ax_click":
app = _get_app(arguments["app"])
query = arguments["query"]
mode_str = arguments.get("mode", "background")
click_type = arguments.get("click_type", "single")
mode = ax.BACKGROUND if mode_str == "background" else ax.FOCUS
element = app.find(query)
if not element:
return [TextContent(type="text", text=f"❌ Element not found: '{query}'")]
if click_type == "double":
element.double_click(mode=mode)
elif click_type == "right":
element.right_click(mode=mode)
else:
element.click(mode=mode)
return [
TextContent(
type="text",
text=f"✅ Clicked '{query}' ({click_type}, {mode_str} mode)",
)
]
elif name == "ax_type":
app = _get_app(arguments["app"])
query = arguments["query"]
text = arguments["text"]
mode_str = arguments.get("mode", "focus")
mode = ax.BACKGROUND if mode_str == "background" else ax.FOCUS
element = app.find(query)
if not element:
return [TextContent(type="text", text=f"❌ Element not found: '{query}'")]
element.type_text(text, mode=mode)
return [
TextContent(
type="text",
text=f"✅ Typed {len(text)} chars into '{query}' ({mode_str} mode)",
)
]
elif name == "ax_set_value":
app = _get_app(arguments["app"])
query = arguments["query"]
value = arguments["value"]
element = app.find(query)
if not element:
return [TextContent(type="text", text=f"❌ Element not found: '{query}'")]
element.set_value(value)
return [
TextContent(
type="text",
text=f"✅ Set value on '{query}' to: {value}",
)
]
elif name == "ax_get_value":
app = _get_app(arguments["app"])
query = arguments["query"]
element = app.find(query)
if not element:
return [TextContent(type="text", text=f"❌ Element not found: '{query}'")]
value = getattr(element, "value", None)
return [
TextContent(
type="text",
text=f"Value of '{query}': {value}",
)
]
elif name == "ax_list_windows":
app = _get_app(arguments["app"])
windows = app.windows() if hasattr(app, "windows") else []
if not windows:
return [TextContent(type="text", text="No windows found")]
result = "Windows:\n"
for i, w in enumerate(windows):
title = getattr(w, "title", f"Window {i}")
result += f" {i + 1}. {title}\n"
return [TextContent(type="text", text=result)]
elif name == "ax_screenshot":
app = _get_app(arguments["app"])
query = arguments.get("query")
if query:
element = app.find(query)
if not element:
return [TextContent(type="text", text=f"❌ Element not found: '{query}'")]
screenshot_data = element.screenshot()
else:
screenshot_data = app.screenshot()
if screenshot_data:
b64 = base64.b64encode(screenshot_data).decode("utf-8")
return [
TextContent(
type="text",
text=f"Screenshot captured ({len(screenshot_data)} bytes)\nBase64: {b64[:100]}...",
)
]
else:
return [TextContent(type="text", text="❌ Failed to capture screenshot")]
elif name == "ax_wait_idle":
app = _get_app(arguments["app"])
timeout = arguments.get("timeout_ms", 5000)
if hasattr(app, "wait_idle"):
app.wait_idle(timeout_ms=timeout)
return [TextContent(type="text", text="✅ App is idle")]
else:
await asyncio.sleep(0.1) return [
TextContent(type="text", text="✅ Wait complete (no native idle detection)")
]
else:
return [TextContent(type="text", text=f"Unknown tool: {name}")]
except Exception as e:
logger.exception(f"Error in {name}")
return [TextContent(type="text", text=f"❌ Error: {e}")]
async def main():
async with stdio_server() as (read_stream, write_stream):
await server.run(read_stream, write_stream, server.create_initialization_options())
if __name__ == "__main__":
asyncio.run(main())