use serde_json::{json, Value};
use crate::llm::capabilities::{Capabilities, ComputerUseStyle, ScreenshotScaling};
/// Topic string `audit.computer_use`.
///
/// NOTE(review): presumably the topic under which computer-use audit events are published;
/// no consumer is visible in this file outside the test module, which is why the
/// `dead_code` lint is silenced — confirm against the emitting code.
#[allow(dead_code)]
pub(crate) const COMPUTER_USE_AUDIT_TOPIC: &str = "audit.computer_use";
/// Name of the computer tool: used to recognise the function-style declaration and written
/// as the `name` of the native Anthropic declaration.
pub(crate) const COMPUTER_TOOL_NAME: &str = "computer";
/// Default display width, in pixels, used for projected native tools and as the width bound
/// for `ScreenshotScaling::Xga`.
pub(crate) const DEFAULT_DISPLAY_WIDTH: u32 = 1024;
/// Default display height, in pixels, used for projected native tools and as the height bound
/// for `ScreenshotScaling::Xga`.
pub(crate) const DEFAULT_DISPLAY_HEIGHT: u32 = 768;
fn tool_name(tool: &Value) -> Option<&str> {
tool.get("name")
.or_else(|| tool.get("function").and_then(|f| f.get("name")))
.and_then(Value::as_str)
}
/// Reports whether `tool` is the function-style declaration of the computer tool.
///
/// A declaration whose `type` string starts with `computer` is treated as an already-native
/// computer tool and is never a match; otherwise the tool matches when its name (see
/// `tool_name`) equals `COMPUTER_TOOL_NAME`.
pub(crate) fn is_computer_function_tool(tool: &Value) -> bool {
    let natively_typed = matches!(
        tool.get("type").and_then(Value::as_str),
        Some(kind) if kind.starts_with("computer")
    );
    !natively_typed && tool_name(tool) == Some(COMPUTER_TOOL_NAME)
}
/// Maps the compile-target operating system to an environment label.
///
/// `macos` becomes `"mac"`, `windows` stays `"windows"`, and every other OS is reported as
/// `"ubuntu"`. Within this file the label is passed as the `environment` of the OpenAI-style
/// computer tool.
pub(crate) fn environment_for_os() -> &'static str {
    let host_os = std::env::consts::OS;
    if host_os == "macos" {
        "mac"
    } else if host_os == "windows" {
        "windows"
    } else {
        "ubuntu"
    }
}
/// Builds the Anthropic-style native computer tool declaration.
///
/// The result is a JSON object with `type` `computer_20251124`, `name` set to
/// `COMPUTER_TOOL_NAME`, the given display size in `display_width_px` / `display_height_px`,
/// `display_number` fixed at 1 and `enable_zoom` fixed at `true`.
pub(crate) fn anthropic_computer_tool(display_width_px: u32, display_height_px: u32) -> Value {
    // Versioned tool type string sent to the provider.
    const TOOL_TYPE: &str = "computer_20251124";
    // Display index sent with every declaration; callers cannot override it here.
    const DISPLAY_NUMBER: i32 = 1;

    let (width, height) = (display_width_px, display_height_px);
    json!({
        "type": TOOL_TYPE,
        "name": COMPUTER_TOOL_NAME,
        "display_width_px": width,
        "display_height_px": height,
        "display_number": DISPLAY_NUMBER,
        "enable_zoom": true,
    })
}
/// Builds the OpenAI-style native computer tool declaration.
///
/// The result is a JSON object with `type` `computer`, the given display size in
/// `display_width` / `display_height`, and `environment` copied verbatim from the argument
/// (this file passes the value of `environment_for_os`).
pub(crate) fn openai_computer_tool(
    display_width: u32,
    display_height: u32,
    environment: &str,
) -> Value {
    // Tool type string sent to the provider.
    const TOOL_TYPE: &str = "computer";

    let (width, height) = (display_width, display_height);
    json!({
        "type": TOOL_TYPE,
        "display_width": width,
        "display_height": height,
        "environment": environment,
    })
}
/// Reads the `BURIN_COMPUTER_USE_NATIVE` environment variable as an on/off switch.
///
/// Returns `true` only when the variable is set to `1`, `on` or `true`, ignoring surrounding
/// whitespace and ASCII case. An unset variable, a value that is not valid Unicode, or any
/// other text yields `false`.
fn native_computer_projection_enabled() -> bool {
    let Ok(raw) = std::env::var("BURIN_COMPUTER_USE_NATIVE") else {
        return false;
    };
    let flag = raw.trim();
    ["1", "on", "true"]
        .iter()
        .any(|accepted| flag.eq_ignore_ascii_case(accepted))
}
/// Projects the function-style computer tool onto the provider's native tool, gated by the
/// `BURIN_COMPUTER_USE_NATIVE` environment switch.
///
/// This is the environment-reading entry point; the actual work (and the conditions under
/// which anything changes) lives in `project_computer_tools_with`.
pub(crate) fn project_computer_tools(
    caps: &Capabilities,
    native_tools: &mut Option<Vec<Value>>,
    provider_tools: &mut Vec<Value>,
) {
    let enable_native = native_computer_projection_enabled();
    project_computer_tools_with(caps, native_tools, provider_tools, enable_native);
}
fn project_computer_tools_with(
caps: &Capabilities,
native_tools: &mut Option<Vec<Value>>,
provider_tools: &mut Vec<Value>,
enable_native: bool,
) {
if !enable_native {
return;
}
let style = match caps.computer_use_style {
Some(style @ (ComputerUseStyle::NativeAnthropic | ComputerUseStyle::NativeOpenai)) => style,
_ => return,
};
let Some(tools) = native_tools.as_mut() else {
return;
};
if !tools.iter().any(is_computer_function_tool) {
return;
}
tools.retain(|tool| !is_computer_function_tool(tool));
let (width, height) = (DEFAULT_DISPLAY_WIDTH, DEFAULT_DISPLAY_HEIGHT);
let native = match style {
ComputerUseStyle::NativeAnthropic => anthropic_computer_tool(width, height),
_ => openai_computer_tool(width, height, environment_for_os()),
};
provider_tools.push(native);
}
/// Shrinks `width` x `height` to fit inside `max_w` x `max_h`, keeping the aspect ratio.
///
/// Sizes with a zero side, and sizes that already fit, are returned unchanged. Otherwise
/// both sides are multiplied by the smaller of the two max/actual ratios, rounded to the
/// nearest integer and clamped to at least 1 so a very thin image never collapses to zero.
// Only reached through `scale_screenshot`, which is itself marked `dead_code`.
#[allow(dead_code)]
fn fit_within(width: u32, height: u32, max_w: u32, max_h: u32) -> (u32, u32) {
    let degenerate = width == 0 || height == 0;
    let already_fits = width <= max_w && height <= max_h;
    if degenerate || already_fits {
        return (width, height);
    }

    let (w, h) = (f64::from(width), f64::from(height));
    // The tighter axis decides the shrink factor.
    let ratio = f64::min(f64::from(max_w) / w, f64::from(max_h) / h);
    let shrink = |side: f64| ((side * ratio).round() as u32).max(1);
    (shrink(w), shrink(h))
}
/// Computes the size a screenshot should be scaled to for the given scaling style.
///
/// `Some(ScreenshotScaling::Xga)` fits the image inside
/// `DEFAULT_DISPLAY_WIDTH` x `DEFAULT_DISPLAY_HEIGHT` via `fit_within`; any other value,
/// including `None`, returns the input size untouched.
// Not referenced by non-test code in this file.
#[allow(dead_code)]
pub(crate) fn scale_screenshot(
    width: u32,
    height: u32,
    style: Option<ScreenshotScaling>,
) -> (u32, u32) {
    if let Some(ScreenshotScaling::Xga) = style {
        return fit_within(width, height, DEFAULT_DISPLAY_WIDTH, DEFAULT_DISPLAY_HEIGHT);
    }
    (width, height)
}
/// Converts a point from native-resolution space into target-resolution space.
///
/// Each axis is computed as `coord * target / native` in floating point and rounded to the
/// nearest integer. An axis whose native dimension is zero is passed through unchanged
/// rather than dividing by zero.
// Not referenced by non-test code in this file.
#[allow(dead_code)]
pub(crate) fn map_coord_to_target(
    native_xy: (i32, i32),
    native_dims: (u32, u32),
    target_dims: (u32, u32),
) -> (i32, i32) {
    let rescale = |coord: i32, from: u32, to: u32| -> i32 {
        if from == 0 {
            return coord;
        }
        (f64::from(coord) * f64::from(to) / f64::from(from)).round() as i32
    };
    (
        rescale(native_xy.0, native_dims.0, target_dims.0),
        rescale(native_xy.1, native_dims.1, target_dims.1),
    )
}
/// Converts a point from target-resolution space back into native-resolution space.
///
/// Each axis is computed as `coord * native / target` in floating point and rounded to the
/// nearest integer. An axis whose target dimension is zero is passed through unchanged
/// rather than dividing by zero.
// Not referenced by non-test code in this file.
#[allow(dead_code)]
pub(crate) fn map_coord_back(
    model_xy: (i32, i32),
    target_dims: (u32, u32),
    native_dims: (u32, u32),
) -> (i32, i32) {
    let (model_x, model_y) = model_xy;
    let (target_w, target_h) = target_dims;
    let (native_w, native_h) = native_dims;

    let native_x = match target_w {
        0 => model_x,
        _ => (f64::from(model_x) * f64::from(native_w) / f64::from(target_w)).round() as i32,
    };
    let native_y = match target_h {
        0 => model_y,
        _ => (f64::from(model_y) * f64::from(native_h) / f64::from(target_h)).round() as i32,
    };
    (native_x, native_y)
}
/// One UI element that a grounded action can point at.
///
/// `resolve_grounding` looks elements up by `reference` and aims at the middle of the box
/// described by `x`, `y`, `width` and `height`.
// Only constructed by the test module in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) struct GroundingElement {
    /// Identifier matched against `GroundingTarget::Element` and `GroundingTarget::Mark`.
    pub reference: String,
    /// Role label of the element (the tests use values such as `AXButton`).
    pub role: String,
    /// Name of the element (the tests use values such as `OK`).
    pub name: String,
    /// Horizontal origin of the bounding box.
    pub x: i32,
    /// Vertical origin of the bounding box.
    pub y: i32,
    /// Horizontal extent of the bounding box.
    pub width: i32,
    /// Vertical extent of the bounding box.
    pub height: i32,
}
/// The thing a grounded action is aimed at; resolved to a point by `resolve_grounding`.
// Only constructed by the test module in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub(crate) enum GroundingTarget {
    /// An element addressed by its `reference` string.
    Element { reference: String },
    /// A mark id: tried first as an element `reference`, then as a 1-based position in the
    /// element list.
    Mark { id: String },
    /// A literal coordinate pair, used exactly as given.
    Point { x: i32, y: i32 },
}
/// Center point of `element`'s bounding box: `(x + width / 2, y + height / 2)`.
///
/// Halving uses integer division, which truncates toward zero. The additions saturate at
/// the `i32` limits, so extreme coordinates clamp to `i32::MIN` / `i32::MAX` instead of
/// panicking in debug builds or wrapping around in release builds.
// Only reached through `resolve_grounding`, which is itself marked `dead_code`.
#[allow(dead_code)]
fn bbox_center(element: &GroundingElement) -> (i32, i32) {
    (
        element.x.saturating_add(element.width / 2),
        element.y.saturating_add(element.height / 2),
    )
}
#[allow(dead_code)]
pub(crate) fn resolve_grounding(
elements: &[GroundingElement],
target: &GroundingTarget,
) -> Option<(i32, i32)> {
match target {
GroundingTarget::Point { x, y } => Some((*x, *y)),
GroundingTarget::Element { reference } => elements
.iter()
.find(|element| &element.reference == reference)
.map(bbox_center),
GroundingTarget::Mark { id } => elements
.iter()
.find(|element| &element.reference == id)
.or_else(|| {
id.parse::<usize>()
.ok()
.filter(|index| *index >= 1)
.and_then(|index| elements.get(index - 1))
})
.map(bbox_center),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Default capabilities with only the computer-use style overridden.
    fn caps_with_style(style: ComputerUseStyle) -> Capabilities {
        Capabilities {
            computer_use_style: Some(style),
            ..Capabilities::default()
        }
    }

    /// Minimal function-style tool declaration carrying only a nested name.
    fn function_tool(name: &str) -> Value {
        json!({"type": "function", "function": {"name": name}})
    }

    /// Runs the projection for `style` with the native gate forced on.
    fn project_enabled(
        style: ComputerUseStyle,
        native: &mut Option<Vec<Value>>,
        provider: &mut Vec<Value>,
    ) {
        let caps = caps_with_style(style);
        project_computer_tools_with(&caps, native, provider, true);
    }

    /// Builds a grounding element from its strings and an `(x, y, width, height)` box.
    fn element(
        reference: &str,
        role: &str,
        name: &str,
        bbox: (i32, i32, i32, i32),
    ) -> GroundingElement {
        let (x, y, width, height) = bbox;
        GroundingElement {
            reference: reference.to_string(),
            role: role.to_string(),
            name: name.to_string(),
            x,
            y,
            width,
            height,
        }
    }

    #[test]
    fn anthropic_native_tool_golden_shape() {
        let expected = json!({
            "type": "computer_20251124",
            "name": "computer",
            "display_width_px": 1024,
            "display_height_px": 768,
            "display_number": 1,
            "enable_zoom": true,
        });
        assert_eq!(anthropic_computer_tool(1024, 768), expected);
    }

    #[test]
    fn openai_native_tool_golden_shape() {
        let expected = json!({
            "type": "computer",
            "display_width": 1440,
            "display_height": 900,
            "environment": "mac",
        });
        assert_eq!(openai_computer_tool(1440, 900, "mac"), expected);
    }

    #[test]
    fn projects_native_anthropic_and_suppresses_function_copy() {
        let mut native = Some(vec![function_tool("read_file"), function_tool("computer")]);
        let mut provider = Vec::new();

        project_enabled(ComputerUseStyle::NativeAnthropic, &mut native, &mut provider);

        // Only the unrelated tool survives in the function-style list.
        let kept = native.unwrap();
        assert_eq!(kept.len(), 1);
        assert_eq!(tool_name(&kept[0]), Some("read_file"));

        assert_eq!(provider.len(), 1);
        assert_eq!(provider[0]["type"], "computer_20251124");
        assert_eq!(provider[0]["display_width_px"], 1024);
    }

    #[test]
    fn projects_native_openai_and_suppresses_function_copy() {
        let mut native = Some(vec![function_tool("computer")]);
        let mut provider = Vec::new();

        project_enabled(ComputerUseStyle::NativeOpenai, &mut native, &mut provider);

        assert!(native.unwrap().is_empty());
        assert_eq!(provider.len(), 1);
        assert_eq!(provider[0]["type"], "computer");
        assert!(provider[0].get("environment").is_some());
    }

    #[test]
    fn function_style_leaves_tool_untouched() {
        for style in [ComputerUseStyle::Function, ComputerUseStyle::Grounded] {
            let mut native = Some(vec![function_tool("computer")]);
            let mut provider = Vec::new();

            project_enabled(style, &mut native, &mut provider);

            assert_eq!(native.as_ref().unwrap().len(), 1, "{style:?}");
            assert!(provider.is_empty(), "{style:?}");
        }
    }

    #[test]
    fn projection_is_idempotent() {
        let mut native = Some(vec![function_tool("computer")]);
        let mut provider = Vec::new();

        // A second pass over the already-projected lists must not add another tool.
        for _ in 0..2 {
            project_enabled(ComputerUseStyle::NativeAnthropic, &mut native, &mut provider);
        }

        assert!(native.unwrap().is_empty());
        assert_eq!(provider.len(), 1);
    }

    #[test]
    fn xga_scaling_fits_and_original_is_identity() {
        let cases = [
            ((1920, 1080), Some(ScreenshotScaling::Xga), (1024, 576)),
            ((800, 600), Some(ScreenshotScaling::Xga), (800, 600)),
            ((1920, 1080), Some(ScreenshotScaling::Original), (1920, 1080)),
            ((1920, 1080), None, (1920, 1080)),
        ];
        for ((width, height), scaling, expected) in cases {
            assert_eq!(scale_screenshot(width, height, scaling), expected);
        }
    }

    #[test]
    fn coordinate_roundtrip_within_one_pixel() {
        let native_dims = (1920, 1080);
        let target_dims =
            scale_screenshot(native_dims.0, native_dims.1, Some(ScreenshotScaling::Xga));

        for native in [(0, 0), (960, 540), (1919, 1079), (100, 999)] {
            let model = map_coord_to_target(native, native_dims, target_dims);
            let back = map_coord_back(model, target_dims, native_dims);
            let (dx, dy) = ((back.0 - native.0).abs(), (back.1 - native.1).abs());
            assert!(
                dx <= 1 && dy <= 1,
                "native {native:?} -> model {model:?} -> back {back:?}"
            );
        }
    }

    #[test]
    fn original_scaling_coordinate_identity() {
        let dims = (1440, 900);
        let target = scale_screenshot(dims.0, dims.1, Some(ScreenshotScaling::Original));
        assert_eq!(target, dims);
        assert_eq!(map_coord_back((123, 456), target, dims), (123, 456));
    }

    #[test]
    fn grounding_resolves_element_mark_and_point() {
        let elements = vec![
            element("el-a", "AXButton", "OK", (100, 200, 40, 20)),
            element("el-b", "AXTextField", "Search", (0, 0, 10, 10)),
        ];

        let by_reference = GroundingTarget::Element {
            reference: "el-a".to_string(),
        };
        assert_eq!(resolve_grounding(&elements, &by_reference), Some((120, 210)));

        // "2" is not a reference, so it is read as the second element.
        let by_mark = GroundingTarget::Mark {
            id: "2".to_string(),
        };
        assert_eq!(resolve_grounding(&elements, &by_mark), Some((5, 5)));

        let by_point = GroundingTarget::Point { x: 7, y: 9 };
        assert_eq!(resolve_grounding(&elements, &by_point), Some((7, 9)));

        let unknown = GroundingTarget::Element {
            reference: "nope".to_string(),
        };
        assert_eq!(resolve_grounding(&elements, &unknown), None);
    }

    #[test]
    fn audit_topic_is_stable() {
        assert_eq!(COMPUTER_USE_AUDIT_TOPIC, "audit.computer_use");
    }
}