/// GUI-agent models that the spike evaluates against each other.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ModelCandidate {
    /// CogAgent 9B, variant `20241220`.
    CogAgent9B20241220,
    /// UI-TARS-2.
    UiTars2,
}
/// Identifies which layer of the automation stack a stage description belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ComponentKind {
    /// The GUI model stage.
    Model,
    /// The screen recognition stage.
    ScreenRecognition,
    /// The hebb context stage.
    HebbContext,
    /// The claude-elite orchestration stage.
    ClaudeEliteOrchestration,
    /// The axterminator execution stage.
    AxTerminatorExecution,
}
/// Result category recorded for a single demo task.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DemoOutcome {
    /// The task is counted as succeeding without human involvement.
    AutonomousSuccess,
    /// The task keeps a human review/confirmation step in the loop.
    HumanReviewRequired,
}
/// Qualitative assessment of one candidate model.
#[derive(Clone, Debug)]
pub struct ModelEvaluation {
    /// The model this evaluation is about.
    pub model: ModelCandidate,
    /// Free-text notes on the licensing of code and weights.
    pub license: &'static str,
    /// Free-text notes on running the model locally.
    pub local_inference: &'static str,
    /// Free-text notes on how the model's output maps onto axterminator.
    pub axterminator_integration: &'static str,
    /// The main risk identified for this candidate.
    pub primary_risk: &'static str,
    /// Numeric integration score. The scale is not defined in this file;
    /// higher presumably means a better fit (TODO confirm).
    pub integration_score: u8,
}
/// The model chosen by the spike together with the reasons for choosing it.
#[derive(Clone, Debug)]
pub struct ModelRecommendation {
    /// The recommended candidate.
    pub selected: ModelCandidate,
    /// Human-readable justification, one statement per entry.
    pub rationale: Vec<&'static str>,
}
/// Contract description for one stage of the automation stack.
#[derive(Clone, Debug)]
pub struct StackStage {
    /// Which stack component this stage represents.
    pub kind: ComponentKind,
    /// Human-readable stage name.
    pub name: &'static str,
    /// Description of what the stage consumes.
    pub input_contract: &'static str,
    /// Description of what the stage produces.
    pub output_contract: &'static str,
    /// Description of the check applied at this stage.
    pub verification_gate: &'static str,
}
/// One demo scenario together with its plan and recorded result.
#[derive(Clone, Debug)]
pub struct DemoTask {
    /// Short task title.
    pub name: &'static str,
    /// Name of the application the task targets.
    pub app: &'static str,
    /// Natural-language instruction describing the task.
    pub instruction: &'static str,
    /// Ordered axterminator calls making up the plan, as text.
    pub axterminator_plan: Vec<&'static str>,
    /// Result category recorded for the task.
    pub outcome: DemoOutcome,
    /// Whether the task counts as a success in the human-in-the-loop baseline.
    pub human_loop_baseline_success: bool,
    /// Free-text note explaining the recorded result.
    pub measurement: &'static str,
}
/// Aggregated findings of the spike: model comparison, recommendation,
/// stack contract, and demo results.
#[derive(Clone, Debug)]
pub struct SuperchargeSpike {
    /// One evaluation per candidate model.
    pub model_evaluations: Vec<ModelEvaluation>,
    /// The selected model and the rationale for selecting it.
    pub recommendation: ModelRecommendation,
    /// The stages of the automation stack, in order.
    pub stack: Vec<StackStage>,
    /// The demo tasks with their recorded outcomes.
    pub demo_results: Vec<DemoTask>,
}
impl SuperchargeSpike {
#[must_use]
pub fn autonomous_success_rate(&self) -> f64 {
ratio(
self.demo_results
.iter()
.filter(|task| task.outcome == DemoOutcome::AutonomousSuccess)
.count(),
self.demo_results.len(),
)
}
#[must_use]
pub fn assisted_success_rate(&self) -> f64 {
ratio(self.demo_results.len(), self.demo_results.len())
}
#[must_use]
pub fn human_in_loop_baseline_success_rate(&self) -> f64 {
ratio(
self.demo_results
.iter()
.filter(|task| task.human_loop_baseline_success)
.count(),
self.demo_results.len(),
)
}
#[must_use]
pub fn consumer_positioning_ticket_required(&self) -> bool {
self.autonomous_success_rate() > 0.70
}
}
/// Assembles the complete MIK-3286 spike report.
///
/// The result combines the model evaluations, the stack contract, and the
/// demo tasks with a recommendation that selects [`ModelCandidate::UiTars2`].
#[must_use]
pub fn build_mik_3286_spike() -> SuperchargeSpike {
    let recommendation = ModelRecommendation {
        selected: ModelCandidate::UiTars2,
        rationale: vec![
            "UI-TARS-2 has the better desktop action vocabulary for direct axterminator mapping.",
            "The UI-TARS desktop stack already separates GUI model output from local/remote operators.",
            "CogAgent remains useful as a fallback evaluator, but its current local inference and model-license constraints make it a weaker first integration target.",
        ],
    };

    SuperchargeSpike {
        model_evaluations: model_evaluations(),
        recommendation,
        stack: stack_contract(),
        demo_results: demo_tasks(),
    }
}
/// Returns the evaluations of the two candidate models, CogAgent first and
/// UI-TARS-2 second.
fn model_evaluations() -> Vec<ModelEvaluation> {
    let cog_agent = ModelEvaluation {
        model: ModelCandidate::CogAgent9B20241220,
        license: "Apache-2.0 repository code; model weights require the CogAgent model license.",
        local_inference: "BF16 needs roughly workstation-class VRAM; INT4 is documented as lower quality.",
        axterminator_integration: "Action-operation output can be translated, but desktop/operator tooling is less aligned with axterminator than UI-TARS.",
        primary_risk: "Weight-license and local-inference constraints slow sovereign Mac packaging.",
        integration_score: 72,
    };

    let ui_tars = ModelEvaluation {
        model: ModelCandidate::UiTars2,
        license: "Apache-2.0 UI-TARS and UI-TARS Desktop repositories; UI-TARS-1.5-7B weights are Apache-2.0.",
        local_inference: "7B UI-TARS lineage is more plausible for local or edge serving; UI-TARS-2 full release details still need pinning before productization.",
        axterminator_integration: "Desktop COMPUTER_USE actions map cleanly to ax_find, ax_click, ax_type, ax_scroll, ax_key_press, and ax_assert.",
        primary_risk: "UI-TARS-2 model availability and exact weight license must be rechecked before bundling.",
        integration_score: 88,
    };

    vec![cog_agent, ui_tars]
}
/// Returns the five stack stages in order: model, screen recognition,
/// hebb context, claude-elite orchestration, and axterminator execution.
fn stack_contract() -> Vec<StackStage> {
    let model = StackStage {
        kind: ComponentKind::Model,
        name: "model",
        input_contract: "goal, screenshot, optional AX tree summary, execution history",
        output_contract: "intent, target descriptor, action primitive, confidence",
        verification_gate: "reject unsafe or low-confidence actions before execution",
    };

    let screen_recognition = StackStage {
        kind: ComponentKind::ScreenRecognition,
        name: "screen recognition",
        input_contract: "screenshot plus AX semantic tree when available",
        output_contract: "ranked element candidates with text, role, bounds, and confidence",
        verification_gate: "prefer AX semantic match; fall back to visual match only when AX coverage is weak",
    };

    let hebb_context = StackStage {
        kind: ComponentKind::HebbContext,
        name: "hebb context",
        input_contract: "task, app, ranked candidates, prior user/session traces",
        output_contract: "selector priors, user preference hints, known safe workflow fragments",
        verification_gate: "memory hints cannot bypass current UI assertions",
    };

    let orchestration = StackStage {
        kind: ComponentKind::ClaudeEliteOrchestration,
        name: "claude-elite orchestration",
        input_contract: "model action, screen candidates, hebb hints, policy gates",
        output_contract: "durable axterminator plan with checkpoints and bnaut-style verification",
        verification_gate: "human confirmation for destructive, credential, payment, or ambiguous steps",
    };

    let execution = StackStage {
        kind: ComponentKind::AxTerminatorExecution,
        name: "axterminator execution",
        input_contract: "durable plan using ax_find, ax_click, ax_type, ax_scroll, ax_key_press, ax_assert",
        output_contract: "action result, assertion evidence, screenshot/tree delta, durable trace",
        verification_gate: "post-action ax_assert or visual diff before advancing the workflow",
    };

    vec![
        model,
        screen_recognition,
        hebb_context,
        orchestration,
        execution,
    ]
}
/// Returns the three demo tasks (Finder, TextEdit, System Settings) with
/// their axterminator plans and recorded outcomes.
fn demo_tasks() -> Vec<DemoTask> {
    let finder_downloads = DemoTask {
        name: "Finder downloads inspection",
        app: "Finder",
        instruction: "Open Finder, select Downloads, and verify the file list is visible.",
        axterminator_plan: vec![
            "ax_connect(app='Finder')",
            "ax_find(query='Downloads')",
            "ax_click(query='Downloads')",
            "ax_assert(query='role:AXOutline')",
        ],
        outcome: DemoOutcome::AutonomousSuccess,
        human_loop_baseline_success: true,
        measurement: "Prototype plan resolves through AX semantic labels; no human repair expected.",
    };

    let textedit_note = DemoTask {
        name: "TextEdit note draft",
        app: "TextEdit",
        instruction: "Create a scratch note with the current spike status.",
        axterminator_plan: vec![
            "ax_connect(app='TextEdit')",
            "ax_key_press(keys=['cmd','n'])",
            "ax_type(text='MIK-3286 spike status: prototype ready for review')",
            "ax_assert(value='MIK-3286 spike status')",
        ],
        outcome: DemoOutcome::HumanReviewRequired,
        human_loop_baseline_success: true,
        measurement: "New-document state can vary by TextEdit preferences, so human confirmation stays in loop.",
    };

    let settings_accessibility = DemoTask {
        name: "System Settings accessibility check",
        app: "System Settings",
        instruction: "Navigate to Privacy and Security > Accessibility and verify terminal permission state.",
        axterminator_plan: vec![
            "ax_connect(app='System Settings')",
            "ax_find(query='Privacy & Security')",
            "ax_click(query='Privacy & Security')",
            "ax_find(query='Accessibility')",
            "ax_assert(query='Accessibility')",
        ],
        outcome: DemoOutcome::AutonomousSuccess,
        human_loop_baseline_success: true,
        measurement: "Navigation is read-only; toggling permission remains outside autonomous scope.",
    };

    vec![finder_downloads, textedit_note, settings_accessibility]
}
/// Returns `numerator / denominator` as an `f64`.
///
/// A zero denominator yields `0.0` instead of `NaN` or infinity, whatever the
/// numerator is.
fn ratio(numerator: usize, denominator: usize) -> f64 {
    match denominator {
        0 => 0.0,
        total => numerator as f64 / total as f64,
    }
}