Skip to main content

bifp_core/
plan.rs

//! Teach-then-handoff task decomposition — the mechanic behind BIFP's core complaint:
//! an agent should neither refuse a step outright nor complete it invisibly.
//! Confident steps proceed; steps needing human authority or context are surfaced
//! explicitly as things to teach or hand back, not silently skipped.
5
6use crate::trit::{decide, Trit};
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Subtask {
11    pub label: String,
12    /// Evidence on [-1.0, 1.0] for how confidently the agent can proceed alone.
13    pub confidence: f64,
14}
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
17#[cfg_attr(feature = "mcp", derive(schemars::JsonSchema))]
18pub struct PlannedTask {
19    pub label: String,
20    pub confidence: f64,
21    pub trit: i8,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25#[cfg_attr(feature = "mcp", derive(schemars::JsonSchema))]
26pub struct Plan {
27    pub goal: String,
28    /// Steps the agent proceeds on directly.
29    pub action_queue: Vec<PlannedTask>,
30    /// Steps that need human input, authority, or teaching before they can proceed —
31    /// not a dead end: the explicit list of what to explain or hand back.
32    pub hold_queue: Vec<PlannedTask>,
33    pub overall_trit: i8,
34}
35
36/// Splits subtasks into an action queue and a hold queue. A subtask lands in the hold
37/// queue only when its own confidence classifies as Reject — Tend and Affirm steps
38/// proceed. `confidence` is a 0..1 "can I do this alone" score, remapped to the -1..1
39/// evidence scale via `(c - 0.5) * 2` before classification.
40///
41/// The remapping was reverse-engineered from the reference tool's live output on
42/// 2026-07-02 (confidence 0.95/0.85/0.80 -> affirm, 0.55/0.40 -> tend, 0.30/0.15 ->
43/// reject) rather than from its docstring, which claims a plain >=0.7 cutoff that the
44/// observed output does not actually match. This reimplementation follows what was
45/// observed, not what was documented, and says so.
46pub fn plan(goal: impl Into<String>, subtasks: &[Subtask]) -> Plan {
47    let mut action_queue = Vec::new();
48    let mut hold_queue = Vec::new();
49    let mut trits = Vec::new();
50
51    for s in subtasks {
52        let evidence = (s.confidence - 0.5) * 2.0;
53        let (trit, _conf) = decide(&[evidence]);
54        trits.push(evidence);
55        let pt = PlannedTask {
56            label: s.label.clone(),
57            confidence: s.confidence,
58            trit: trit.as_i8(),
59        };
60        if trit == Trit::Reject {
61            hold_queue.push(pt);
62        } else {
63            action_queue.push(pt);
64        }
65    }
66
67    let (overall, _c) = decide(&trits);
68    Plan {
69        goal: goal.into(),
70        action_queue,
71        hold_queue,
72        overall_trit: overall.as_i8(),
73    }
74}
75
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Subtask` from a label and a 0..1 confidence score.
    fn step(label: &str, confidence: f64) -> Subtask {
        Subtask {
            label: label.into(),
            confidence,
        }
    }

    /// Labels of a queue, in queue order.
    fn labels(queue: &[PlannedTask]) -> Vec<&str> {
        queue.iter().map(|task| task.label.as_str()).collect()
    }

    /// A clearly confident step proceeds; a clearly unconfident one is held.
    #[test]
    fn low_confidence_steps_go_to_hold_queue() {
        let p = plan(
            "ship it",
            &[step("decompile", 0.95), step("get human sign-off", 0.15)],
        );
        assert_eq!(p.action_queue.len(), 1);
        assert_eq!(labels(&p.hold_queue), ["get human sign-off"]);
    }

    /// Reproduces the exact 7-subtask real-world case from the 2026-07-02 runbook.
    #[test]
    fn matches_observed_runbook_split() {
        let subtasks = [
            step("decompile", 0.95),
            step("classify severity", 0.85),
            step("red-flag vs magenkraempfe", 0.55),
            step("pick price tier", 0.40),
            step("drei fragen block", 0.80),
            step("thread consolidation", 0.30),
            step("human sign-off", 0.15),
        ];
        let p = plan("ship a companion R1", &subtasks);
        assert_eq!(p.action_queue.len(), 5);
        assert_eq!(
            labels(&p.hold_queue),
            ["thread consolidation", "human sign-off"]
        );
    }
}