Skip to main content

caliban_output_styles/
learning.rs

1//! Post-processor for the `Learning` style.
2//!
3//! Walks the assistant's text and inserts `TODO(human)` markers as comments
4//! inside fenced code blocks immediately after each function-definition
5//! line. The heuristic is intentionally conservative — we only mark
6//! definition lines we can identify by simple lexical patterns. Anything we
7//! can't identify is left untouched.
8//!
9//! Markers the model already emitted on prose lines are preserved verbatim;
10//! we do not re-tag them. (A future v2 may add a `<learning-todo>` span for
11//! richer TUI highlighting.)
12
13use std::borrow::Cow;
14
15use caliban_agent_core::AssistantPostProcessor;
16
/// Post-processor for the Learning style: a best-effort injector of
/// `TODO(human)` markers.
///
/// Inside every fenced code block of the assistant's text, lines that open a
/// function definition in a recognised language (Rust, Go, Python,
/// JavaScript/TypeScript, C/C++/Java) are followed by a freshly emitted
/// `TODO(human): ...` comment line written in that language's comment
/// syntax. Everything else is passed through untouched.
#[derive(Clone, Debug, Default)]
pub struct LearningPostProcessor;

impl LearningPostProcessor {
    /// Create a post-processor. The type is a unit struct, so there is no
    /// configuration to supply.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
34
35impl AssistantPostProcessor for LearningPostProcessor {
36    fn process<'a>(&self, text: &'a str) -> Cow<'a, str> {
37        let processed = insert_todo_human_markers(text);
38        if processed == text {
39            Cow::Borrowed(text)
40        } else {
41            Cow::Owned(processed)
42        }
43    }
44}
45
46/// Inspect `text` (a completed assistant turn) and insert `TODO(human)`
47/// markers after each function-definition line inside a fenced code block.
48///
49/// Returns the original text unchanged when nothing matches.
50#[must_use]
51pub fn insert_todo_human_markers(text: &str) -> String {
52    let mut out = String::with_capacity(text.len() + 32);
53    let mut in_fence = false;
54    let mut fence_lang: Option<String> = None;
55    for line in text.split_inclusive('\n') {
56        out.push_str(line);
57
58        // Detect fence open / close. The fence marker is "```" optionally
59        // followed by a language tag on the same line.
60        let trimmed = line.trim_end_matches('\n').trim_start();
61        if let Some(rest) = trimmed.strip_prefix("```") {
62            if in_fence {
63                // closing fence
64                in_fence = false;
65                fence_lang = None;
66            } else {
67                in_fence = true;
68                let lang = rest.trim().to_string();
69                fence_lang = if lang.is_empty() { None } else { Some(lang) };
70            }
71            continue;
72        }
73
74        if !in_fence {
75            continue;
76        }
77
78        // We are inside a fenced code block. Check whether this line is a
79        // function definition we recognise.
80        let lang = fence_lang.as_deref().unwrap_or("");
81        if is_function_definition(lang, trimmed) {
82            // Emit a TODO(human) marker on the next line, using the
83            // appropriate comment syntax.
84            let comment_prefix = match lang {
85                "py" | "python" | "sh" | "bash" | "ruby" | "rb" | "yaml" | "yml" | "toml" => "# ",
86                _ => "// ",
87            };
88            // Preserve the indentation of the function-definition line so the
89            // marker lands cleanly inside the body.
90            let indent: String = line
91                .chars()
92                .take_while(|c| *c == ' ' || *c == '\t')
93                .collect();
94            // Add one extra indent step (4 spaces / 1 tab) so the marker
95            // appears inside the function body when the brace-on-same-line
96            // convention is used. For languages where the body lives on the
97            // next line indented further (Python), the indent below still
98            // looks reasonable.
99            let extra = if indent.contains('\t') { "\t" } else { "    " };
100            out.push_str(&indent);
101            out.push_str(extra);
102            out.push_str(comment_prefix);
103            out.push_str("TODO(human): fill in this implementation\n");
104        }
105    }
106
107    out
108}
109
/// Cheap lexical check: does `trimmed_line` start a function definition in
/// `lang`?
///
/// `lang` is the fence's language tag (empty when the fence has none).
/// `trimmed_line` is one line of the code block with its leading whitespace
/// already removed; trailing whitespace is ignored here.
///
/// Returns `true` only when the line looks like a function head and opens
/// the body on the same line (`{`, or `:` for Python), so that a marker
/// written on the following line lands inside the body. Declarations without
/// a body, multi-line signatures, comments and control-flow statements are
/// rejected.
fn is_function_definition(lang: &str, trimmed_line: &str) -> bool {
    // Strip trailing whitespace so the brace/colon detection is robust.
    let line = trimmed_line.trim_end();
    if line.is_empty() {
        return false;
    }

    let opens_brace_body = line.ends_with('{');
    // `//`, `/* ...` and the ` * ...` continuation lines of a block comment.
    let is_comment = line.starts_with("//") || line.starts_with("/*") || line.starts_with('*');

    match lang {
        // Rust: `fn name(...)` possibly preceded by visibility / `async` /
        // `unsafe` / `const` / `pub(crate)` etc. The line must end with `{`
        // so we know the body opens on the next line; this also rules out
        // trait declarations (`fn f();`) and split signatures (`fn f(`).
        "rs" | "rust" => {
            opens_brace_body
                && !is_comment
                && (line.starts_with("fn ") || line.contains(" fn ") || line.contains("\tfn "))
        }
        // Go: `func name(...)` or `func (recv T) name(...)`, ending with `{`.
        "go" => opens_brace_body && line.starts_with("func "),
        // Python: `def name(...)` or `async def name(...)`, ending with `:`.
        "py" | "python" => {
            line.ends_with(':') && (line.starts_with("def ") || line.starts_with("async def "))
        }
        // JavaScript / TypeScript: only the `function` keyword form is
        // recognised; arrow functions are deliberately skipped so object
        // literals do not misfire.
        "js" | "ts" | "jsx" | "tsx" | "javascript" | "typescript" => {
            opens_brace_body
                && (line.starts_with("function ")
                    || line.starts_with("async function ")
                    || line.contains(" function "))
        }
        // C / C++ / Java / Kotlin: best-effort — a non-comment line ending
        // in `{` that contains `(` and `)` is likely a function head, unless
        // it starts with a control-flow keyword or continues a previous
        // block (`} else if (...) {`).
        "c" | "cpp" | "cc" | "h" | "hpp" | "java" | "kotlin" | "kt" => {
            let first_word = line
                .split(|c: char| !(c.is_alphanumeric() || c == '_'))
                .next()
                .unwrap_or("");
            let is_control_flow = matches!(
                first_word,
                "if" | "else"
                    | "for"
                    | "while"
                    | "do"
                    | "switch"
                    | "catch"
                    | "try"
                    | "synchronized"
                    | "when"
                    | "return"
            );
            opens_brace_body
                && !is_comment
                && !is_control_flow
                && !line.starts_with('}')
                && line.contains('(')
                && line.contains(')')
        }
        // Default: only fire on an unambiguous Rust-style leading `fn`
        // keyword, even when no (or an unknown) language tag is present.
        _ => opens_brace_body && line.starts_with("fn "),
    }
}
152
/// No-op post-processor used by every style other than Learning.
///
/// Its `process` implementation returns the input text as-is, wrapped in
/// [`Cow::Borrowed`].
#[derive(Clone, Copy, Debug, Default)]
pub struct IdentityPostProcessor;

impl IdentityPostProcessor {
    /// Create an identity post-processor. The type is a unit struct, so
    /// there is nothing to configure.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
165
166impl AssistantPostProcessor for IdentityPostProcessor {
167    fn process<'a>(&self, text: &'a str) -> Cow<'a, str> {
168        Cow::Borrowed(text)
169    }
170}