caliban_output_styles/learning.rs
1//! Post-processor for the `Learning` style.
2//!
3//! Walks the assistant's text and inserts `TODO(human)` markers as comments
4//! inside fenced code blocks immediately after each function-definition
5//! line. The heuristic is intentionally conservative — we only mark
6//! definition lines we can identify by simple lexical patterns. Anything we
7//! can't identify is left untouched.
8//!
9//! Markers the model already emitted on prose lines are preserved verbatim;
10//! we do not re-tag them. (A future v2 may add a `<learning-todo>` span for
11//! richer TUI highlighting.)
12
13use std::borrow::Cow;
14
15use caliban_agent_core::AssistantPostProcessor;
16
/// Post-processor that adds `TODO(human)` markers for the Learning style.
///
/// The type carries no state; all of the work is done by
/// [`insert_todo_human_markers`], which scans the fenced code blocks of an
/// assistant message for lines that open a function definition (Rust, Go,
/// Python, JavaScript/TypeScript and the C-family tags are recognised) and
/// writes a `TODO(human): ...` comment line right after each one, using the
/// comment syntax of the block's language.
#[derive(Clone, Debug, Default)]
pub struct LearningPostProcessor;

impl LearningPostProcessor {
    /// Create the post-processor. Equivalent to `Default::default()`.
    #[must_use]
    pub const fn new() -> Self {
        LearningPostProcessor
    }
}
34
35impl AssistantPostProcessor for LearningPostProcessor {
36 fn process<'a>(&self, text: &'a str) -> Cow<'a, str> {
37 let processed = insert_todo_human_markers(text);
38 if processed == text {
39 Cow::Borrowed(text)
40 } else {
41 Cow::Owned(processed)
42 }
43 }
44}
45
46/// Inspect `text` (a completed assistant turn) and insert `TODO(human)`
47/// markers after each function-definition line inside a fenced code block.
48///
49/// Returns the original text unchanged when nothing matches.
50#[must_use]
51pub fn insert_todo_human_markers(text: &str) -> String {
52 let mut out = String::with_capacity(text.len() + 32);
53 let mut in_fence = false;
54 let mut fence_lang: Option<String> = None;
55 for line in text.split_inclusive('\n') {
56 out.push_str(line);
57
58 // Detect fence open / close. The fence marker is "```" optionally
59 // followed by a language tag on the same line.
60 let trimmed = line.trim_end_matches('\n').trim_start();
61 if let Some(rest) = trimmed.strip_prefix("```") {
62 if in_fence {
63 // closing fence
64 in_fence = false;
65 fence_lang = None;
66 } else {
67 in_fence = true;
68 let lang = rest.trim().to_string();
69 fence_lang = if lang.is_empty() { None } else { Some(lang) };
70 }
71 continue;
72 }
73
74 if !in_fence {
75 continue;
76 }
77
78 // We are inside a fenced code block. Check whether this line is a
79 // function definition we recognise.
80 let lang = fence_lang.as_deref().unwrap_or("");
81 if is_function_definition(lang, trimmed) {
82 // Emit a TODO(human) marker on the next line, using the
83 // appropriate comment syntax.
84 let comment_prefix = match lang {
85 "py" | "python" | "sh" | "bash" | "ruby" | "rb" | "yaml" | "yml" | "toml" => "# ",
86 _ => "// ",
87 };
88 // Preserve the indentation of the function-definition line so the
89 // marker lands cleanly inside the body.
90 let indent: String = line
91 .chars()
92 .take_while(|c| *c == ' ' || *c == '\t')
93 .collect();
94 // Add one extra indent step (4 spaces / 1 tab) so the marker
95 // appears inside the function body when the brace-on-same-line
96 // convention is used. For languages where the body lives on the
97 // next line indented further (Python), the indent below still
98 // looks reasonable.
99 let extra = if indent.contains('\t') { "\t" } else { " " };
100 out.push_str(&indent);
101 out.push_str(extra);
102 out.push_str(comment_prefix);
103 out.push_str("TODO(human): fill in this implementation\n");
104 }
105 }
106
107 out
108}
109
/// Cheap lexical check: does `trimmed_line` open a function body in `lang`?
///
/// `trimmed_line` must already have its leading whitespace removed; trailing
/// whitespace is ignored here. `lang` is the fence's language tag ("" when
/// the fence had none). The check is deliberately conservative and returns
/// `true` only for single-line heads whose body opens on the same line:
///
/// * `rs` / `rust`: contains the `fn` keyword as its own word and ends with
///   `{`. Declarations without a body (`fn f();`) and heads split over
///   several lines are skipped.
/// * `go`: starts with `func ` and ends with `{`.
/// * `py` / `python`: starts with `def ` or `async def ` and ends with `:`.
/// * JavaScript / TypeScript tags: uses the `function` keyword form and ends
///   with `{`. Arrow functions are not detected.
/// * C-family tags (`c`, `cpp`, `cc`, `h`, `hpp`, `java`, `kotlin`, `kt`):
///   ends with `{`, contains both `(` and `)`, and does not begin with a
///   control-flow or type-declaration keyword.
/// * anything else: starts with `fn ` and ends with `{`.
///
/// For the brace languages, lines that start with `//`, `/*` or `*` are
/// treated as comments and never match.
fn is_function_definition(lang: &str, trimmed_line: &str) -> bool {
    /// True when `line` starts like a `//`, `/* ... */` or ` * ...` comment line.
    fn is_slash_comment(line: &str) -> bool {
        line.starts_with("//") || line.starts_with("/*") || line.starts_with('*')
    }

    /// True when the first word of `line` (after any closing braces, as in
    /// `} else if (..) {`) is a keyword that opens a block which is not a
    /// function body.
    fn starts_with_block_keyword(line: &str) -> bool {
        const KEYWORDS: [&str; 17] = [
            "if", "else", "for", "while", "do", "switch", "when", "try", "catch", "finally",
            "synchronized", "class", "struct", "enum", "interface", "object", "namespace",
        ];
        let head = line.trim_start_matches('}').trim_start();
        let first_word = head
            .split(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
            .next()
            .unwrap_or("");
        KEYWORDS.contains(&first_word)
    }

    let line = trimmed_line.trim_end();
    if line.is_empty() {
        return false;
    }
    let opens_block = line.ends_with('{');

    match lang {
        // Rust: `fn name(...)`, possibly preceded by visibility / `async` /
        // `unsafe` / `const` / `extern "C"`. Requiring the trailing `{`
        // keeps the marker out of parameter lists and trait declarations.
        "rs" | "rust" => {
            opens_block
                && !is_slash_comment(line)
                && (line.starts_with("fn ") || line.contains(" fn ") || line.contains("\tfn "))
        }
        // Go: `func name(...)` or `func (recv T) name(...)`.
        "go" => opens_block && line.starts_with("func "),
        // Python: the head ends with `:` instead of a brace.
        "py" | "python" => {
            (line.starts_with("def ") || line.starts_with("async def ")) && line.ends_with(':')
        }
        // JavaScript / TypeScript: only the `function` keyword form, so
        // object literals and arrow callbacks are left alone.
        "js" | "ts" | "jsx" | "tsx" | "javascript" | "typescript" => {
            opens_block
                && !is_slash_comment(line)
                && (line.starts_with("function ")
                    || line.contains(" function ")
                    || line.starts_with("async function "))
        }
        // C / C++ / Java / Kotlin: a parenthesised head that opens a block
        // and is not `if (...) {`, `for (...) {`, `class Foo(...) {`, etc.
        "c" | "cpp" | "cc" | "h" | "hpp" | "java" | "kotlin" | "kt" => {
            opens_block
                && line.contains('(')
                && line.contains(')')
                && !is_slash_comment(line)
                && !starts_with_block_keyword(line)
        }
        // Unknown or missing tag: only an unambiguous Rust-style head.
        _ => opens_block && line.starts_with("fn "),
    }
}
152
/// No-op post-processor for every style other than Learning.
///
/// Its [`AssistantPostProcessor`] implementation hands the input straight
/// back as a borrowed [`Cow`], so no text is copied or modified.
#[derive(Clone, Copy, Debug, Default)]
pub struct IdentityPostProcessor;

impl IdentityPostProcessor {
    /// Create the identity post-processor. Equivalent to `Default::default()`.
    #[must_use]
    pub const fn new() -> Self {
        IdentityPostProcessor
    }
}
165
166impl AssistantPostProcessor for IdentityPostProcessor {
167 fn process<'a>(&self, text: &'a str) -> Cow<'a, str> {
168 Cow::Borrowed(text)
169 }
170}