Skip to main content

tj_core/
finalize.rs

//! Finalize — bring a legacy task to a finished shape.
//!
//! One LLM call reads a task's full event history and returns a judgment:
//! a human-readable title, a one-sentence outcome, and whether the events
//! clearly show the task was finished (so `complete` may close it). The
//! model decides because heuristics fail here: the same word means different
//! things in different contexts, and a title like "пТак обясни…" is natural
//! language yet useless as a title, so only a reader of the whole history
//! can make the call.
9
10use crate::llm::LlmBackend;
11use anyhow::Context;
12use serde::Deserialize;
13
14/// The model's verdict on a task, distilled from its events.
15#[derive(Debug, Clone, PartialEq, Deserialize)]
16pub struct FinalizeJudgment {
17    /// True when the current title is a poor description of the task and
18    /// should be replaced by `title`. False echoes a good human title back.
19    #[serde(default)]
20    pub retitle: bool,
21    /// A short human-readable title (≈5–10 words).
22    #[serde(default)]
23    pub title: String,
24    /// True only when the events clearly show the task was finished. When
25    /// unclear, the model leaves it false and `complete` keeps it open.
26    #[serde(default)]
27    pub done: bool,
28    /// `done` | `abandoned` | `superseded` — only used when `done` is true.
29    #[serde(default)]
30    pub outcome_tag: String,
31    /// One sentence: what actually happened / where the task ended.
32    #[serde(default)]
33    pub outcome: String,
34    /// Short rationale for done / still-open — shown to the user.
35    #[serde(default)]
36    pub reason: String,
37}
38
39impl FinalizeJudgment {
40    /// Apply the proposed title only when the model flagged the current one
41    /// as poor AND offered a non-empty, genuinely different replacement.
42    pub fn should_apply_title(&self, current_title: &str) -> bool {
43        self.retitle && !self.title.trim().is_empty() && self.title.trim() != current_title.trim()
44    }
45
46    /// Map the model's tag to the validated close enum; falls back to `done`
47    /// for an empty/unknown tag so the close path never rejects it.
48    pub fn normalized_tag(&self) -> &str {
49        match self.outcome_tag.trim() {
50            "abandoned" => "abandoned",
51            "superseded" => "superseded",
52            _ => "done",
53        }
54    }
55}
56
/// Assemble the judge prompt for one task.
///
/// `current_title` is interpolated verbatim after `Current title: `, and
/// `event_lines` (each already rendered as `[type] text` by the caller) are
/// joined with newlines under the `Event history (oldest first):` heading.
/// The fixed text around them asks for a single JSON object whose keys match
/// the fields of [`FinalizeJudgment`].
pub fn build_prompt(current_title: &str, event_lines: &[String]) -> String {
    // The fixed text lives in `concat!` tables, one literal per fragment, so
    // the exact bytes sent to the model are visible at a glance.
    const INTRO: &str = concat!(
        "You are finalizing a software task's journal. Read its full history ",
        "and reply with ONE JSON object, nothing else.\n\n",
    );
    // Note: the member lines of the JSON skeleton carry no leading indentation.
    const REPLY_SPEC: &str = concat!(
        "Return exactly this JSON shape:\n",
        "{\n",
        "\"retitle\": <true if the current title is a poor description of the task ",
        "(a log line, a chat echo, a URL, a file path, a question fragment) and should ",
        "be replaced; false if it already names the task well>,\n",
        "\"title\": \"<a short, human-readable task title, 5-10 words, in the language ",
        "of the history; echo the current title if retitle is false>\",\n",
        "\"done\": <true ONLY if the events clearly show the task was finished ",
        "(fix shipped, question answered, decision carried out); false if it is ",
        "unclear or still in progress>,\n",
        "\"outcome_tag\": \"<done | abandoned | superseded>\",\n",
        "\"outcome\": \"<one sentence: what actually happened or where it ended>\",\n",
        "\"reason\": \"<short: why you judged it done or still open>\"\n",
        "}\n",
        "Be conservative about \"done\": if the history does not clearly show the task ",
        "was completed, set done=false.",
    );

    let history = event_lines.join("\n");
    format!(
        "{INTRO}Current title: {current_title}\n\nEvent history (oldest first):\n{history}\n\n{REPLY_SPEC}"
    )
}
84
85/// Parse the model reply (a JSON object, possibly inside a ```json fence).
86pub fn parse_judgment(text: &str) -> anyhow::Result<FinalizeJudgment> {
87    let json_str = text
88        .trim()
89        .trim_start_matches("```json")
90        .trim_start_matches("```")
91        .trim_end_matches("```")
92        .trim();
93    // Tolerate leading/trailing prose by slicing to the outermost braces.
94    let slice = match (json_str.find('{'), json_str.rfind('}')) {
95        (Some(a), Some(b)) if b > a => &json_str[a..=b],
96        _ => json_str,
97    };
98    serde_json::from_str(slice)
99        .with_context(|| format!("finalize JSON parse failed; got: {json_str}"))
100}
101
102/// One judge call: prompt → model → parsed judgment.
103pub fn judge(
104    current_title: &str,
105    event_lines: &[String],
106    backend: &dyn LlmBackend,
107) -> anyhow::Result<FinalizeJudgment> {
108    let prompt = build_prompt(current_title, event_lines);
109    let reply = backend.complete(&prompt, 512)?;
110    parse_judgment(&reply)
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Backend stub that answers every prompt with one canned reply.
    struct CannedBackend(String);

    impl LlmBackend for CannedBackend {
        fn complete(&self, _prompt: &str, _max_tokens: u32) -> anyhow::Result<String> {
            Ok(self.0.clone())
        }

        fn name(&self) -> &'static str {
            "mock"
        }
    }

    /// Build a judgment with empty `outcome` and `reason`; the tests below
    /// only vary the remaining four fields.
    fn judgment(retitle: bool, title: &str, done: bool, outcome_tag: &str) -> FinalizeJudgment {
        FinalizeJudgment {
            retitle,
            title: title.to_owned(),
            done,
            outcome_tag: outcome_tag.to_owned(),
            outcome: String::new(),
            reason: String::new(),
        }
    }

    #[test]
    fn parses_plain_json() {
        let reply = r#"{"retitle":true,"title":"Fix voucher refund","done":true,
                "outcome_tag":"done","outcome":"Refunded the missing 50%.","reason":"Fix shipped."}"#;
        let parsed = parse_judgment(reply).unwrap();

        assert!(parsed.retitle);
        assert_eq!(parsed.title, "Fix voucher refund");
        assert!(parsed.done);
        assert_eq!(parsed.normalized_tag(), "done");
    }

    #[test]
    fn parses_fenced_json_with_prose() {
        let reply = concat!(
            "Here is the result:\n```json\n{\"retitle\":false,\"title\":\"Keep me\",",
            "\"done\":false,\"outcome_tag\":\"\",\"outcome\":\"\",\"reason\":\"still investigating\"}\n```\n",
        );
        let parsed = parse_judgment(reply).unwrap();

        assert!(!parsed.retitle);
        assert!(!parsed.done);
        assert_eq!(parsed.reason, "still investigating");
    }

    #[test]
    fn unknown_tag_falls_back_to_done() {
        let odd = judgment(false, "", true, "weird");
        assert_eq!(odd.normalized_tag(), "done");
    }

    #[test]
    fn should_apply_title_only_when_flagged_and_different() {
        let flagged = judgment(true, "Good title", false, "");
        assert!(flagged.should_apply_title("#: 5"));
        // Same title → no churn.
        assert!(!flagged.should_apply_title("Good title"));

        // Model says keep → never replace, even if different.
        let keep = judgment(false, "Good title", false, "");
        assert!(!keep.should_apply_title("#: 5"));

        // Empty proposal → never replace.
        let blank = judgment(true, "   ", false, "");
        assert!(!blank.should_apply_title("#: 5"));
    }

    #[test]
    fn prompt_includes_title_and_history() {
        let events = vec![
            String::from("[open] #: 5"),
            String::from("[decision] use SQL pack"),
        ];
        let prompt = build_prompt("#: 5", &events);

        assert!(prompt.contains("Current title: #: 5"));
        assert!(prompt.contains("[decision] use SQL pack"));
        assert!(prompt.contains("\"done\""));
    }

    #[test]
    fn judge_routes_through_backend() {
        let canned =
            r#"{"retitle":true,"title":"T","done":false,"outcome_tag":"","outcome":"","reason":"r"}"#;
        let backend = CannedBackend(canned.to_owned());
        let events = vec![String::from("[open] old")];

        let verdict = judge("old", &events, &backend).unwrap();

        assert_eq!(verdict.title, "T");
        assert!(!verdict.done);
    }
}