tj_core/
finalize.rs

1//! Finalize — bring a legacy task to a finished shape.
2//!
3//! One LLM call reads a task's full event history and returns a judgment:
4//! a human-readable title, a one-sentence outcome, and whether the events
5//! clearly show the task was finished (so `complete` may close it). The
6//! model decides — same word in different contexts misleads heuristics, and
7//! a title like "пТак обясни…" is natural language yet a useless title, so
8//! only a reader of the whole history can call it.
9
10use crate::llm::LlmBackend;
11use anyhow::Context;
12use serde::Deserialize;
13
14/// The model's verdict on a task, distilled from its events.
15#[derive(Debug, Clone, PartialEq, Deserialize)]
16pub struct FinalizeJudgment {
17    /// True when the current title is a poor description of the task and
18    /// should be replaced by `title`. False echoes a good human title back.
19    #[serde(default)]
20    pub retitle: bool,
21    /// A short human-readable title (≈5–10 words).
22    #[serde(default)]
23    pub title: String,
24    /// True only when the events clearly show the task was finished. When
25    /// unclear, the model leaves it false and `complete` keeps it open.
26    #[serde(default)]
27    pub done: bool,
28    /// `done` | `abandoned` | `superseded` — only used when `done` is true.
29    #[serde(default)]
30    pub outcome_tag: String,
31    /// One sentence: what actually happened / where the task ended.
32    #[serde(default)]
33    pub outcome: String,
34    /// Short rationale for done / still-open — shown to the user.
35    #[serde(default)]
36    pub reason: String,
37}
38
39impl FinalizeJudgment {
40    /// Apply the proposed title only when the model flagged the current one
41    /// as poor AND offered a non-empty, genuinely different replacement.
42    pub fn should_apply_title(&self, current_title: &str) -> bool {
43        self.retitle && !self.title.trim().is_empty() && self.title.trim() != current_title.trim()
44    }
45
46    /// Map the model's tag to the validated close enum; falls back to `done`
47    /// for an empty/unknown tag so the close path never rejects it.
48    pub fn normalized_tag(&self) -> &str {
49        match self.outcome_tag.trim() {
50            "abandoned" => "abandoned",
51            "superseded" => "superseded",
52            _ => "done",
53        }
54    }
55}
56
/// Assemble the judge prompt for one task.
///
/// `current_title` is the task's present title. `event_lines` is its history,
/// oldest first, with each entry already rendered by the caller as
/// `[type] text`; the entries are joined with newlines. The prompt asks the
/// model for a single JSON object whose keys match [`FinalizeJudgment`].
pub fn build_prompt(current_title: &str, event_lines: &[String]) -> String {
    // Fixed opening instruction.
    const PREAMBLE: &str = concat!(
        "You are finalizing a software task's journal. Read its full history ",
        "and reply with ONE JSON object, nothing else.\n\n",
    );
    // Fixed closing part: the reply shape and the "be conservative" rule.
    // The keys are emitted flush-left (no indentation inside the braces).
    const REPLY_SPEC: &str = concat!(
        "Return exactly this JSON shape:\n",
        "{\n",
        "\"retitle\": <true if the current title is a poor description of the task ",
        "(a log line, a chat echo, a URL, a file path, a question fragment) and should ",
        "be replaced; false if it already names the task well>,\n",
        "\"title\": \"<a short, human-readable task title, 5-10 words, in the language ",
        "of the history; echo the current title if retitle is false>\",\n",
        "\"done\": <true ONLY if the events clearly show the task was finished ",
        "(fix shipped, question answered, decision carried out); false if it is ",
        "unclear or still in progress>,\n",
        "\"outcome_tag\": \"<done | abandoned | superseded>\",\n",
        "\"outcome\": \"<one sentence: what actually happened or where it ended>\",\n",
        "\"reason\": \"<short: why you judged it done or still open>\"\n",
        "}\n",
        "Be conservative about \"done\": if the history does not clearly show the task ",
        "was completed, set done=false.",
    );

    let mut prompt = String::from(PREAMBLE);
    prompt.push_str("Current title: ");
    prompt.push_str(current_title);
    prompt.push_str("\n\nEvent history (oldest first):\n");
    prompt.push_str(&event_lines.join("\n"));
    prompt.push_str("\n\n");
    prompt.push_str(REPLY_SPEC);
    prompt
}
84
85/// Parse the model reply (a JSON object, possibly inside a ```json fence).
86pub fn parse_judgment(text: &str) -> anyhow::Result<FinalizeJudgment> {
87    let json_str = text
88        .trim()
89        .trim_start_matches("```json")
90        .trim_start_matches("```")
91        .trim_end_matches("```")
92        .trim();
93    // Tolerate leading/trailing prose by slicing to the outermost braces.
94    let slice = match (json_str.find('{'), json_str.rfind('}')) {
95        (Some(a), Some(b)) if b > a => &json_str[a..=b],
96        _ => json_str,
97    };
98    serde_json::from_str(slice)
99        .with_context(|| format!("finalize JSON parse failed; got: {json_str}"))
100}
101
102/// One judge call: prompt → model → parsed judgment, with the token usage the
103/// backend reported for the call.
104pub fn judge(
105    current_title: &str,
106    event_lines: &[String],
107    backend: &dyn LlmBackend,
108) -> anyhow::Result<(FinalizeJudgment, crate::llm::LlmUsage)> {
109    let prompt = build_prompt(current_title, event_lines);
110    let (reply, usage) = backend.complete_usage(&prompt, 512)?;
111    Ok((parse_judgment(&reply)?, usage))
112}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// Backend stub that answers every prompt with the same canned reply.
    struct MockBackend(String);

    impl LlmBackend for MockBackend {
        fn complete(&self, _prompt: &str, _max_tokens: u32) -> anyhow::Result<String> {
            Ok(self.0.clone())
        }

        fn name(&self) -> &'static str {
            "mock"
        }
    }

    /// Judgment fixture; `outcome` and `reason` are left empty.
    fn fixture(retitle: bool, title: &str, done: bool, outcome_tag: &str) -> FinalizeJudgment {
        FinalizeJudgment {
            retitle,
            title: title.to_owned(),
            done,
            outcome_tag: outcome_tag.to_owned(),
            outcome: String::new(),
            reason: String::new(),
        }
    }

    #[test]
    fn parses_plain_json() {
        let raw = r#"{"retitle":true,"title":"Fix voucher refund","done":true,
                "outcome_tag":"done","outcome":"Refunded the missing 50%.","reason":"Fix shipped."}"#;
        let parsed = parse_judgment(raw).unwrap();

        assert!(parsed.retitle);
        assert_eq!(parsed.title, "Fix voucher refund");
        assert!(parsed.done);
        assert_eq!(parsed.normalized_tag(), "done");
    }

    #[test]
    fn parses_fenced_json_with_prose() {
        // Leading prose plus a ```json fence around the object.
        let reply = "Here is the result:\n```json\n{\"retitle\":false,\"title\":\"Keep me\",\
\"done\":false,\"outcome_tag\":\"\",\"outcome\":\"\",\"reason\":\"still investigating\"}\n```\n";
        let parsed = parse_judgment(reply).unwrap();

        assert!(!parsed.retitle);
        assert!(!parsed.done);
        assert_eq!(parsed.reason, "still investigating");
    }

    #[test]
    fn unknown_tag_falls_back_to_done() {
        let verdict = fixture(false, "", true, "weird");
        assert_eq!(verdict.normalized_tag(), "done");
    }

    #[test]
    fn should_apply_title_only_when_flagged_and_different() {
        let mut verdict = fixture(true, "Good title", false, "");

        // Flagged, non-empty and different: replace.
        assert!(verdict.should_apply_title("#: 5"));
        // Proposal equals the current title: nothing to change.
        assert!(!verdict.should_apply_title("Good title"));

        // The model asked to keep the title: never replace, even if different.
        verdict.retitle = false;
        assert!(!verdict.should_apply_title("#: 5"));

        // A whitespace-only proposal is never applied.
        verdict.retitle = true;
        verdict.title = "   ".into();
        assert!(!verdict.should_apply_title("#: 5"));
    }

    #[test]
    fn prompt_includes_title_and_history() {
        let history: [String; 2] = ["[open] #: 5".into(), "[decision] use SQL pack".into()];
        let prompt = build_prompt("#: 5", &history);

        assert!(prompt.contains("Current title: #: 5"));
        assert!(prompt.contains("[decision] use SQL pack"));
        assert!(prompt.contains("\"done\""));
    }

    #[test]
    fn judge_routes_through_backend() {
        let canned =
            r#"{"retitle":true,"title":"T","done":false,"outcome_tag":"","outcome":"","reason":"r"}"#;
        let backend = MockBackend(canned.into());

        let (verdict, _usage) = judge("old", &["[open] old".into()], &backend).unwrap();

        assert_eq!(verdict.title, "T");
        assert!(!verdict.done);
    }
}
tj_core/finalize.rs

tj_core/
finalize.rs