Skip to main content

open_loops/
distill.rs

//! Distillation: builds the prompt from the evidence (git + AI sessions) and calls
//! the LLM through a configurable command (default `claude -p`). Because the command
//! is injectable, tests can use `cat` and users can swap LLMs without changing code.
4use crate::output;
5use crate::scanner::OpenLoop;
6use crate::sessions::SessionExcerpt;
7use anyhow::{bail, Context, Result};
8use std::io::Write;
9use std::process::{Command, Stdio};
10
/// Trust level of a reconstruction: how well the matched AI sessions line up
/// with the git evidence for a branch.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Confidence {
    /// A session overlaps the commit window and mentions the branch name.
    High,
    /// Sessions were matched heuristically, but their alignment is uncertain.
    Medium,
    /// No AI sessions matched; git is the only source of context.
    Low,
}
21
22/// Derives a confidence level from matched session excerpts.
23pub fn compute_confidence(excerpts: &[SessionExcerpt]) -> Confidence {
24    if excerpts.is_empty() {
25        return Confidence::Low;
26    }
27    if excerpts.iter().any(|e| e.in_window && e.mentions_branch) {
28        Confidence::High
29    } else {
30        Confidence::Medium
31    }
32}
33
34fn confidence_label(c: Confidence) -> &'static str {
35    match c {
36        Confidence::High => "high",
37        Confidence::Medium => "medium",
38        Confidence::Low => "low",
39    }
40}
41
42fn confidence_explanation(c: Confidence) -> &'static str {
43    match c {
44        Confidence::High => "AI sessions align with branch commits",
45        Confidence::Medium => {
46            "AI sessions found but alignment uncertain — audit Sources before trusting"
47        }
48        Confidence::Low => "no AI sessions matched — context from git only",
49    }
50}
51
52fn format_confidence_line(c: Confidence) -> String {
53    format!(
54        "**Confidence:** {} — {}",
55        confidence_label(c),
56        confidence_explanation(c)
57    )
58}
59
60/// Builds the context-reconstruction prompt for an open loop.
61///
62/// Includes branch, commits, diffstat, and AI session excerpts.
63/// When there are no sessions, explicitly declares none found.
64pub fn build_prompt(
65    lp: &OpenLoop,
66    default_branch: &str,
67    commits: &str,
68    diffstat: &str,
69    excerpts: &[SessionExcerpt],
70) -> String {
71    let mut p = format!(
72        "You reconstruct the context of a paused work branch.\n\
73         Answer in markdown, in English, with exactly these sections:\n\n\
74         ## Why\n## Done\n## Remaining\n## Next step\n\n\
75         Be concrete and direct. Rely ONLY on the evidence below.\n\
76         If the evidence is insufficient for a section, write \"insufficient evidence\".\n\n\
77         # Branch\n{key} (base: {default_branch})\n\n\
78         # Commits (base..branch)\n{commits}\n\n\
79         # Diffstat\n{diffstat}\n",
80        key = lp.key(),
81    );
82    if excerpts.is_empty() {
83        p.push_str("\n# AI sessions\nnone found\n");
84    } else {
85        for e in excerpts {
86            p.push_str(&format!(
87                "\n# Session {} (modified {})\n{}\n",
88                e.source,
89                e.modified.format("%Y-%m-%d"),
90                e.text
91            ));
92        }
93    }
94    p
95}
96
97/// Runs the LLM command with the prompt on stdin and returns stdout.
98///
99/// The command is interpreted via `sh -c`, so it may contain pipes and
100/// redirections (e.g. `"claude -p | tee /tmp/output.md"`).
101///
102/// # Errors
103///
104/// Returns `Err` if the process cannot be started or exits with a non-zero
105/// status (e.g. LLM not installed, missing credential).
106pub fn run_llm(llm_command: &str, prompt: &str) -> Result<String> {
107    let mut child = Command::new("sh")
108        .arg("-c")
109        .arg(llm_command)
110        .stdin(Stdio::piped())
111        .stdout(Stdio::piped())
112        .stderr(Stdio::piped())
113        .spawn()
114        .with_context(|| {
115            format!(
116                "failed to run the LLM command `{llm_command}` — \
117                 is it installed? Adjust llm_command in config.toml"
118            )
119        })?;
120    child
121        .stdin
122        .take()
123        .ok_or_else(|| anyhow::anyhow!("stdin not available for the LLM process"))?
124        .write_all(prompt.as_bytes())
125        .or_else(|e| {
126            // broken pipe means the LLM exited before reading all of stdin — that's fine
127            if e.kind() == std::io::ErrorKind::BrokenPipe {
128                Ok(())
129            } else {
130                Err(e).context("failed to write the prompt to the LLM stdin")
131            }
132        })?;
133    let out = child
134        .wait_with_output()
135        .context("failed to wait for the LLM process")?;
136    if !out.status.success() {
137        bail!(
138            "LLM command failed (`{llm_command}`): {}",
139            String::from_utf8_lossy(&out.stderr).trim()
140        );
141    }
142    Ok(String::from_utf8_lossy(&out.stdout).into_owned())
143}
144
145/// Appends the `## Sources` section to the LLM-generated document.
146///
147/// Lets the user audit the evidence used in the reconstruction
148/// (mitigates hallucination risk — see spec §Risks).
149pub fn with_sources(
150    answer: &str,
151    lp: &OpenLoop,
152    excerpts: &[SessionExcerpt],
153    confidence: Confidence,
154) -> String {
155    let short_sha = &lp.head_sha[..7.min(lp.head_sha.len())];
156    let mut doc = format!(
157        "# {}\n\n{}\n\n{}\n\n## Sources\n\n- git: branch {} (HEAD {})\n",
158        lp.key(),
159        format_confidence_line(confidence),
160        answer.trim(),
161        lp.branch,
162        short_sha
163    );
164    for e in excerpts {
165        doc.push_str(&format!(
166            "- AI session: {} (modified {})\n",
167            e.source,
168            e.modified.format("%Y-%m-%d")
169        ));
170    }
171    doc
172}
173
174fn session_match_tags(e: &SessionExcerpt) -> String {
175    let mut tags = Vec::new();
176    if e.in_window {
177        tags.push("in commit window");
178    }
179    if e.mentions_branch {
180        tags.push("mentions branch");
181    }
182    if tags.is_empty() {
183        "matched by heuristic".into()
184    } else {
185        tags.join(", ")
186    }
187}
188
189fn format_ab(ahead: Option<u32>, behind: Option<u32>) -> String {
190    format!(
191        "{}, behind: {}",
192        output::fmt_count(ahead),
193        output::fmt_count(behind)
194    )
195}
196
197/// Shows git and session evidence that would feed distillation, without calling the LLM.
198pub fn format_dry_run(
199    lp: &OpenLoop,
200    default_branch: &str,
201    commits: &str,
202    diffstat: &str,
203    excerpts: &[SessionExcerpt],
204    confidence: Confidence,
205) -> String {
206    let short_sha = &lp.head_sha[..7.min(lp.head_sha.len())];
207    let mut out = format!(
208        "# {}\n\n{}\n\n\
209         ## Git\n\n\
210         - branch: {} (HEAD {})\n\
211         - base: {}\n\
212         - ahead: {}\n\n\
213         ### Commits (base..branch)\n{}\n\n\
214         ### Diffstat\n{}\n\n\
215         ## AI sessions\n",
216        lp.key(),
217        format_confidence_line(confidence),
218        lp.branch,
219        short_sha,
220        default_branch,
221        format_ab(lp.ahead, lp.behind),
222        commits.trim_end(),
223        diffstat.trim_end(),
224    );
225    if excerpts.is_empty() {
226        out.push_str("none matched\n");
227    } else {
228        for e in excerpts {
229            out.push_str(&format!(
230                "- {} (modified {}) [{}]\n",
231                e.source,
232                e.modified.format("%Y-%m-%d"),
233                session_match_tags(e),
234            ));
235        }
236    }
237    out.push_str("\n---\nDry run — LLM not invoked. Run without `--dry-run` to distill.\n");
238    out
239}
240
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::OpenLoop;
    use crate::sessions::SessionExcerpt;
    use chrono::Utc;
    use std::path::PathBuf;

    /// An open loop on `feat/login` with a 16-character HEAD SHA.
    fn fake_loop() -> OpenLoop {
        OpenLoop {
            root_label: "app".into(),
            repo_name: "app".into(),
            repo_path: PathBuf::from("/tmp/app"),
            branch: "feat/login".into(),
            head_sha: "abcdef1234567890".into(),
            last_commit: Utc::now(),
            ahead: Some(2),
            behind: Some(1),
        }
    }

    /// A session excerpt that is in the commit window and mentions the branch.
    fn fake_excerpt() -> SessionExcerpt {
        SessionExcerpt {
            source: "sessao1.jsonl".into(),
            modified: Utc::now(),
            text: "[user] implementa login".into(),
            in_window: true,
            mentions_branch: true,
        }
    }

    #[test]
    fn compute_confidence_levels() {
        // No sessions at all.
        assert_eq!(compute_confidence(&[]), Confidence::Low);

        // In the window, but the branch is never mentioned.
        let window_only = SessionExcerpt {
            in_window: true,
            mentions_branch: false,
            ..fake_excerpt()
        };
        assert_eq!(compute_confidence(&[window_only]), Confidence::Medium);

        // Both signals present.
        assert_eq!(compute_confidence(&[fake_excerpt()]), Confidence::High);
    }

    #[test]
    fn build_prompt_includes_evidence_and_sections() {
        let lp = fake_loop();
        let excerpts = [fake_excerpt()];
        let prompt = build_prompt(&lp, "main", "abc feat: wip", "x.txt | 2 +", &excerpts);
        assert!(prompt.contains("## Why"));
        assert!(prompt.contains("## Next step"));
        assert!(prompt.contains("app/feat/login"));
        assert!(prompt.contains("abc feat: wip"));
        assert!(prompt.contains("[user] implementa login"));
    }

    #[test]
    fn build_prompt_without_sessions_declares_absence() {
        let prompt = build_prompt(&fake_loop(), "main", "", "", &[]);
        assert!(prompt.contains("none found"));
    }

    #[test]
    fn run_llm_passes_prompt_via_stdin() {
        // `cat` echoes stdin: validates the contract without a real LLM
        let echoed = run_llm("cat", "test prompt").unwrap();
        assert_eq!(echoed.trim(), "test prompt");
    }

    #[test]
    fn run_llm_contextual_error_when_command_fails() {
        let failure = run_llm("false", "x").unwrap_err();
        assert!(failure.to_string().contains("LLM command failed"));
    }

    #[test]
    fn with_sources_appends_git_and_sessions() {
        let lp = fake_loop();
        let excerpts = [fake_excerpt()];
        let doc = with_sources("## Why\nlogin", &lp, &excerpts, Confidence::High);
        assert!(doc.contains("## Sources"));
        assert!(doc.contains("**Confidence:** high"));
        assert!(doc.contains("abcdef1")); // short sha
        assert!(doc.contains("sessao1.jsonl"));
    }

    #[test]
    fn format_dry_run_lists_evidence_without_llm_sections() {
        let lp = fake_loop();
        let excerpts = [fake_excerpt()];
        let doc = format_dry_run(
            &lp,
            "main",
            "abc feat: wip",
            "x.txt | 2 +",
            &excerpts,
            Confidence::High,
        );
        assert!(doc.contains("**Confidence:** high"));
        assert!(doc.contains("abc feat: wip"));
        assert!(doc.contains("sessao1.jsonl"));
        assert!(doc.contains("in commit window, mentions branch"));
        assert!(doc.contains("Dry run — LLM not invoked"));
        assert!(!doc.contains("## Why"));
    }

    #[test]
    fn with_sources_short_sha_when_head_sha_under_7_chars() {
        // Same repository fields as `fake_loop`, with a 3-character SHA.
        let short = OpenLoop {
            branch: "feat/x".into(),
            head_sha: "ab1".into(), // 3 chars < 7
            ahead: Some(0),
            behind: Some(0),
            ..fake_loop()
        };
        let doc = with_sources("## Why\nconteudo", &short, &[], Confidence::Low);
        assert!(doc.contains("ab1"));
        assert!(!doc.contains("ab1\0")); // no extra bytes
    }
}