Skip to main content

open_loops/
distill.rs

1//! Distillation: builds the prompt with evidence (git + sessions) and calls the
2//! LLM via a configurable command (default "claude -p"). Injectable command means
3//! tests use `cat` and users can swap LLMs without changing code.
4use crate::scanner::OpenLoop;
5use crate::sessions::SessionExcerpt;
6use anyhow::{bail, Context, Result};
7use std::io::Write;
8use std::process::{Command, Stdio};
9
/// Trust level of a reconstruction: how well the matched AI sessions line up
/// with the git evidence of a branch.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Confidence {
    /// A session overlaps the commit window and mentions the branch name.
    High,
    /// Sessions were matched heuristically, but their alignment is uncertain.
    Medium,
    /// No AI session was matched — the context comes from git only.
    Low,
}
20
21/// Derives a confidence level from matched session excerpts.
22pub fn compute_confidence(excerpts: &[SessionExcerpt]) -> Confidence {
23    if excerpts.is_empty() {
24        return Confidence::Low;
25    }
26    if excerpts.iter().any(|e| e.in_window && e.mentions_branch) {
27        Confidence::High
28    } else {
29        Confidence::Medium
30    }
31}
32
33fn confidence_label(c: Confidence) -> &'static str {
34    match c {
35        Confidence::High => "high",
36        Confidence::Medium => "medium",
37        Confidence::Low => "low",
38    }
39}
40
41fn confidence_explanation(c: Confidence) -> &'static str {
42    match c {
43        Confidence::High => "AI sessions align with branch commits",
44        Confidence::Medium => {
45            "AI sessions found but alignment uncertain — audit Sources before trusting"
46        }
47        Confidence::Low => "no AI sessions matched — context from git only",
48    }
49}
50
51fn format_confidence_line(c: Confidence) -> String {
52    format!(
53        "**Confidence:** {} — {}",
54        confidence_label(c),
55        confidence_explanation(c)
56    )
57}
58
59/// Builds the context-reconstruction prompt for an open loop.
60///
61/// Includes branch, commits, diffstat, and AI session excerpts.
62/// When there are no sessions, explicitly declares none found.
63pub fn build_prompt(
64    lp: &OpenLoop,
65    default_branch: &str,
66    commits: &str,
67    diffstat: &str,
68    excerpts: &[SessionExcerpt],
69) -> String {
70    let mut p = format!(
71        "You reconstruct the context of a paused work branch.\n\
72         Answer in markdown, in English, with exactly these sections:\n\n\
73         ## Why\n## Done\n## Remaining\n## Next step\n\n\
74         Be concrete and direct. Rely ONLY on the evidence below.\n\
75         If the evidence is insufficient for a section, write \"insufficient evidence\".\n\n\
76         # Branch\n{key} (base: {default_branch})\n\n\
77         # Commits (base..branch)\n{commits}\n\n\
78         # Diffstat\n{diffstat}\n",
79        key = lp.key(),
80    );
81    if excerpts.is_empty() {
82        p.push_str("\n# AI sessions\nnone found\n");
83    } else {
84        for e in excerpts {
85            p.push_str(&format!(
86                "\n# Session {} (modified {})\n{}\n",
87                e.source,
88                e.modified.format("%Y-%m-%d"),
89                e.text
90            ));
91        }
92    }
93    p
94}
95
96/// Runs the LLM command with the prompt on stdin and returns stdout.
97///
98/// The command is interpreted via `sh -c`, so it may contain pipes and
99/// redirections (e.g. `"claude -p | tee /tmp/output.md"`).
100///
101/// # Errors
102///
103/// Returns `Err` if the process cannot be started or exits with a non-zero
104/// status (e.g. LLM not installed, missing credential).
105pub fn run_llm(llm_command: &str, prompt: &str) -> Result<String> {
106    let mut child = Command::new("sh")
107        .arg("-c")
108        .arg(llm_command)
109        .stdin(Stdio::piped())
110        .stdout(Stdio::piped())
111        .stderr(Stdio::piped())
112        .spawn()
113        .with_context(|| {
114            format!(
115                "failed to run the LLM command `{llm_command}` — \
116                 is it installed? Adjust llm_command in config.toml"
117            )
118        })?;
119    child
120        .stdin
121        .take()
122        .ok_or_else(|| anyhow::anyhow!("stdin not available for the LLM process"))?
123        .write_all(prompt.as_bytes())
124        .or_else(|e| {
125            // broken pipe means the LLM exited before reading all of stdin — that's fine
126            if e.kind() == std::io::ErrorKind::BrokenPipe {
127                Ok(())
128            } else {
129                Err(e).context("failed to write the prompt to the LLM stdin")
130            }
131        })?;
132    let out = child
133        .wait_with_output()
134        .context("failed to wait for the LLM process")?;
135    if !out.status.success() {
136        bail!(
137            "LLM command failed (`{llm_command}`): {}",
138            String::from_utf8_lossy(&out.stderr).trim()
139        );
140    }
141    Ok(String::from_utf8_lossy(&out.stdout).into_owned())
142}
143
144/// Appends the `## Sources` section to the LLM-generated document.
145///
146/// Lets the user audit the evidence used in the reconstruction
147/// (mitigates hallucination risk — see spec §Risks).
148pub fn with_sources(
149    answer: &str,
150    lp: &OpenLoop,
151    excerpts: &[SessionExcerpt],
152    confidence: Confidence,
153) -> String {
154    let short_sha = &lp.head_sha[..7.min(lp.head_sha.len())];
155    let mut doc = format!(
156        "# {}\n\n{}\n\n{}\n\n## Sources\n\n- git: branch {} (HEAD {})\n",
157        lp.key(),
158        format_confidence_line(confidence),
159        answer.trim(),
160        lp.branch,
161        short_sha
162    );
163    for e in excerpts {
164        doc.push_str(&format!(
165            "- AI session: {} (modified {})\n",
166            e.source,
167            e.modified.format("%Y-%m-%d")
168        ));
169    }
170    doc
171}
172
173fn session_match_tags(e: &SessionExcerpt) -> String {
174    let mut tags = Vec::new();
175    if e.in_window {
176        tags.push("in commit window");
177    }
178    if e.mentions_branch {
179        tags.push("mentions branch");
180    }
181    if tags.is_empty() {
182        "matched by heuristic".into()
183    } else {
184        tags.join(", ")
185    }
186}
187
188/// Shows git and session evidence that would feed distillation, without calling the LLM.
189pub fn format_dry_run(
190    lp: &OpenLoop,
191    default_branch: &str,
192    commits: &str,
193    diffstat: &str,
194    excerpts: &[SessionExcerpt],
195    confidence: Confidence,
196) -> String {
197    let short_sha = &lp.head_sha[..7.min(lp.head_sha.len())];
198    let mut out = format!(
199        "# {}\n\n{}\n\n\
200         ## Git\n\n\
201         - branch: {} (HEAD {})\n\
202         - base: {}\n\
203         - ahead: {}, behind: {}\n\n\
204         ### Commits (base..branch)\n{}\n\n\
205         ### Diffstat\n{}\n\n\
206         ## AI sessions\n",
207        lp.key(),
208        format_confidence_line(confidence),
209        lp.branch,
210        short_sha,
211        default_branch,
212        lp.ahead,
213        lp.behind,
214        commits.trim_end(),
215        diffstat.trim_end(),
216    );
217    if excerpts.is_empty() {
218        out.push_str("none matched\n");
219    } else {
220        for e in excerpts {
221            out.push_str(&format!(
222                "- {} (modified {}) [{}]\n",
223                e.source,
224                e.modified.format("%Y-%m-%d"),
225                session_match_tags(e),
226            ));
227        }
228    }
229    out.push_str("\n---\nDry run — LLM not invoked. Run without `--dry-run` to distill.\n");
230    out
231}
232
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::OpenLoop;
    use crate::sessions::SessionExcerpt;
    use chrono::Utc;
    use std::path::PathBuf;

    /// Open loop fixture: branch `feat/login` of repo `app` with a 16-char HEAD sha.
    fn fake_loop() -> OpenLoop {
        OpenLoop {
            root_label: "app".into(),
            repo_name: "app".into(),
            repo_path: PathBuf::from("/tmp/app"),
            branch: "feat/login".into(),
            head_sha: "abcdef1234567890".into(),
            last_commit: Utc::now(),
            ahead: 2,
            behind: 1,
        }
    }

    /// Session fixture that satisfies both matching signals.
    fn fake_excerpt() -> SessionExcerpt {
        SessionExcerpt {
            source: "sessao1.jsonl".into(),
            modified: Utc::now(),
            text: "[user] implementa login".into(),
            in_window: true,
            mentions_branch: true,
        }
    }

    #[test]
    fn compute_confidence_levels() {
        assert_eq!(compute_confidence(&[]), Confidence::Low);
        let medium = SessionExcerpt {
            in_window: true,
            mentions_branch: false,
            ..fake_excerpt()
        };
        assert_eq!(compute_confidence(&[medium]), Confidence::Medium);
        assert_eq!(compute_confidence(&[fake_excerpt()]), Confidence::High);
    }

    #[test]
    fn compute_confidence_needs_both_signals_on_the_same_session() {
        let window_only = SessionExcerpt {
            in_window: true,
            mentions_branch: false,
            ..fake_excerpt()
        };
        let branch_only = SessionExcerpt {
            in_window: false,
            mentions_branch: true,
            ..fake_excerpt()
        };
        assert_eq!(
            compute_confidence(&[window_only, branch_only]),
            Confidence::Medium
        );
    }

    #[test]
    fn build_prompt_includes_evidence_and_sections() {
        let p = build_prompt(
            &fake_loop(),
            "main",
            "abc feat: wip",
            "x.txt | 2 +",
            &[fake_excerpt()],
        );
        assert!(p.contains("## Why"));
        assert!(p.contains("## Next step"));
        assert!(p.contains("app/feat/login"));
        assert!(p.contains("abc feat: wip"));
        assert!(p.contains("[user] implementa login"));
    }

    #[test]
    fn build_prompt_without_sessions_declares_absence() {
        let p = build_prompt(&fake_loop(), "main", "", "", &[]);
        assert!(p.contains("none found"));
    }

    #[test]
    fn run_llm_passes_prompt_via_stdin() {
        // `cat` echoes stdin: validates the contract without a real LLM
        let out = run_llm("cat", "test prompt").unwrap();
        assert_eq!(out.trim(), "test prompt");
    }

    #[test]
    fn run_llm_contextual_error_when_command_fails() {
        let err = run_llm("false", "x").unwrap_err();
        assert!(err.to_string().contains("LLM command failed"));
    }

    #[test]
    fn run_llm_error_carries_trimmed_stderr() {
        let err = run_llm("echo boom >&2; exit 3", "x").unwrap_err();
        // The command text also contains "boom", so pin the stderr suffix.
        assert!(err.to_string().ends_with(": boom"));
    }

    #[test]
    fn run_llm_tolerates_command_that_ignores_stdin() {
        // `true` exits without reading stdin; a broken pipe must not be an error.
        let out = run_llm("true", "x").unwrap();
        assert_eq!(out, "");
    }

    #[test]
    fn with_sources_appends_git_and_sessions() {
        let doc = with_sources(
            "## Why\nlogin",
            &fake_loop(),
            &[fake_excerpt()],
            Confidence::High,
        );
        assert!(doc.contains("## Sources"));
        assert!(doc.contains("**Confidence:** high"));
        assert!(doc.contains("(HEAD abcdef1)")); // short sha, exactly 7 chars
        assert!(doc.contains("sessao1.jsonl"));
    }

    #[test]
    fn session_match_tags_describe_each_signal() {
        let neither = SessionExcerpt {
            in_window: false,
            mentions_branch: false,
            ..fake_excerpt()
        };
        assert_eq!(session_match_tags(&neither), "matched by heuristic");
        let window_only = SessionExcerpt {
            in_window: true,
            mentions_branch: false,
            ..fake_excerpt()
        };
        assert_eq!(session_match_tags(&window_only), "in commit window");
    }

    #[test]
    fn format_dry_run_lists_evidence_without_llm_sections() {
        let doc = format_dry_run(
            &fake_loop(),
            "main",
            "abc feat: wip",
            "x.txt | 2 +",
            &[fake_excerpt()],
            Confidence::High,
        );
        assert!(doc.contains("**Confidence:** high"));
        assert!(doc.contains("abc feat: wip"));
        assert!(doc.contains("sessao1.jsonl"));
        assert!(doc.contains("in commit window, mentions branch"));
        assert!(doc.contains("Dry run — LLM not invoked"));
        assert!(!doc.contains("## Why"));
    }

    #[test]
    fn format_dry_run_without_sessions_reports_none_matched() {
        let doc = format_dry_run(&fake_loop(), "main", "", "", &[], Confidence::Low);
        assert!(doc.contains("## AI sessions\nnone matched\n"));
        assert!(doc.contains("- ahead: 2, behind: 1\n"));
    }

    #[test]
    fn with_sources_short_sha_when_head_sha_under_7_chars() {
        let lp = OpenLoop {
            branch: "feat/x".into(),
            head_sha: "ab1".into(), // 3 chars < 7
            ahead: 0,
            behind: 0,
            ..fake_loop()
        };
        let doc = with_sources("## Why\nconteudo", &lp, &[], Confidence::Low);
        // The whole sha is printed, with nothing appended or cut.
        assert!(doc.contains("- git: branch feat/x (HEAD ab1)\n"));
    }
}
359}