Skip to main content

shiplog_cluster_llm/
lib.rs

1//! LLM-assisted workstream clustering adapters.
2//!
3//! Provides [`LlmClusterer`] plus pluggable backends for OpenAI-compatible
4//! APIs, and [`LlmWithFallback`] for repo-based fallback on LLM failure.
5
6mod client;
7mod config;
8pub mod parse;
9pub mod prompt;
10
11pub use client::{FailingLlmBackend, LlmBackend, MockLlmBackend, OpenAiCompatibleBackend};
12pub use config::LlmConfig;
13pub use parse::parse_llm_response;
14pub use prompt::{chunk_events, format_event_list, summarize_event, system_prompt};
15
16use anyhow::Result;
17use shiplog_ports::WorkstreamClusterer;
18use shiplog_schema::event::EventEnvelope;
19use shiplog_schema::workstream::WorkstreamsFile;
20use shiplog_workstreams::RepoClusterer;
21
22/// LLM-assisted workstream clusterer.
23pub struct LlmClusterer {
24    backend: Box<dyn LlmBackend>,
25    config: LlmConfig,
26}
27
28impl LlmClusterer {
29    pub fn new(backend: Box<dyn LlmBackend>, config: LlmConfig) -> Self {
30        Self { backend, config }
31    }
32
33    fn cluster_chunk(&self, events: &[EventEnvelope], indices: &[usize]) -> Result<String> {
34        let subset: Vec<&EventEnvelope> = indices.iter().map(|&i| &events[i]).collect();
35        let event_list: String = subset
36            .iter()
37            .enumerate()
38            .map(|(i, ev)| format!("[{}] {}", i, summarize_event(ev)))
39            .collect::<Vec<_>>()
40            .join("\n");
41
42        let system = system_prompt(self.config.max_workstreams);
43        let user_msg =
44            format!("Cluster these development events into workstreams:\n\n{event_list}");
45
46        self.backend.complete(&system, &user_msg)
47    }
48}
49
50impl WorkstreamClusterer for LlmClusterer {
51    fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
52        let chunks = chunk_events(events, self.config.max_input_tokens);
53
54        if chunks.len() <= 1 {
55            // Single pass
56            let event_list = format_event_list(events);
57            let system = system_prompt(self.config.max_workstreams);
58            let user_msg =
59                format!("Cluster these development events into workstreams:\n\n{event_list}");
60            let response = self.backend.complete(&system, &user_msg)?;
61            parse_llm_response(&response, events)
62        } else {
63            // Multi-chunk: cluster each chunk, merge results
64            let mut all_workstreams = Vec::new();
65
66            for chunk_indices in &chunks {
67                let response = self.cluster_chunk(events, chunk_indices)?;
68                // Map local indices back to global
69                let chunk_events: Vec<EventEnvelope> =
70                    chunk_indices.iter().map(|&i| events[i].clone()).collect();
71                let mut ws_file = parse_llm_response(&response, &chunk_events)?;
72                all_workstreams.append(&mut ws_file.workstreams);
73            }
74
75            Ok(WorkstreamsFile {
76                version: 1,
77                generated_at: chrono::Utc::now(),
78                workstreams: all_workstreams,
79            })
80        }
81    }
82}
83
84/// Wrapper that falls back to repo-based clustering on LLM failure.
85pub struct LlmWithFallback {
86    llm: LlmClusterer,
87    fallback: RepoClusterer,
88}
89
90impl LlmWithFallback {
91    pub fn new(llm: LlmClusterer) -> Self {
92        Self {
93            llm,
94            fallback: RepoClusterer,
95        }
96    }
97}
98
99impl WorkstreamClusterer for LlmWithFallback {
100    fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
101        match self.llm.cluster(events) {
102            Ok(ws) => Ok(ws),
103            Err(e) => {
104                eprintln!("WARN: LLM clustering failed, falling back to repo-based: {e}");
105                self.fallback.cluster(events)
106            }
107        }
108    }
109}
110
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use shiplog_ids::EventId;
    use shiplog_schema::event::*;

    /// Builds a merged GitHub pull-request event in `repo` with the given
    /// PR number and title.
    fn make_test_event(repo: &str, pr_num: u64, title: &str) -> EventEnvelope {
        let pr_label = pr_num.to_string();
        EventEnvelope {
            id: EventId::from_parts(["test", pr_label.as_str()]),
            kind: EventKind::PullRequest,
            occurred_at: Utc::now(),
            actor: Actor {
                login: "user".into(),
                id: None,
            },
            repo: RepoRef {
                full_name: repo.into(),
                html_url: None,
                visibility: RepoVisibility::Unknown,
            },
            payload: EventPayload::PullRequest(PullRequestEvent {
                number: pr_num,
                title: title.into(),
                state: PullRequestState::Merged,
                created_at: Utc::now(),
                merged_at: Some(Utc::now()),
                additions: Some(10),
                deletions: Some(5),
                changed_files: Some(3),
                touched_paths_hint: Vec::new(),
                window: None,
            }),
            tags: Vec::new(),
            links: Vec::new(),
            source: SourceRef {
                system: SourceSystem::Github,
                url: None,
                opaque_id: None,
            },
        }
    }

    /// Returns a clusterer whose mock backend always answers with `response`,
    /// using the default configuration.
    fn clusterer_replying(response: String) -> LlmClusterer {
        let backend = MockLlmBackend { response };
        LlmClusterer::new(Box::new(backend), LlmConfig::default())
    }

    #[test]
    fn mock_llm_produces_valid_workstreams() {
        let reply = serde_json::json!({
            "workstreams": [{
                "title": "Auth improvements",
                "summary": "Authentication work",
                "tags": ["auth", "backend"],
                "event_indices": [0, 1],
                "receipt_indices": [0]
            }]
        })
        .to_string();
        let clusterer = clusterer_replying(reply);

        let events = vec![
            make_test_event("org/auth", 1, "Add OAuth"),
            make_test_event("org/auth", 2, "Fix token refresh"),
        ];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 1);

        let first = &ws.workstreams[0];
        assert_eq!(first.title, "Auth improvements");
        assert_eq!(first.events.len(), 2);
    }

    #[test]
    fn fallback_on_llm_failure() {
        let llm = LlmClusterer::new(Box::new(FailingLlmBackend), LlmConfig::default());
        let clusterer = LlmWithFallback::new(llm);

        let events = vec![
            make_test_event("org/repo-a", 1, "Feature A"),
            make_test_event("org/repo-b", 2, "Feature B"),
        ];

        // The LLM backend always fails, so the repo-based fallback must
        // produce the result.
        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 2);
    }

    #[test]
    fn orphan_events_collected() {
        // The reply assigns only event 0, leaving event 1 as an orphan.
        let reply = serde_json::json!({
            "workstreams": [{
                "title": "Only first",
                "summary": "test",
                "tags": [],
                "event_indices": [0],
                "receipt_indices": [0]
            }]
        })
        .to_string();
        let clusterer = clusterer_replying(reply);

        let events = vec![
            make_test_event("org/repo", 1, "Assigned"),
            make_test_event("org/repo", 2, "Orphan"),
        ];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 2);
        assert_eq!(ws.workstreams[1].title, "Uncategorized");
    }

    #[test]
    fn out_of_bounds_indices_skipped() {
        // Index 99 does not correspond to any supplied event.
        let reply = serde_json::json!({
            "workstreams": [{
                "title": "Test",
                "summary": "test",
                "tags": [],
                "event_indices": [0, 99],
                "receipt_indices": [0]
            }]
        })
        .to_string();
        let clusterer = clusterer_replying(reply);

        let events = vec![make_test_event("org/repo", 1, "Only event")];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 1);
        assert_eq!(ws.workstreams[0].events.len(), 1);
    }

    #[test]
    fn malformed_json_returns_error() {
        let clusterer = clusterer_replying("not json at all".to_string());

        let events = vec![make_test_event("org/repo", 1, "Test")];
        assert!(clusterer.cluster(&events).is_err());
    }
}