1mod client;
7mod config;
8pub mod parse;
9pub mod prompt;
10
11pub use client::{FailingLlmBackend, LlmBackend, MockLlmBackend, OpenAiCompatibleBackend};
12pub use config::LlmConfig;
13pub use parse::parse_llm_response;
14pub use prompt::{chunk_events, format_event_list, summarize_event, system_prompt};
15
16use anyhow::Result;
17use shiplog_ports::WorkstreamClusterer;
18use shiplog_schema::event::EventEnvelope;
19use shiplog_schema::workstream::WorkstreamsFile;
20use shiplog_workstreams::RepoClusterer;
21
22pub struct LlmClusterer {
24 backend: Box<dyn LlmBackend>,
25 config: LlmConfig,
26}
27
28impl LlmClusterer {
29 pub fn new(backend: Box<dyn LlmBackend>, config: LlmConfig) -> Self {
30 Self { backend, config }
31 }
32
33 fn cluster_chunk(&self, events: &[EventEnvelope], indices: &[usize]) -> Result<String> {
34 let subset: Vec<&EventEnvelope> = indices.iter().map(|&i| &events[i]).collect();
35 let event_list: String = subset
36 .iter()
37 .enumerate()
38 .map(|(i, ev)| format!("[{}] {}", i, summarize_event(ev)))
39 .collect::<Vec<_>>()
40 .join("\n");
41
42 let system = system_prompt(self.config.max_workstreams);
43 let user_msg =
44 format!("Cluster these development events into workstreams:\n\n{event_list}");
45
46 self.backend.complete(&system, &user_msg)
47 }
48}
49
50impl WorkstreamClusterer for LlmClusterer {
51 fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
52 let chunks = chunk_events(events, self.config.max_input_tokens);
53
54 if chunks.len() <= 1 {
55 let event_list = format_event_list(events);
57 let system = system_prompt(self.config.max_workstreams);
58 let user_msg =
59 format!("Cluster these development events into workstreams:\n\n{event_list}");
60 let response = self.backend.complete(&system, &user_msg)?;
61 parse_llm_response(&response, events)
62 } else {
63 let mut all_workstreams = Vec::new();
65
66 for chunk_indices in &chunks {
67 let response = self.cluster_chunk(events, chunk_indices)?;
68 let chunk_events: Vec<EventEnvelope> =
70 chunk_indices.iter().map(|&i| events[i].clone()).collect();
71 let mut ws_file = parse_llm_response(&response, &chunk_events)?;
72 all_workstreams.append(&mut ws_file.workstreams);
73 }
74
75 Ok(WorkstreamsFile {
76 version: 1,
77 generated_at: chrono::Utc::now(),
78 workstreams: all_workstreams,
79 })
80 }
81 }
82}
83
84pub struct LlmWithFallback {
86 llm: LlmClusterer,
87 fallback: RepoClusterer,
88}
89
90impl LlmWithFallback {
91 pub fn new(llm: LlmClusterer) -> Self {
92 Self {
93 llm,
94 fallback: RepoClusterer,
95 }
96 }
97}
98
99impl WorkstreamClusterer for LlmWithFallback {
100 fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
101 match self.llm.cluster(events) {
102 Ok(ws) => Ok(ws),
103 Err(e) => {
104 eprintln!("WARN: LLM clustering failed, falling back to repo-based: {e}");
105 self.fallback.cluster(events)
106 }
107 }
108 }
109}
110
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use shiplog_ids::EventId;
    use shiplog_schema::event::*;

    /// Builds a merged pull-request event in `repo` with the given number and title.
    fn make_test_event(repo: &str, pr_num: u64, title: &str) -> EventEnvelope {
        let pull_request = PullRequestEvent {
            number: pr_num,
            title: title.into(),
            state: PullRequestState::Merged,
            created_at: Utc::now(),
            merged_at: Some(Utc::now()),
            additions: Some(10),
            deletions: Some(5),
            changed_files: Some(3),
            touched_paths_hint: vec![],
            window: None,
        };
        let actor = Actor {
            login: "user".into(),
            id: None,
        };
        let repo_ref = RepoRef {
            full_name: repo.into(),
            html_url: None,
            visibility: RepoVisibility::Unknown,
        };
        let source = SourceRef {
            system: SourceSystem::Github,
            url: None,
            opaque_id: None,
        };

        EventEnvelope {
            id: EventId::from_parts(["test", &pr_num.to_string()]),
            kind: EventKind::PullRequest,
            occurred_at: Utc::now(),
            actor,
            repo: repo_ref,
            payload: EventPayload::PullRequest(pull_request),
            tags: vec![],
            links: vec![],
            source,
        }
    }

    /// Serialises an LLM reply containing exactly one workstream.
    ///
    /// `receipt_indices` is always `[0]`, which is what every test in this module uses.
    fn one_workstream_reply(
        title: &str,
        summary: &str,
        tags: &[&str],
        event_indices: &[u64],
    ) -> String {
        serde_json::json!({
            "workstreams": [{
                "title": title,
                "summary": summary,
                "tags": tags,
                "event_indices": event_indices,
                "receipt_indices": [0]
            }]
        })
        .to_string()
    }

    /// Returns a default-configured clusterer whose mock backend replies with `response`.
    fn clusterer_replying(response: String) -> LlmClusterer {
        let backend = MockLlmBackend { response };
        LlmClusterer::new(Box::new(backend), LlmConfig::default())
    }

    /// A well-formed reply referencing both events yields one workstream holding both.
    #[test]
    fn mock_llm_produces_valid_workstreams() {
        let reply = one_workstream_reply(
            "Auth improvements",
            "Authentication work",
            &["auth", "backend"],
            &[0, 1],
        );
        let clusterer = clusterer_replying(reply);

        let events = vec![
            make_test_event("org/auth", 1, "Add OAuth"),
            make_test_event("org/auth", 2, "Fix token refresh"),
        ];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 1);
        assert_eq!(ws.workstreams[0].title, "Auth improvements");
        assert_eq!(ws.workstreams[0].events.len(), 2);
    }

    /// With `FailingLlmBackend` the wrapper still succeeds, and two events in two
    /// different repos come back as two workstreams.
    #[test]
    fn fallback_on_llm_failure() {
        let llm = LlmClusterer::new(Box::new(FailingLlmBackend), LlmConfig::default());
        let clusterer = LlmWithFallback::new(llm);

        let events = vec![
            make_test_event("org/repo-a", 1, "Feature A"),
            make_test_event("org/repo-b", 2, "Feature B"),
        ];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 2);
    }

    /// An event the reply never references ends up in a trailing "Uncategorized"
    /// workstream.
    #[test]
    fn orphan_events_collected() {
        let reply = one_workstream_reply("Only first", "test", &[], &[0]);
        let clusterer = clusterer_replying(reply);

        let events = vec![
            make_test_event("org/repo", 1, "Assigned"),
            make_test_event("org/repo", 2, "Orphan"),
        ];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 2);
        assert_eq!(ws.workstreams[1].title, "Uncategorized");
    }

    /// An event index beyond the input (99 with a single event) is dropped rather
    /// than causing a failure.
    #[test]
    fn out_of_bounds_indices_skipped() {
        let reply = one_workstream_reply("Test", "test", &[], &[0, 99]);
        let clusterer = clusterer_replying(reply);

        let events = vec![make_test_event("org/repo", 1, "Only event")];

        let ws = clusterer.cluster(&events).unwrap();
        assert_eq!(ws.workstreams.len(), 1);
        assert_eq!(ws.workstreams[0].events.len(), 1);
    }

    /// A reply that is not JSON surfaces as an error from `cluster`.
    #[test]
    fn malformed_json_returns_error() {
        let clusterer = clusterer_replying("not json at all".to_string());

        let events = vec![make_test_event("org/repo", 1, "Test")];
        assert!(clusterer.cluster(&events).is_err());
    }
}