mod client;
mod config;
pub mod parse;
pub mod prompt;
pub use client::{FailingLlmBackend, LlmBackend, MockLlmBackend, OpenAiCompatibleBackend};
pub use config::LlmConfig;
pub use parse::parse_llm_response;
pub use prompt::{chunk_events, format_event_list, summarize_event, system_prompt};
use crate::workstreams::RepoClusterer;
use anyhow::Result;
use shiplog::ports::WorkstreamClusterer;
use shiplog::schema::event::EventEnvelope;
use shiplog::schema::workstream::WorkstreamsFile;
pub struct LlmClusterer {
backend: Box<dyn LlmBackend>,
config: LlmConfig,
}
impl LlmClusterer {
pub fn new(backend: Box<dyn LlmBackend>, config: LlmConfig) -> Self {
Self { backend, config }
}
fn cluster_chunk(&self, events: &[EventEnvelope], indices: &[usize]) -> Result<String> {
let subset: Vec<&EventEnvelope> = indices.iter().map(|&i| &events[i]).collect();
let event_list: String = subset
.iter()
.enumerate()
.map(|(i, ev)| format!("[{}] {}", i, summarize_event(ev)))
.collect::<Vec<_>>()
.join("\n");
let system = system_prompt(self.config.max_workstreams);
let user_msg =
format!("Cluster these development events into workstreams:\n\n{event_list}");
self.backend.complete(&system, &user_msg)
}
}
impl WorkstreamClusterer for LlmClusterer {
fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
let chunks = chunk_events(events, self.config.max_input_tokens);
if chunks.len() <= 1 {
let event_list = format_event_list(events);
let system = system_prompt(self.config.max_workstreams);
let user_msg =
format!("Cluster these development events into workstreams:\n\n{event_list}");
let response = self.backend.complete(&system, &user_msg)?;
parse_llm_response(&response, events)
} else {
let mut all_workstreams = Vec::new();
for chunk_indices in &chunks {
let response = self.cluster_chunk(events, chunk_indices)?;
let chunk_events: Vec<EventEnvelope> =
chunk_indices.iter().map(|&i| events[i].clone()).collect();
let mut ws_file = parse_llm_response(&response, &chunk_events)?;
all_workstreams.append(&mut ws_file.workstreams);
}
Ok(WorkstreamsFile {
version: 1,
generated_at: chrono::Utc::now(),
workstreams: all_workstreams,
})
}
}
}
pub struct LlmWithFallback {
llm: LlmClusterer,
fallback: RepoClusterer,
}
impl LlmWithFallback {
pub fn new(llm: LlmClusterer) -> Self {
Self {
llm,
fallback: RepoClusterer,
}
}
}
impl WorkstreamClusterer for LlmWithFallback {
fn cluster(&self, events: &[EventEnvelope]) -> Result<WorkstreamsFile> {
match self.llm.cluster(events) {
Ok(ws) => Ok(ws),
Err(e) => {
eprintln!("WARN: LLM clustering failed, falling back to repo-based: {e}");
self.fallback.cluster(events)
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use shiplog::ids::EventId;
use shiplog::schema::event::*;
fn make_test_event(repo: &str, pr_num: u64, title: &str) -> EventEnvelope {
EventEnvelope {
id: EventId::from_parts(["test", &pr_num.to_string()]),
kind: EventKind::PullRequest,
occurred_at: Utc::now(),
actor: Actor {
login: "user".into(),
id: None,
},
repo: RepoRef {
full_name: repo.into(),
html_url: None,
visibility: RepoVisibility::Unknown,
},
payload: EventPayload::PullRequest(PullRequestEvent {
number: pr_num,
title: title.into(),
state: PullRequestState::Merged,
created_at: Utc::now(),
merged_at: Some(Utc::now()),
additions: Some(10),
deletions: Some(5),
changed_files: Some(3),
touched_paths_hint: vec![],
window: None,
}),
tags: vec![],
links: vec![],
source: SourceRef {
system: SourceSystem::Github,
url: None,
opaque_id: None,
},
}
}
#[test]
fn mock_llm_produces_valid_workstreams() {
let mock_response = serde_json::json!({
"workstreams": [{
"title": "Auth improvements",
"summary": "Authentication work",
"tags": ["auth", "backend"],
"event_indices": [0, 1],
"receipt_indices": [0]
}]
});
let backend = MockLlmBackend {
response: mock_response.to_string(),
};
let config = LlmConfig::default();
let clusterer = LlmClusterer::new(Box::new(backend), config);
let events = vec![
make_test_event("org/auth", 1, "Add OAuth"),
make_test_event("org/auth", 2, "Fix token refresh"),
];
let ws = clusterer.cluster(&events).unwrap();
assert_eq!(ws.workstreams.len(), 1);
assert_eq!(ws.workstreams[0].title, "Auth improvements");
assert_eq!(ws.workstreams[0].events.len(), 2);
}
#[test]
fn fallback_on_llm_failure() {
let backend = FailingLlmBackend;
let config = LlmConfig::default();
let llm = LlmClusterer::new(Box::new(backend), config);
let clusterer = LlmWithFallback::new(llm);
let events = vec![
make_test_event("org/repo-a", 1, "Feature A"),
make_test_event("org/repo-b", 2, "Feature B"),
];
let ws = clusterer.cluster(&events).unwrap();
assert_eq!(ws.workstreams.len(), 2);
}
#[test]
fn orphan_events_collected() {
let mock_response = serde_json::json!({
"workstreams": [{
"title": "Only first",
"summary": "test",
"tags": [],
"event_indices": [0],
"receipt_indices": [0]
}]
});
let backend = MockLlmBackend {
response: mock_response.to_string(),
};
let config = LlmConfig::default();
let clusterer = LlmClusterer::new(Box::new(backend), config);
let events = vec![
make_test_event("org/repo", 1, "Assigned"),
make_test_event("org/repo", 2, "Orphan"),
];
let ws = clusterer.cluster(&events).unwrap();
assert_eq!(ws.workstreams.len(), 2);
assert_eq!(ws.workstreams[1].title, "Uncategorized");
}
#[test]
fn out_of_bounds_indices_skipped() {
let mock_response = serde_json::json!({
"workstreams": [{
"title": "Test",
"summary": "test",
"tags": [],
"event_indices": [0, 99],
"receipt_indices": [0]
}]
});
let backend = MockLlmBackend {
response: mock_response.to_string(),
};
let config = LlmConfig::default();
let clusterer = LlmClusterer::new(Box::new(backend), config);
let events = vec![make_test_event("org/repo", 1, "Only event")];
let ws = clusterer.cluster(&events).unwrap();
assert_eq!(ws.workstreams.len(), 1);
assert_eq!(ws.workstreams[0].events.len(), 1);
}
#[test]
fn malformed_json_returns_error() {
let backend = MockLlmBackend {
response: "not json at all".to_string(),
};
let config = LlmConfig::default();
let clusterer = LlmClusterer::new(Box::new(backend), config);
let events = vec![make_test_event("org/repo", 1, "Test")];
assert!(clusterer.cluster(&events).is_err());
}
}