use std::path::PathBuf;
use std::sync::mpsc::{Receiver, SyncSender, TrySendError, sync_channel};
use cera::session::{FinishReason, ModalitySink};
use cera::tokenizer::{BpeTokenizer, ChatMessage};
use triage_core::session::SessionId;
/// System prompt for the one-line label pass; `generate_one_line` sends it as the `system`
/// chat message.
const SYSTEM_PROMPT: &str = "You label terminal sessions. Reply with a terse description of what \
the session is doing, at most 8 words, no trailing punctuation, no quotes. Output only the label.";
/// System prompt for the multi-sentence detail pass; `generate_detail` sends it as the `system`
/// chat message.
const DETAIL_SYSTEM_PROMPT: &str = "You summarize terminal sessions. In two or three short \
sentences, describe what the session is doing: the task, any commands run, and the current state \
(e.g. building, tests passing/failing, an error, waiting at a prompt). Be concrete and factual. \
No markdown, no quotes, no preamble — output only the summary.";
/// Upper bound, in characters, that `sanitize_one_line` enforces on a label.
const MAX_SNIPPET_CHARS: usize = 60;
/// Upper bound, in space-separated words, that `sanitize_one_line` enforces on a label.
const MAX_SNIPPET_WORDS: usize = 8;
/// Number of characters `sanitize_detail` keeps before it truncates and appends an ellipsis.
const MAX_DETAIL_CHARS: usize = 280;
/// Settings for the background session summarizer and the model it loads.
#[derive(Debug, Clone)]
pub struct SummarizerConfig {
/// Model bundle identifier handed to `cera::CeraEngine::from_bundle_id`.
pub bundle_id: String,
/// Quantization name handed to `cera::CeraEngine::from_bundle_id` together with `bundle_id`.
pub quant: String,
/// Forwarded (widened to `usize`) as `cera::EngineConfig::context_size`.
pub context_size: u32,
/// `max_tokens` generation limit for the one-line label pass.
pub max_tokens: u32,
/// `max_tokens` generation limit for the detail-summary pass.
pub detail_max_tokens: u32,
/// Directory handed to `cera::bundle::BundleRepo::new` when loading the model.
pub cache_dir: PathBuf,
/// Capacity of the bounded job queue; values below 1 are raised to 1 by `Summarizer::spawn`.
pub queue_depth: usize,
}
/// One request to summarize a session, queued through `Summarizer::try_enqueue`.
pub struct SummarizeJob {
/// Session the summary is for; queued jobs are coalesced per session id.
pub session_id: SessionId,
/// Text sent to the model as the `user` chat message (e.g. the output of `build_prompt_text`).
pub prompt_text: String,
/// Ordering key within a session: when several jobs for the same session are queued, only the
/// one with the highest value is processed. It is echoed back in
/// `SnippetResult::generated_at_output_seq`.
pub output_seq: u64,
}
/// Summary produced for one `SummarizeJob`, delivered to the `on_result` callback.
pub struct SnippetResult {
/// Session the summary belongs to (copied from the job).
pub session_id: SessionId,
/// Sanitized one-line label; never empty, because empty labels are dropped before delivery.
pub text: String,
/// Sanitized longer summary; `None` when the detail pass failed or produced no text.
pub detail: Option<String>,
/// The `output_seq` of the job this result was generated from.
pub generated_at_output_seq: u64,
}
/// Handle used to submit summarization jobs to the background worker thread.
///
/// Clones share the same bounded queue. A handle without a sender (see
/// [`Summarizer::disabled`]) rejects every job.
#[derive(Clone)]
pub struct Summarizer {
    /// Sending half of the bounded job queue; `None` when summarization is disabled.
    jobs: Option<SyncSender<SummarizeJob>>,
}

impl Summarizer {
    /// Starts the `triage-summarizer` worker thread and returns a handle that feeds it.
    ///
    /// The queue holds `config.queue_depth` jobs (at least one). `on_result` is invoked on the
    /// worker thread for every job that yields a non-empty label. If the thread cannot be
    /// spawned, the failure is logged and a disabled handle is returned instead.
    pub fn spawn(
        config: SummarizerConfig,
        on_result: impl Fn(SnippetResult) + Send + 'static,
    ) -> Self {
        let capacity = config.queue_depth.max(1);
        let (tx, rx) = sync_channel(capacity);
        let spawned = std::thread::Builder::new()
            .name(String::from("triage-summarizer"))
            .spawn(move || run_worker(config, rx, on_result));
        match spawned {
            // The join handle is dropped on purpose: the worker runs detached and exits once
            // every sender is gone.
            Ok(_worker) => Self { jobs: Some(tx) },
            Err(error) => {
                tracing::error!(%error, "failed to spawn summarizer thread; snippets disabled");
                Self::disabled()
            }
        }
    }

    /// Returns a handle with no worker behind it; `try_enqueue` always returns `false`.
    pub fn disabled() -> Self {
        Self { jobs: None }
    }

    /// Reports whether this handle holds a job queue sender.
    pub fn is_enabled(&self) -> bool {
        self.jobs.is_some()
    }

    /// Tries to queue `job` without blocking.
    ///
    /// Returns `true` only when the job was accepted. A disabled handle, a full queue, and a
    /// worker that has gone away all yield `false`, and the job is dropped.
    pub fn try_enqueue(&self, job: SummarizeJob) -> bool {
        let Some(tx) = self.jobs.as_ref() else {
            return false;
        };
        !matches!(
            tx.try_send(job),
            Err(TrySendError::Full(_) | TrySendError::Disconnected(_))
        )
    }
}
fn run_worker(
config: SummarizerConfig,
jobs: Receiver<SummarizeJob>,
on_result: impl Fn(SnippetResult),
) {
let mut engine: Option<cera::CeraEngine> = None;
let mut load_failed = false;
while let Ok(first) = jobs.recv() {
let batch = coalesce(first, &jobs);
if load_failed {
continue;
}
if engine.is_none() {
match load_engine(&config) {
Ok(loaded) => {
tracing::info!(
bundle_id = %config.bundle_id,
quant = %config.quant,
"loaded session summarizer model"
);
engine = Some(loaded);
}
Err(error) => {
tracing::error!(%error, "failed to load summarizer model; snippets disabled");
load_failed = true;
continue;
}
}
}
let engine = engine.as_ref().expect("engine loaded above");
for job in batch {
match generate_one_line(engine, &config, &job.prompt_text) {
Ok(Some(text)) => {
let detail = match generate_detail(engine, &config, &job.prompt_text) {
Ok(detail) => detail,
Err(error) => {
tracing::warn!(
%error,
session_id = %job.session_id,
"detail summary generation failed"
);
None
}
};
on_result(SnippetResult {
session_id: job.session_id,
text,
detail,
generated_at_output_seq: job.output_seq,
});
}
Ok(None) => {
tracing::debug!(
session_id = %job.session_id,
"snippet generation produced empty output (dropped)"
)
}
Err(error) => {
tracing::warn!(%error, session_id = %job.session_id, "snippet generation failed")
}
}
}
}
}
/// Collapses `first` plus everything currently waiting in `jobs` to one job per session.
///
/// For each session only the job with the highest `output_seq` survives; on a tie the job
/// that arrived earlier is kept. The returned batch is ordered by each session's first
/// arrival, so processing order is deterministic and the session that has waited longest is
/// served first (a plain `HashMap` drain would yield an arbitrary order that changes between
/// runs).
///
/// Never blocks: draining stops as soon as `try_recv` reports an empty or disconnected queue.
fn coalesce(first: SummarizeJob, jobs: &Receiver<SummarizeJob>) -> Vec<SummarizeJob> {
    use std::collections::HashMap;
    use std::collections::hash_map::Entry;

    // `batch` holds the surviving jobs in arrival order; `slots` maps a session to its index.
    let mut batch: Vec<SummarizeJob> = Vec::new();
    let mut slots: HashMap<SessionId, usize> = HashMap::new();
    let mut consider = |job: SummarizeJob| match slots.entry(job.session_id.clone()) {
        Entry::Occupied(slot) => {
            let kept = &mut batch[*slot.get()];
            // Replace in place so the session keeps its original position in the batch.
            if job.output_seq > kept.output_seq {
                *kept = job;
            }
        }
        Entry::Vacant(slot) => {
            slot.insert(batch.len());
            batch.push(job);
        }
    };

    consider(first);
    while let Ok(job) = jobs.try_recv() {
        consider(job);
    }
    batch
}
fn load_engine(config: &SummarizerConfig) -> Result<cera::CeraEngine, cera::session::CeraError> {
let repo = cera::bundle::BundleRepo::new(&config.cache_dir);
let engine_config = cera::EngineConfig {
context_size: config.context_size as usize,
backend: cera::BackendPreference::Auto,
bundle_repo: Some(repo),
};
cera::CeraEngine::from_bundle_id(&config.bundle_id, &config.quant, engine_config)
}
/// Produces the short label for a session.
///
/// A fresh session is opened on `engine`, primed with [`SYSTEM_PROMPT`] and `prompt_text`
/// rendered through the tokenizer's chat template, and run with `temperature: 0.0` for at most
/// `config.max_tokens` tokens. The captured text is passed through `sanitize_one_line`, so
/// `Ok(None)` means the model produced nothing usable.
///
/// # Errors
/// Propagates failures from chat-template rendering, `append_text`, and `generate`.
fn generate_one_line(
    engine: &cera::CeraEngine,
    config: &SummarizerConfig,
    prompt_text: &str,
) -> anyhow::Result<Option<String>> {
    let opts = cera::GenerateOpts {
        temperature: 0.0,
        max_tokens: config.max_tokens,
        ..Default::default()
    };

    let mut session = engine.new_session(cera::SessionConfig::default());
    let messages = [("system", SYSTEM_PROMPT), ("user", prompt_text)].map(|(role, content)| {
        ChatMessage {
            role: role.to_string(),
            content: content.to_string(),
        }
    });
    let rendered = cera::tokenizer::apply_chat_template(engine.tokenizer(), &messages, true)?;
    session.append_text(&rendered)?;

    let mut sink = OneLineSink::new(engine.tokenizer());
    session.generate(&opts, &mut sink)?;
    let label = sanitize_one_line(&sink.text);
    Ok(label)
}
/// Produces the longer, multi-sentence summary for a session.
///
/// A fresh session is opened on `engine`, primed with [`DETAIL_SYSTEM_PROMPT`] and
/// `prompt_text` rendered through the tokenizer's chat template, and run with
/// `temperature: 0.0` for at most `config.detail_max_tokens` tokens. The captured text is
/// passed through `sanitize_detail`, so `Ok(None)` means the model produced only whitespace.
///
/// # Errors
/// Propagates failures from chat-template rendering, `append_text`, and `generate`.
fn generate_detail(
    engine: &cera::CeraEngine,
    config: &SummarizerConfig,
    prompt_text: &str,
) -> anyhow::Result<Option<String>> {
    let opts = cera::GenerateOpts {
        temperature: 0.0,
        max_tokens: config.detail_max_tokens,
        ..Default::default()
    };

    let mut session = engine.new_session(cera::SessionConfig::default());
    let messages = [("system", DETAIL_SYSTEM_PROMPT), ("user", prompt_text)].map(
        |(role, content)| ChatMessage {
            role: role.to_string(),
            content: content.to_string(),
        },
    );
    let rendered = cera::tokenizer::apply_chat_template(engine.tokenizer(), &messages, true)?;
    session.append_text(&rendered)?;

    let mut sink = DetailSink::new(engine.tokenizer());
    session.generate(&opts, &mut sink)?;
    let summary = sanitize_detail(&sink.text);
    Ok(summary)
}
/// Generation sink that keeps every decoded text chunk of a detail summary.
struct DetailSink<'a> {
    /// Tokenizer used to turn token ids back into text.
    tokenizer: &'a BpeTokenizer,
    /// Everything decoded so far, in arrival order.
    text: String,
}

impl<'a> DetailSink<'a> {
    /// Creates an empty sink that decodes with `tokenizer`.
    fn new(tokenizer: &'a BpeTokenizer) -> Self {
        let text = String::new();
        Self { tokenizer, text }
    }
}

impl ModalitySink for DetailSink<'_> {
    /// Decodes `tokens` and appends the result to the accumulated text.
    fn on_text_tokens(&mut self, tokens: &[u32]) {
        let piece = self.tokenizer.decode(tokens);
        self.text.push_str(&piece);
    }

    /// The finish reason is not needed; the caller reads `text` after generation returns.
    fn on_done(&mut self, _reason: FinishReason) {}
}
fn sanitize_detail(raw: &str) -> Option<String> {
let collapsed: String = raw.split_whitespace().collect::<Vec<_>>().join(" ");
let trimmed = collapsed.trim();
if trimmed.is_empty() {
return None;
}
if trimmed.chars().count() > MAX_DETAIL_CHARS {
let mut capped: String = trimmed.chars().take(MAX_DETAIL_CHARS).collect();
capped = capped.trim_end().to_string();
capped.push('…');
Some(capped)
} else {
Some(trimmed.to_string())
}
}
/// Generation sink that captures only the first non-blank line of the model's reply.
///
/// Once a newline is seen after some visible text, later tokens are ignored rather than
/// appended.
///
/// NOTE(review): each chunk is decoded on its own; if `decode` can be handed a token run that
/// splits a multi-byte character, the captured text may contain replacement characters —
/// confirm against the tokenizer.
struct OneLineSink<'a> {
    /// Tokenizer used to turn token ids back into text.
    tokenizer: &'a BpeTokenizer,
    /// The captured line so far; never starts with whitespace and never contains a newline.
    text: String,
    /// Set once the end of the first non-blank line has been reached.
    stopped: bool,
}

impl<'a> OneLineSink<'a> {
    /// Creates an empty sink that decodes with `tokenizer`.
    fn new(tokenizer: &'a BpeTokenizer) -> Self {
        Self {
            tokenizer,
            text: String::new(),
            stopped: false,
        }
    }
}

impl ModalitySink for OneLineSink<'_> {
    /// Decodes `tokens` and appends them to the captured line, stopping at its first newline.
    fn on_text_tokens(&mut self, tokens: &[u32]) {
        if self.stopped {
            return;
        }
        let decoded = self.tokenizer.decode(tokens);
        // Until something visible has been captured, skip leading whitespace (newlines
        // included). Otherwise a reply that opens with a blank line would stop the sink with
        // an empty label and the whole snippet would be dropped.
        let chunk = if self.text.is_empty() {
            decoded.trim_start()
        } else {
            &decoded[..]
        };
        match chunk.find('\n') {
            Some(newline) => {
                self.text.push_str(&chunk[..newline]);
                self.stopped = true;
            }
            None => self.text.push_str(chunk),
        }
    }

    /// The finish reason is not needed; the caller reads `text` after generation returns.
    fn on_done(&mut self, _reason: FinishReason) {}
}
/// Normalizes a raw model reply into a short one-line label.
///
/// Steps, in order:
/// 1. take the first line that contains visible text (leading blank lines are skipped, so a
///    reply such as `"\nrunning tests"` still yields a label);
/// 2. collapse runs of whitespace to single spaces;
/// 3. remove one pair of surrounding quote characters (`"`, `'` or `` ` ``) when the line both
///    starts and ends with one;
/// 4. keep at most [`MAX_SNIPPET_WORDS`] words and at most [`MAX_SNIPPET_CHARS`] characters.
///
/// Returns `None` when nothing is left after cleaning.
fn sanitize_one_line(raw: &str) -> Option<String> {
    const QUOTES: [char; 3] = ['"', '\'', '`'];

    let first_line = raw.lines().map(str::trim).find(|line| !line.is_empty())?;
    let collapsed: String = first_line.split_whitespace().collect::<Vec<_>>().join(" ");

    // Quotes are only removed as a pair; a lone quote may be part of the label itself.
    let unquoted = collapsed
        .strip_prefix(QUOTES)
        .and_then(|rest| rest.strip_suffix(QUOTES))
        .unwrap_or(&collapsed)
        .trim();

    let mut capped: String = unquoted
        .split(' ')
        .take(MAX_SNIPPET_WORDS)
        .collect::<Vec<_>>()
        .join(" ");

    // A character at index `MAX_SNIPPET_CHARS` exists exactly when the label is too long; cut
    // at its byte offset so multi-byte characters are never split.
    if let Some((cut, _)) = capped.char_indices().nth(MAX_SNIPPET_CHARS) {
        capped.truncate(cut);
        let kept = capped.trim_end().len();
        capped.truncate(kept);
    }

    if capped.is_empty() {
        None
    } else {
        Some(capped)
    }
}
/// Builds the model prompt from the rows currently visible in a terminal.
///
/// Trailing whitespace is stripped from every row and rows that end up empty are discarded.
/// Of what remains, the last 20 rows are joined with `\n`; if the result is longer than 1500
/// characters, only its final 1500 characters are kept. Returns `None` when no row has any
/// content.
pub fn build_prompt_text(visible_rows: &[String]) -> Option<String> {
    const MAX_PROMPT_ROWS: usize = 20;
    const MAX_PROMPT_CHARS: usize = 1500;

    // Walk from the bottom so only the rows that will actually be kept are collected.
    let mut tail: Vec<&str> = visible_rows
        .iter()
        .rev()
        .map(|row| row.trim_end())
        .filter(|row| !row.is_empty())
        .take(MAX_PROMPT_ROWS)
        .collect();
    if tail.is_empty() {
        return None;
    }
    tail.reverse();

    let joined = tail.join("\n");
    // Byte offset of the `MAX_PROMPT_CHARS`-th character counted from the end; absent when
    // the text already fits.
    let tail_start = joined
        .char_indices()
        .rev()
        .nth(MAX_PROMPT_CHARS - 1)
        .map_or(0, |(offset, _)| offset);
    Some(if tail_start == 0 {
        joined
    } else {
        joined[tail_start..].to_owned()
    })
}
// Unit tests for the pure text helpers, plus one ignored end-to-end test that loads a real
// model through `Summarizer::spawn`.
#[cfg(test)]
mod tests {
use super::*;
// Covers quote stripping, the word cap, first-line selection, and blank input.
#[test]
fn sanitize_strips_quotes_and_caps_words() {
assert_eq!(
sanitize_one_line("\"running cargo test\""),
Some("running cargo test".to_string())
);
assert_eq!(
sanitize_one_line("one two three four five six seven eight nine ten"),
Some("one two three four five six seven eight".to_string())
);
assert_eq!(
sanitize_one_line("first line\nsecond line"),
Some("first line".to_string())
);
assert_eq!(sanitize_one_line(" \n "), None);
}
// Covers whitespace collapsing, blank input, and the character cap with its ellipsis.
#[test]
fn sanitize_detail_collapses_whitespace_and_caps() {
assert_eq!(
sanitize_detail(" Running cargo test.\n\nAll 83 tests passed. "),
Some("Running cargo test. All 83 tests passed.".to_string())
);
assert_eq!(sanitize_detail(" \n\n "), None);
let long = "word ".repeat(100);
let capped = sanitize_detail(&long).expect("non-empty");
// The ellipsis is appended after truncation, hence the `+ 1`.
assert!(capped.chars().count() <= MAX_DETAIL_CHARS + 1, "{capped:?}");
assert!(capped.ends_with('…'), "{capped:?}");
}
// Blank rows are dropped, trailing whitespace is trimmed, and all-blank input yields `None`.
#[test]
fn build_prompt_drops_blank_rows_and_keeps_tail() {
let rows = vec![
"".to_string(),
"$ cargo build ".to_string(),
" ".to_string(),
"Compiling triaged".to_string(),
];
let prompt = build_prompt_text(&rows).expect("non-empty");
assert_eq!(prompt, "$ cargo build\nCompiling triaged");
assert_eq!(build_prompt_text(&[]), None);
assert_eq!(build_prompt_text(&["".to_string(), " ".to_string()]), None);
}
// Full pipeline check: spawn the worker, enqueue one job, and wait for its result on a
// channel. Ignored by default because it needs the model bundle and real inference.
#[test]
#[ignore = "downloads ~0.7GB model and runs local inference"]
fn end_to_end_generates_a_snippet() {
use std::sync::mpsc;
use std::time::Duration;
use triage_core::session::SessionId;
let config = SummarizerConfig {
bundle_id: "LFM2.5-1.2B-Instruct-GGUF".to_string(),
quant: "Q4_0".to_string(),
context_size: 1024,
max_tokens: 24,
detail_max_tokens: 96,
cache_dir: crate::session::default_model_cache_dir(),
queue_depth: 4,
};
// The callback runs on the worker thread; forward each result back to the test thread.
let (tx, rx) = mpsc::channel();
let summarizer = Summarizer::spawn(config, move |result| {
let _ = tx.send(result);
});
assert!(summarizer.is_enabled(), "summarizer should spawn");
let prompt = build_prompt_text(&[
"user@host project % cargo test".to_string(),
" Compiling triaged v0.1.5".to_string(),
" Finished `test` profile in 4.2s".to_string(),
"running 83 tests".to_string(),
"test result: ok. 83 passed; 0 failed".to_string(),
])
.expect("prompt");
assert!(summarizer.try_enqueue(SummarizeJob {
session_id: SessionId::new("e2e").unwrap(),
prompt_text: prompt,
output_seq: 1,
}));
// Generous timeout: the first job also pays for loading the model.
let result = rx
.recv_timeout(Duration::from_secs(600))
.expect("a snippet within timeout");
eprintln!("GENERATED SNIPPET: {:?}", result.text);
assert!(!result.text.is_empty(), "snippet should be non-empty");
assert!(
result.text.split_whitespace().count() <= MAX_SNIPPET_WORDS,
"snippet should respect the word cap: {:?}",
result.text
);
}
}