use anyhow::Context;
use crate::dream::backend::{BackfillEvent, BackfillInput, DreamBackend};
use crate::llm::LlmBackend;
/// A [`DreamBackend`] that extracts backfill events by prompting an LLM.
///
/// Besides the wrapped LLM it keeps a running tally of the usage reported by
/// each successful completion, readable through [`LlmDreamBackend::usage`].
pub struct LlmDreamBackend {
/// The LLM that completes the backfill prompts.
llm: Box<dyn LlmBackend>,
/// Accumulated usage. Held in a `Mutex` because `backfill` only receives
/// `&self` yet has to add to the tally after every successful call.
usage: std::sync::Mutex<crate::llm::LlmUsage>,
}
impl LlmDreamBackend {
pub fn new(llm: Box<dyn LlmBackend>) -> Self {
Self {
llm,
usage: std::sync::Mutex::new(crate::llm::LlmUsage::default()),
}
}
pub fn backend_name(&self) -> &'static str {
self.llm.name()
}
pub fn usage(&self) -> crate::llm::LlmUsage {
*self.usage.lock().unwrap()
}
}
/// Upper bound on the size of each transcript chunk that `backfill` sends to
/// the LLM in a single prompt.
///
/// Despite the name, `chunk_transcript` compares this budget against
/// `str::len()`, so it is measured in UTF-8 bytes rather than characters.
const TRANSCRIPT_CHAR_BUDGET: usize = 150_000;
impl DreamBackend for LlmDreamBackend {
fn backfill(&self, input: &BackfillInput) -> anyhow::Result<Vec<BackfillEvent>> {
let mut out = Vec::new();
for chunk in chunk_transcript(&input.transcript, TRANSCRIPT_CHAR_BUDGET) {
let chunk_input = BackfillInput {
tasks: input.tasks.clone(),
transcript: chunk,
};
let prompt = crate::dream::prompt::build_prompt(&chunk_input);
match self.llm.complete_usage(&prompt, 1024) {
Ok((text, usage)) => {
self.usage.lock().unwrap().add(usage);
match parse_backfill_json(&text) {
Ok(evs) => out.extend(evs),
Err(e) => {
tracing::warn!(error = %e, "dream backfill: skipping unparseable chunk")
}
}
}
Err(e) => tracing::warn!(error = %e, "dream backfill: skipping failed chunk"),
}
}
Ok(out)
}
}
/// Splits `transcript` into pieces of at most `budget` bytes each.
///
/// * A transcript that already fits is returned as a single piece (this
///   includes the empty transcript, which yields one empty piece).
/// * Otherwise pieces are cut on line boundaries (each line keeps its trailing
///   `'\n'`), packing as many whole lines into a piece as the budget allows.
/// * A single line longer than `budget` is hard-split between characters, so a
///   multi-byte character is never cut in half. A character that is itself
///   wider than `budget` still gets a piece of its own.
///
/// Concatenating the returned pieces reproduces `transcript` exactly.
fn chunk_transcript(transcript: &str, budget: usize) -> Vec<String> {
    if transcript.len() <= budget {
        return vec![transcript.to_owned()];
    }

    let mut pieces: Vec<String> = Vec::new();
    let mut pending = String::new();

    for line in transcript.split_inclusive('\n') {
        // Flush what we have if this line would push the piece over budget.
        let line_overflows = !pending.is_empty() && pending.len() + line.len() > budget;
        if line_overflows {
            pieces.push(std::mem::take(&mut pending));
        }

        if line.len() <= budget {
            pending.push_str(line);
            continue;
        }

        // The line alone exceeds the budget: fall back to per-character
        // splitting so every cut lands on a UTF-8 boundary.
        for ch in line.chars() {
            let char_overflows = !pending.is_empty() && pending.len() + ch.len_utf8() > budget;
            if char_overflows {
                pieces.push(std::mem::take(&mut pending));
            }
            pending.push(ch);
        }
    }

    if !pending.is_empty() {
        pieces.push(pending);
    }
    pieces
}
/// Decodes the JSON array of [`BackfillEvent`]s contained in an LLM reply.
///
/// The reply is tolerated in several shapes:
///
/// * wrapped in a Markdown code fence (a leading "```json" or "```" and a
///   trailing "```" are stripped);
/// * surrounded by prose — the text from the first `[` to the last `]` is
///   tried first;
/// * surrounded by prose that itself contains square brackets — if the span
///   above does not decode, every `[` is tried in order as the start of an
///   array and the first one that decodes wins, with any trailing text ignored.
///
/// # Errors
///
/// Returns an error, with the fence-stripped reply attached as context, when
/// no events array can be decoded (for example when the reply is pure prose).
pub fn parse_backfill_json(text: &str) -> anyhow::Result<Vec<BackfillEvent>> {
    let json_str = text
        .trim()
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim();

    // `[` and `]` are ASCII, so slicing at their byte offsets is always on a
    // character boundary.
    let slice = match (json_str.find('['), json_str.rfind(']')) {
        (Some(a), Some(b)) if b > a => &json_str[a..=b],
        _ => json_str,
    };

    serde_json::from_str::<Vec<BackfillEvent>>(slice)
        // Keep the error from the primary attempt if recovery finds nothing.
        .or_else(|primary_err| first_decodable_array(json_str).ok_or(primary_err))
        .with_context(|| format!("dream JSON parse failed; got: {json_str}"))
}

/// Tries each `[` in `reply` as the start of an events array and returns the
/// first one that decodes, ignoring whatever follows the array.
fn first_decodable_array(reply: &str) -> Option<Vec<BackfillEvent>> {
    reply.match_indices('[').find_map(|(start, _)| {
        // A stream deserializer reads exactly one value and does not reject
        // trailing text, unlike `serde_json::from_str`.
        serde_json::Deserializer::from_str(&reply[start..])
            .into_iter::<Vec<BackfillEvent>>()
            .next()
            .and_then(Result::ok)
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::event::EventType;

    /// A reply wrapped in a "```json" fence is unwrapped and decoded.
    #[test]
    fn parse_strips_fence_and_decodes() {
        let reply = "```json\n[{\"event_type\":\"decision\",\"task_id\":\"tj-1\",\
                     \"text\":\"chose X\",\"timestamp\":\"2026-06-13T00:00:00Z\"}]\n```";
        let evs = parse_backfill_json(reply).unwrap();
        assert_eq!(evs.len(), 1);
        assert_eq!(evs[0].event_type, EventType::Decision);
        assert_eq!(evs[0].task_id, "tj-1");
        assert!(evs[0].text.contains("chose X"));
    }

    /// An empty JSON array decodes to no events.
    #[test]
    fn parse_empty_array() {
        assert!(parse_backfill_json("[]").unwrap().is_empty());
    }

    /// Prose before and after the array is ignored.
    #[test]
    fn parse_extracts_array_wrapped_in_prose() {
        let reply = "Here are the missed events:\n[{\"event_type\":\"finding\",\
                     \"task_id\":\"tj-1\",\"text\":\"found\",\"timestamp\":\"2026-06-13T00:00:00Z\"}]\nHope that helps!";
        let evs = parse_backfill_json(reply).unwrap();
        assert_eq!(evs.len(), 1);
    }

    /// A reply with no JSON array at all is an error.
    #[test]
    fn parse_errors_on_pure_prose() {
        assert!(parse_backfill_json("Контекст в норме. Что дальше?").is_err());
    }

    /// An unparseable reply skips the chunk instead of failing the backfill.
    #[test]
    fn backfill_skips_unparseable_chunk_reply() {
        struct ChattyLlm;
        impl LlmBackend for ChattyLlm {
            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
                Ok("Контекст в норме. 566.5k/1M использовано. Что дальше?".to_string())
            }
            fn name(&self) -> &'static str {
                "chatty"
            }
        }
        let b = LlmDreamBackend::new(Box::new(ChattyLlm));
        let input = BackfillInput {
            tasks: vec![],
            transcript: "user: hi\nassistant: hello".into(),
        };
        let evs = b.backfill(&input).unwrap();
        assert!(evs.is_empty());
    }

    /// A failing LLM call skips the chunk instead of failing the backfill.
    #[test]
    fn backfill_skips_chunk_whose_call_errors() {
        struct FailingLlm;
        impl LlmBackend for FailingLlm {
            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
                Err(anyhow::anyhow!(
                    "`claude -p` exited with status 1: Prompt is too long"
                ))
            }
            fn name(&self) -> &'static str {
                "failing"
            }
        }
        let b = LlmDreamBackend::new(Box::new(FailingLlm));
        let input = BackfillInput {
            tasks: vec![],
            transcript: "user: hi\nassistant: hello".into(),
        };
        let evs = b.backfill(&input).unwrap();
        assert!(evs.is_empty());
    }

    /// A transcript within budget is returned untouched as one chunk.
    #[test]
    fn small_transcript_is_one_chunk() {
        let c = chunk_transcript("a\nb\nc\n", 100);
        assert_eq!(c.len(), 1);
        assert_eq!(c[0], "a\nb\nc\n");
    }

    /// An over-budget transcript is split without losing or reordering content.
    #[test]
    fn big_transcript_splits_on_lines_and_preserves_content() {
        let transcript: String = (0..10).map(|i| format!("line{i:015}\n")).collect();
        let chunks = chunk_transcript(&transcript, 50);
        assert!(chunks.len() > 1, "must split");
        assert!(chunks.iter().all(|c| c.len() <= 50));
        assert_eq!(chunks.concat(), transcript, "no content lost");
    }

    /// A single line longer than the budget is cut into budget-sized pieces.
    #[test]
    fn oversized_single_line_is_hard_split() {
        let line = "x".repeat(250);
        let chunks = chunk_transcript(&line, 100);
        assert!(chunks.len() >= 3);
        assert!(chunks.iter().all(|c| c.len() <= 100));
        assert_eq!(chunks.concat(), line);
    }

    /// A transcript larger than the budget results in more than one LLM call.
    #[test]
    fn backfill_chunks_large_transcript_into_multiple_calls() {
        use std::sync::atomic::{AtomicUsize, Ordering};
        // The counter is shared through an `Arc` so it can still be read after
        // the LLM has been moved into the backend.
        struct CountingLlm(std::sync::Arc<AtomicUsize>);
        impl LlmBackend for CountingLlm {
            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
                self.0.fetch_add(1, Ordering::SeqCst);
                Ok("[]".to_string())
            }
            fn name(&self) -> &'static str {
                "counting"
            }
        }
        let calls = std::sync::Arc::new(AtomicUsize::new(0));
        let llm = Box::new(CountingLlm(std::sync::Arc::clone(&calls)));
        // Two bytes per line, so this is twice the budget and cannot fit in
        // a single chunk.
        let transcript = "y\n".repeat(TRANSCRIPT_CHAR_BUDGET);
        let b = LlmDreamBackend::new(llm);
        let input = BackfillInput {
            tasks: vec![],
            transcript,
        };
        let evs = b.backfill(&input).unwrap();
        assert!(evs.is_empty());
        assert!(
            calls.load(Ordering::SeqCst) >= 2,
            "an over-budget transcript must be sent in more than one call"
        );
    }

    /// End to end: the LLM reply is parsed and the backend name is forwarded.
    #[test]
    fn llm_dream_backend_runs_and_parses() {
        struct FakeLlm;
        impl LlmBackend for FakeLlm {
            fn complete(&self, _prompt: &str, _max: u32) -> anyhow::Result<String> {
                Ok(
                    "[{\"event_type\":\"finding\",\"task_id\":\"tj-x\",\"text\":\"found it\",\
                     \"timestamp\":\"2026-06-13T00:00:00Z\"}]"
                        .to_string(),
                )
            }
            fn name(&self) -> &'static str {
                "fake"
            }
        }
        let b = LlmDreamBackend::new(Box::new(FakeLlm));
        let input = BackfillInput {
            tasks: vec![],
            transcript: "x".into(),
        };
        let evs = b.backfill(&input).unwrap();
        assert_eq!(evs.len(), 1);
        assert_eq!(evs[0].text, "found it");
        assert_eq!(b.backend_name(), "fake");
    }
}