localharness 0.26.0

A Rust-native agent SDK with pluggable LLM backends (Gemini today). Streaming, custom tools, safety policies, background triggers — zero external binaries.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
//! Context-window compaction.
//!
//! When the running token count for a turn exceeds
//! `CapabilitiesConfig::compaction_threshold`, we trim history. The
//! strategy:
//!
//! 1. Keep the system instruction (it lives outside `history`).
//! 2. Keep the most-recent `KEEP_RECENT_TURNS` user/model turn pairs
//!    verbatim — function-call/response pairs are kept together.
//! 3. Ask Gemini to summarize everything before the keep-window into a
//!    single short paragraph.
//! 4. Replace that prefix with one synthetic user-role turn containing
//!    the summary, tagged so future readers know it's a compaction
//!    artifact.
//!
//! If summarization fails (network error, missing client) we fall back
//! to dropping the oldest turns until we're under the keep-window. The
//! agent never errors out of a turn because of a compaction failure —
//! the dispatch loop logs at WARN and continues.

use parking_lot::Mutex;
use tracing::{debug, warn};

use crate::backends::gemini::api::SharedClient;
use crate::backends::gemini::wire::{
    Content, ContentRole, FinishReason, GenerateContentRequest, Part,
};
use crate::error::Result;

/// Tag prepended to a compaction summary so the model (and humans
/// inspecting history) can tell what they're looking at.
pub const COMPACTION_TAG: &str = "[compacted prior context]";

/// How many recent user/model turn pairs we always keep verbatim. The
/// model needs immediate context — a hard ceiling that's not too
/// stingy (don't break a multi-step tool use) and not too generous
/// (don't defeat the point of compaction).
const KEEP_RECENT_TURNS: usize = 6;

/// Pre-summary check: if history has fewer than this many entries
/// past the keep-window, compaction is a no-op (nothing to gain).
const MIN_HISTORY_TO_COMPACT: usize = 8;

/// Model used for summarization. Cheap + fast is right here — the
/// summary doesn't need to be brilliant, just faithful.
const SUMMARY_PROMPT: &str = "Summarize the conversation below in 200 words or less. \
    Preserve key facts, decisions, file paths, and any user requests. Drop greetings, \
    chit-chat, and redundant tool output. Output only the summary; no preamble.";

/// Try to compact `history` in place. Returns `true` if anything
/// changed. Safe to call from inside the agent loop — never errors out,
/// only logs.
pub async fn try_compact(
    history: &Mutex<Vec<Content>>,
    client: &SharedClient,
    model: &str,
) -> bool {
    let snapshot = history.lock().clone();
    let total = snapshot.len();
    if total < MIN_HISTORY_TO_COMPACT {
        debug!(total, "compaction: history too short, skipping");
        return false;
    }

    // Find a split point that respects function-call/response pairs.
    let split = pick_split(&snapshot, KEEP_RECENT_TURNS);
    if split == 0 {
        debug!("compaction: nothing to summarize before the keep-window");
        return false;
    }

    let (to_summarize, to_keep) = snapshot.split_at(split);
    debug!(
        to_summarize = to_summarize.len(),
        to_keep = to_keep.len(),
        "compaction: attempting summary"
    );

    let summary = match summarize(client, model, to_summarize).await {
        Ok(s) => s,
        Err(e) => {
            warn!(error = %e, "compaction: summarization failed; falling back to drop-oldest");
            return drop_oldest_fallback(history, split);
        }
    };

    if summary.trim().is_empty() {
        warn!("compaction: summarization returned empty text; falling back to drop-oldest");
        return drop_oldest_fallback(history, split);
    }

    // Install the summary as a single synthetic user turn at the head.
    let synthetic = Content {
        role: ContentRole::User,
        parts: vec![Part::Text {
            text: format!("{COMPACTION_TAG}\n{summary}"),
        }],
    };
    let mut hist = history.lock();
    if hist.len() != total {
        // Another turn raced us. Bail rather than corrupt the new state.
        warn!("compaction: history changed under us; aborting install");
        return false;
    }
    let kept: Vec<Content> = hist.split_off(split);
    hist.clear();
    hist.push(synthetic);
    hist.extend(kept);
    debug!(new_len = hist.len(), "compaction: installed summary");
    true
}

/// Pick an index `i` such that history[..i] is summarized and
/// history[i..] is kept. Honors `KEEP_RECENT_TURNS` (a turn = one
/// user+model pair, so 2 entries) and function-call/response pairing.
///
/// The kept slice `history[i..]` is API-valid iff its first message is NOT
/// a lone `functionResponse` user turn — otherwise the matching
/// `functionCall` (at `i-1`) would be summarized away and orphaned. (Linear
/// history guarantees a `functionCall` is always followed by its
/// `functionResponse`, so a kept *call* never dangles; only a leading
/// response can orphan.)
///
/// We start at the keep-window boundary and walk it EARLIER (toward 0) past
/// any leading `functionResponse`, absorbing the orphaned pair into the
/// summary. Walking earlier keeps strictly MORE history than requested
/// (never less) and can never run off the end — the old walk-FORWARD logic
/// could chain through a long run of tool round-trips and keep ZERO messages,
/// summarizing away the entire recent context including the turn being
/// answered.
fn pick_split(history: &[Content], keep_pairs: usize) -> usize {
    let keep_entries = keep_pairs * 2;
    if history.len() <= keep_entries {
        return 0;
    }
    let mut split = history.len() - keep_entries;

    while split > 0 && is_leading_orphan(history, split) {
        split -= 1;
    }
    split
}

/// True if keeping `history[split..]` would orphan a leading
/// `functionResponse`: the first kept message is a user turn of only
/// `functionResponse` parts, whose matching `functionCall` lives at
/// `split-1` (and would be summarized away).
fn is_leading_orphan(history: &[Content], split: usize) -> bool {
    match history.get(split) {
        Some(c) => matches!(c.role, ContentRole::User) && turn_is_function_response(c),
        None => false,
    }
}

fn turn_is_function_response(c: &Content) -> bool {
    c.parts
        .iter()
        .all(|p| matches!(p, Part::FunctionResponse { .. }))
        && !c.parts.is_empty()
}

async fn summarize(client: &SharedClient, model: &str, history: &[Content]) -> Result<String> {
    use futures_util::stream::StreamExt;

    // Render the to-summarize slice as a readable transcript. We feed
    // it as the user message of a one-shot request — no system
    // instruction, no tools, no history.
    let transcript = render_transcript(history);
    let req = GenerateContentRequest {
        contents: vec![Content {
            role: ContentRole::User,
            parts: vec![Part::Text {
                text: format!("{SUMMARY_PROMPT}\n\n---\n{transcript}"),
            }],
        }],
        ..Default::default()
    };

    let mut stream = client.stream_generate(model, &req).await?;
    let mut out = String::new();
    let mut finish: Option<FinishReason> = None;
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        for cand in chunk.candidates {
            if let Some(content) = cand.content {
                for part in content.parts {
                    if let Part::Text { text } = part {
                        out.push_str(&text);
                    }
                }
            }
            if let Some(r) = cand.finish_reason {
                finish = Some(r);
            }
        }
    }
    if !matches!(finish, Some(FinishReason::Stop) | None) {
        warn!(?finish, "compaction summary finished abnormally");
    }
    Ok(out)
}

fn render_transcript(history: &[Content]) -> String {
    let mut out = String::with_capacity(history.len() * 64);
    for entry in history {
        let role = match entry.role {
            ContentRole::User => "USER",
            ContentRole::Model => "MODEL",
        };
        out.push_str("## ");
        out.push_str(role);
        out.push('\n');
        for part in &entry.parts {
            match part {
                Part::Text { text } => out.push_str(text),
                Part::Thought {
                    text: Some(t), ..
                } => {
                    out.push_str("[thought] ");
                    out.push_str(t);
                }
                Part::FunctionCall { function_call } => {
                    out.push_str("[tool_call ");
                    out.push_str(&function_call.name);
                    out.push_str("] ");
                    out.push_str(&function_call.args.to_string());
                }
                Part::FunctionResponse { function_response } => {
                    out.push_str("[tool_result ");
                    out.push_str(&function_response.name);
                    out.push_str("] ");
                    let body = function_response.response.to_string();
                    // Truncate huge tool results — the summarizer
                    // doesn't need every byte.
                    if body.len() > 512 {
                        out.push_str(&body[..512]);
                        out.push_str("…[truncated]");
                    } else {
                        out.push_str(&body);
                    }
                }
                Part::InlineData { inline_data } => {
                    out.push_str("[inline_data ");
                    out.push_str(&inline_data.mime_type);
                    out.push(']');
                }
                _ => {}
            }
            out.push('\n');
        }
        out.push('\n');
    }
    out
}

/// Last-resort fallback when summarization isn't available. Just drops
/// the `split` oldest entries. Crude but always correct.
fn drop_oldest_fallback(history: &Mutex<Vec<Content>>, split: usize) -> bool {
    let mut hist = history.lock();
    if split >= hist.len() {
        return false;
    }
    let kept: Vec<Content> = hist.split_off(split);
    hist.clear();
    hist.push(Content {
        role: ContentRole::User,
        parts: vec![Part::Text {
            text: format!("{COMPACTION_TAG}\n[prior turns dropped]"),
        }],
    });
    hist.extend(kept);
    debug!(new_len = hist.len(), "compaction: drop-oldest fallback applied");
    true
}

/// Decide whether to attempt compaction based on the running token
/// count. `threshold` of `None` disables compaction entirely.
pub fn should_compact(total_tokens: Option<i32>, threshold: Option<u32>) -> bool {
    match (total_tokens, threshold) {
        (Some(t), Some(th)) => t as u32 > th,
        _ => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::backends::gemini::wire::{FunctionCall, FunctionResponse};
    use serde_json::json;

    fn user_text(s: &str) -> Content {
        Content {
            role: ContentRole::User,
            parts: vec![Part::Text { text: s.into() }],
        }
    }
    fn model_text(s: &str) -> Content {
        Content {
            role: ContentRole::Model,
            parts: vec![Part::Text { text: s.into() }],
        }
    }
    fn model_call(name: &str) -> Content {
        Content {
            role: ContentRole::Model,
            parts: vec![Part::FunctionCall {
                function_call: FunctionCall {
                    name: name.into(),
                    args: json!({}),
                },
            }],
        }
    }
    fn user_response(name: &str) -> Content {
        Content {
            role: ContentRole::User,
            parts: vec![Part::FunctionResponse {
                function_response: FunctionResponse {
                    name: name.into(),
                    response: json!({"ok": true}),
                },
            }],
        }
    }

    #[test]
    fn pick_split_below_keep_window() {
        let h = vec![user_text("u1"), model_text("m1")];
        assert_eq!(pick_split(&h, 6), 0);
    }

    #[test]
    fn pick_split_respects_keep_window() {
        // 10 user/model pairs = 20 entries. Keep last 6 pairs = 12
        // entries. Expected split: 20 - 12 = 8.
        let h: Vec<Content> = (0..10)
            .flat_map(|i| {
                vec![
                    user_text(&format!("u{i}")),
                    model_text(&format!("m{i}")),
                ]
            })
            .collect();
        assert_eq!(h.len(), 20);
        assert_eq!(pick_split(&h, 6), 8);
    }

    #[test]
    fn pick_split_does_not_orphan_function_response() {
        // [..., model_call(view_file), user_response(view_file), model_text(end)]
        // If the natural split lands between model_call and user_response,
        // pick_split should walk forward past the response.
        let mut h: Vec<Content> = (0..20).map(|i| user_text(&format!("u{i}"))).collect();
        // Replace the boundary entries to create a tool pair right at split.
        let pair_index = 14; // some index past min keep window
        h[pair_index] = model_call("view_file");
        h[pair_index + 1] = user_response("view_file");

        let split = pick_split(&h, 6);
        // Split index should NOT land between the call and the response.
        assert_ne!(split, pair_index + 1, "split must not orphan response");
    }

    #[test]
    fn should_compact_only_when_over_threshold() {
        assert!(!should_compact(None, Some(1000)));
        assert!(!should_compact(Some(500), None));
        assert!(!should_compact(Some(500), Some(1000)));
        assert!(should_compact(Some(1500), Some(1000)));
    }

    // --- Wire-invariant probes added by the compaction correctness dive ---

    /// Linear tool-heavy history: user_text, then N (model functionCall,
    /// user functionResponse) round-trips — the realistic compaction case.
    fn tool_heavy_history(rounds: usize) -> Vec<Content> {
        let mut h = vec![user_text("start")];
        for i in 0..rounds {
            let name = format!("view_file_{i}");
            h.push(model_call(&name));
            h.push(user_response(&name));
        }
        h
    }

    fn call_names(cs: &[Content]) -> std::collections::HashSet<String> {
        let mut s = std::collections::HashSet::new();
        for c in cs {
            for p in &c.parts {
                if let Part::FunctionCall { function_call } = p {
                    s.insert(function_call.name.clone());
                }
            }
        }
        s
    }

    fn response_names(cs: &[Content]) -> std::collections::HashSet<String> {
        let mut s = std::collections::HashSet::new();
        for c in cs {
            for p in &c.parts {
                if let Part::FunctionResponse { function_response } = p {
                    s.insert(function_response.name.clone());
                }
            }
        }
        s
    }

    /// THE CORE INVARIANT: the kept slice must be self-consistent — its
    /// functionResponses must each have their functionCall kept too, and
    /// vice-versa. (Gemini matches by name; we use unique names per round.)
    #[test]
    fn keep_slice_balanced_for_tool_heavy_history() {
        for rounds in 4..=20 {
            let h = tool_heavy_history(rounds);
            let split = pick_split(&h, KEEP_RECENT_TURNS);
            let kept = &h[split..];
            let calls = call_names(kept);
            let resps = response_names(kept);
            for r in &resps {
                assert!(
                    calls.contains(r),
                    "ORPHAN functionResponse {r:?} kept without its call (rounds={rounds}, split={split})"
                );
            }
            for c in &calls {
                assert!(
                    resps.contains(c),
                    "DANGLING functionCall {c:?} kept without its response (rounds={rounds}, split={split})"
                );
            }
        }
    }

    #[test]
    fn keep_slice_never_starts_with_orphan_function_response() {
        for rounds in 4..=20 {
            let h = tool_heavy_history(rounds);
            let split = pick_split(&h, KEEP_RECENT_TURNS);
            if split < h.len() {
                let first = &h[split];
                assert!(
                    !(matches!(first.role, ContentRole::User) && turn_is_function_response(first)),
                    "first kept message is an orphaned functionResponse (rounds={rounds}, split={split})"
                );
            }
        }
    }

    #[test]
    fn pick_split_keeps_at_least_something_when_over_window() {
        // Regression guard for the walk-FORWARD run-away bug: a long run of
        // tool round-trips must not summarize away ALL recent context.
        for rounds in 4..=30 {
            let h = tool_heavy_history(rounds);
            let split = pick_split(&h, KEEP_RECENT_TURNS);
            assert!(
                split < h.len(),
                "pick_split kept nothing (rounds={rounds}, split={split}, len={})",
                h.len()
            );
        }
    }

    #[test]
    fn pick_split_empty_history() {
        assert_eq!(pick_split(&[], 6), 0);
    }

    #[test]
    fn pick_split_single_message() {
        assert_eq!(pick_split(&[user_text("only")], 6), 0);
    }

    #[test]
    fn pick_split_exactly_at_keep_window() {
        let h: Vec<Content> = (0..12).map(|i| user_text(&format!("u{i}"))).collect();
        assert_eq!(pick_split(&h, 6), 0);
    }
}