rsclaw 2026.4.22

AI Agent Engine Compatible with OpenClaw
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
//! Session compaction — LLM-based context summarization and transcript logging.

use std::sync::Arc;

use chrono::Utc;
use futures::StreamExt;
use serde_json::{Value, json};
use tokio::io::AsyncWriteExt as _;
use tracing::{debug, info, warn};

use super::context_mgr::{compress_tool_results, estimate_tokens, msg_tokens};
use super::runtime::AgentRuntime;
use crate::provider::{
    ContentPart, LlmRequest, Message, MessageContent, Role, StreamEvent,
};

/// Prefix for compaction summaries. Tells the LLM that the summary is
/// reference material from a previous context window, NOT active instructions.
const COMPACTION_PREFIX: &str = "\
[CONTEXT COMPACTION - REFERENCE ONLY] Earlier turns were compacted \
into the summary below. This is a handoff from a previous context \
window - treat it as background reference, NOT as active instructions. \
Do NOT answer questions or fulfill requests mentioned in this summary; \
they were already addressed. \
Your current task is in the '## Active Task' section - resume from there. \
Respond ONLY to the latest user message that appears AFTER this summary.";

/// Returns true if a JSON message value represents a compaction summary (internal only).
/// Used by both the REST API (server/mod.rs) and WebSocket chat handler to filter
/// compaction messages from user-visible history.
pub fn is_compaction_message(v: &serde_json::Value) -> bool {
    let obj = match v.as_object() {
        Some(o) => o,
        None => return false,
    };
    let role = obj.get("role").and_then(|r| r.as_str()).unwrap_or("");
    if role != "user" {
        return false;
    }
    // Content can be a plain string or an array of parts.
    if let Some(text) = obj.get("content").and_then(|c| c.as_str()) {
        return text.starts_with("[CONTEXT COMPACTION");
    }
    if let Some(parts) = obj.get("content").and_then(|c| c.as_array()) {
        if let Some(first_text) = parts
            .first()
            .and_then(|p| p.get("text"))
            .and_then(|t| t.as_str())
        {
            return first_text.starts_with("[CONTEXT COMPACTION");
        }
    }
    false
}

impl AgentRuntime {
    /// Summarise the session history via LLM when the total character count
    /// approaches `reserveTokensFloor` (approximated as floor * 4 chars/token,
    /// default 100 000 chars).
    ///
    /// **Layered mode** (default): keeps the last N user-assistant pairs
    /// verbatim and only summarises the older portion, so recent context is
    /// never lost.  Falls back to Default/Safeguard when configured.
    pub(crate) async fn compact_if_needed(&mut self, session_key: &str, model: &str) {
        self.compact_inner(session_key, model, false).await;
    }

    /// Force compaction regardless of threshold (used by /compact).
    pub(crate) async fn compact_force(&mut self, session_key: &str, model: &str) {
        self.compact_inner(session_key, model, true).await;
    }

    /// Estimate the fixed token overhead from system prompt, tools, and skills.
    /// This overhead is always present regardless of conversation length.
    pub(crate) fn estimate_fixed_overhead(&self) -> usize {
        let mut overhead = 0usize;

        // System prompt.
        if let Some(ref sys) = self.cached_system_prompt {
            overhead += estimate_tokens(sys);
        }

        // Tool definitions.
        for tool in &self.cached_tools {
            overhead += estimate_tokens(&tool.name);
            overhead += estimate_tokens(&tool.description);
            overhead += estimate_tokens(&tool.parameters.to_string());
        }

        // Skills loaded in system prompt (rough estimate from skill count).
        let skill_count = self.skills.len();
        // Average skill prompt is ~500 tokens.
        overhead += skill_count * 500;

        overhead
    }

    pub(crate) async fn compact_inner(&mut self, session_key: &str, model: &str, force: bool) {
        use crate::config::schema::CompactionMode;

        // Use configured compaction settings, or sensible defaults.
        let cfg = self.config.agents.defaults.compaction.clone()
            .unwrap_or_default();

        // Compaction trigger: token threshold ONLY.
        // Turn count and time-based triggers were removed because they
        // unnecessarily discard context and break KV cache in the new
        // append-only architecture.
        let context_tokens = self.config.agents.defaults.context_tokens.unwrap_or(64_000) as usize;
        let kv_cache_mode = self.config.agents.defaults.kv_cache_mode.unwrap_or(1);
        // kvCacheMode >= 1: append-only mode, compact at 80% to leave headroom
        // for token estimation inaccuracy (10-15%). Mode 0: legacy 50% threshold.
        let default_threshold = if kv_cache_mode >= 1 {
            (context_tokens * 4 / 5).max(16_000) // 80% — safe margin for estimation error
        } else {
            (context_tokens / 2).max(16_000)      // 50% — same as hermes, save tokens/cost
        };
        // reserveTokensFloor is the MINIMUM tokens to keep free for replies.
        // Trigger compaction when used tokens exceed (contextTokens - reserveFloor).
        // NOT a direct threshold — it's how much headroom to leave.
        let token_threshold = if let Some(floor) = cfg.reserve_tokens_floor {
            context_tokens.saturating_sub(floor as usize).max(16_000)
        } else {
            default_threshold
        };

        // Estimate fixed overhead: system prompt + tools + skills.
        // This is NOT in the session messages but still counts towards context.
        let overhead_tokens = self.estimate_fixed_overhead();

        let session_tokens: usize = self
            .sessions
            .get(session_key)
            .map(|msgs| msgs.iter().map(msg_tokens).sum())
            .unwrap_or(0);
        let total_tokens = session_tokens + overhead_tokens;

        let turns = self
            .compaction_state
            .get(session_key)
            .map(|(_, t)| *t)
            .unwrap_or(0);

        let token_trigger = total_tokens > token_threshold;

        debug!(
            session = session_key,
            total_tokens,
            token_threshold,
            turns,
            token_trigger,
            force,
            "compaction check"
        );

        if !force && !token_trigger {
            self.compaction_state
                .entry(session_key.to_owned())
                .and_modify(|(_, t)| *t += 1)
                .or_insert((std::time::Instant::now(), 1));
            return;
        }

        let trigger_reason = if token_trigger {
            "tokens"
        } else {
            "time"
        };
        info!(
            session = session_key,
            trigger = trigger_reason,
            total_tokens,
            turns,
            "compaction triggered"
        );

        let mode = cfg
            .mode
            .as_ref()
            .cloned()
            .unwrap_or(CompactionMode::Layered);
        let compaction_model = cfg.model.as_deref().unwrap_or(model);
        // Dynamic keepRecentPairs: reduce when token pressure is high.
        let configured_pairs = cfg.keep_recent_pairs.unwrap_or(5) as usize;
        let keep_pairs = if total_tokens > token_threshold * 3 {
            1.max(configured_pairs / 3) // extreme pressure: keep 1-2 pairs
        } else if total_tokens > token_threshold * 2 {
            1.max(configured_pairs / 2) // high pressure: keep 2-3 pairs
        } else {
            configured_pairs // normal: use configured value
        };
        let extract_facts = cfg.extract_facts.unwrap_or(true);

        let msgs_to_text = |msgs: &[Message]| -> String {
            let default_transcript = (context_tokens * 7 / 10).max(16_000);
            let max_total_tokens: usize = cfg.max_transcript_tokens
                .map(|t| t as usize)
                .unwrap_or(default_transcript);
            Self::msgs_to_text_static(msgs, max_total_tokens)
        };

        // Split messages into (head, old_portion, recent_portion) for layered mode.
        // Head: first user-assistant pair (contains [Session started:...], preserved verbatim)
        // Middle: compressed into summary
        // Tail: recent N pairs (preserved verbatim)
        let (head_msgs, old_text, recent_msgs) = if mode == CompactionMode::Layered {
            let msgs = self.sessions.get(session_key).cloned().unwrap_or_default();

            // Head: protect first user + first assistant (2 messages).
            // If first message is a compaction summary from a previous round,
            // don't protect it (it will be updated via iterative summary).
            let head_end = {
                let first_is_summary = msgs.first()
                    .and_then(|m| if let MessageContent::Text(t) = &m.content { Some(t.starts_with("[CONTEXT COMPACTION")) } else { None })
                    .unwrap_or(false);
                if first_is_summary {
                    0 // don't protect old summary as head
                } else {
                    // Protect first user + first assistant pair
                    let mut count = 0usize;
                    let mut end = 0;
                    for (idx, m) in msgs.iter().enumerate() {
                        if m.role == Role::Assistant {
                            count += 1;
                            end = idx + 1;
                            if count >= 1 { break; }
                        }
                    }
                    end.min(msgs.len())
                }
            };

            // Tail: count user-assistant pairs from the end.
            let mut pair_count = 0usize;
            let mut split_idx = msgs.len();
            let mut i = msgs.len();
            while i > head_end && pair_count < keep_pairs {
                i -= 1;
                if msgs[i].role == Role::User {
                    pair_count += 1;
                    split_idx = i;
                }
            }
            // Ensure split doesn't overlap with head.
            split_idx = split_idx.max(head_end);

            // Don't split inside a tool_call/tool_result group.
            // If split_idx lands on tool results, move it back to include the
            // preceding assistant(tool_calls) message so the pair stays together.
            while split_idx > head_end
                && split_idx < msgs.len()
                && msgs[split_idx].role == Role::Tool
            {
                split_idx -= 1;
            }

            let head = msgs[..head_end].to_vec();
            let mut old_portion = msgs[head_end..split_idx].to_vec();
            let recent = msgs[split_idx..].to_vec();
            if old_portion.is_empty() {
                return; // not enough history to compact
            }
            compress_tool_results(&mut old_portion, 6);
            (head, msgs_to_text(&old_portion), recent)
        } else {
            let mut msgs = self.sessions.get(session_key).cloned().unwrap_or_default();
            compress_tool_results(&mut msgs, 6);
            (vec![], msgs_to_text(&msgs), vec![])
        };

        // Pre-compaction entity preservation: deterministic extraction (phone/ID/email)
        // plus LLM-based semantic extraction (name, birthday, zodiac, etc.)
        // Both run BEFORE the LLM summary to guarantee no data loss.
        {
            let entities = crate::agent::context_mgr::extract_key_entities(&old_text);
            if !entities.is_empty() {
                if let Some(ref mem) = self.memory {
                    let scope = format!("agent:{}", self.handle.id);
                    crate::agent::context_mgr::write_entity_memories(mem, &scope, entities).await;
                    debug!(session = session_key, "pre-compaction deterministic entities pinned");
                }
            }
            // LLM-based entity extraction is now handled by the summary prompt
            // (Entities section), parsed after summary generation below.
        }

        // Detect previous compaction summary for iterative update.
        let previous_summary = {
            let msgs = self.sessions.get(session_key).cloned().unwrap_or_default();
            msgs.iter().find_map(|m| {
                if let MessageContent::Text(t) = &m.content {
                    if t.starts_with("[CONTEXT COMPACTION") {
                        let summary_start = t.find("\n\n").map(|i| i + 2).unwrap_or(0);
                        Some(t[summary_start..].to_owned())
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
        };

        // Summarise the old portion.
        // KV cache mode: append summary instruction to existing session messages
        // so the LLM reuses the already-cached prefix. Only the summary prompt
        // itself needs to be computed. Falls back to standalone mode on failure.
        let summary = if kv_cache_mode >= 1 && mode != CompactionMode::Safeguard {
            let result = self.compact_with_kv_cache(
                session_key,
                compaction_model,
                &old_text,
                previous_summary.as_deref(),
            ).await;
            if result.is_some() {
                result
            } else {
                // Fallback to standalone summarization
                info!(session = session_key, "KV cache compact failed, falling back to standalone");
                self.compact_single(compaction_model, &old_text, previous_summary.as_deref()).await
            }
        } else {
            match mode {
                CompactionMode::Default | CompactionMode::Layered => {
                    self.compact_single(
                        compaction_model,
                        &old_text,
                        previous_summary.as_deref(),
                    ).await
                }
                CompactionMode::Safeguard => {
                    const CHUNK_SIZE: usize = 40_000;
                    let chunks: Vec<&str> = {
                        let mut result = Vec::new();
                        let mut remaining = old_text.as_str();
                        while !remaining.is_empty() {
                            let mut end = CHUNK_SIZE.min(remaining.len());
                            while end < remaining.len() && !remaining.is_char_boundary(end) {
                                end -= 1;
                            }
                            let (chunk, rest) = remaining.split_at(end);
                            result.push(chunk);
                            remaining = rest;
                        }
                        result
                    };
                    let mut combined = String::new();
                    for chunk in chunks {
                        match self.compact_single(compaction_model, chunk, None).await {
                            Some(s) => {
                                combined.push_str(&s);
                                combined.push('\n');
                            }
                            None => return,
                        }
                    }
                    if combined.is_empty() { None } else { Some(combined) }
                }
            }
        };

        let Some(summary) = summary else { return };

        // -- Entity extraction from summary's Entities section --
        // The summary prompt includes an Entities section (kind=value format).
        // Parse it and write as pinned memories — no extra LLM call needed.
        if let Some(ref mem) = self.memory {
            let entities = parse_entities_from_summary(&summary);
            if !entities.is_empty() {
                let scope = format!("agent:{}", self.handle.id);
                crate::agent::context_mgr::write_entity_memories(mem, &scope, entities).await;
                debug!(session = session_key, "entities extracted from compaction summary");
            }
        }

        // -- Key fact extraction: store important facts in long-term memory --
        if extract_facts {
            if let Some(facts) = self.extract_key_facts(compaction_model, &old_text).await {
                if let Some(ref mem) = self.memory {
                    let scope = format!("agent:{}", self.handle.id);
                    let mut guard = mem.lock().await;
                    for fact in facts.lines().filter(|l| !l.trim().is_empty()) {
                        let fact_text = fact.trim_start_matches("- ").trim();
                        if fact_text.len() > 5 {
                            let doc = crate::agent::memory::MemoryDoc {
                                id: format!("cf-{}", uuid::Uuid::new_v4()),
                                scope: scope.clone(),
                                kind: "compaction_fact".to_owned(),
                                text: fact_text.to_owned(),
                                vector: vec![],
                                created_at: 0, // filled by add()
                                accessed_at: 0,
                                access_count: 0,
                                importance: 0.7, // higher than default
                                tier: Default::default(),
                                abstract_text: None,
                                overview_text: None,
                                tags: vec![],
                pinned: false,
                            };
                            if let Err(e) = guard.add(doc).await {
                                tracing::warn!("compaction fact memory add failed: {e:#}");
                            }
                        }
                    }
                    drop(guard);
                    debug!(
                        session = session_key,
                        "key facts extracted to long-term memory"
                    );
                }
            }
        }

        // Replace session history: head + summary + tail.
        // Head: first user-assistant pair (contains [Session started:...])
        // Summary: wrapped with compaction prefix
        // Tail: recent messages kept verbatim
        if let Some(sess) = self.sessions.get_mut(session_key) {
            let summary_msg = Message {
                role: Role::User,
                content: MessageContent::Text(format!(
                    "{COMPACTION_PREFIX}\n\n{summary}"
                )),
            };
            sess.clear();
            // Head: preserved verbatim (first user + first assistant).
            sess.extend(head_msgs);
            // Summary: compacted middle portion.
            sess.push(summary_msg);
            // Tail: recent messages kept verbatim.
            sess.extend(recent_msgs);
        }

        // Reset compaction state after successful compaction.
        self.compaction_state
            .insert(session_key.to_owned(), (std::time::Instant::now(), 0));

        // Persist compacted session to redb (survives restarts).
        if let Some(sess) = self.sessions.get(session_key) {
            if let Err(e) = self.store.db.delete_session(session_key) {
                tracing::warn!("compaction: failed to delete old session: {e:#}");
            }
            for msg in sess.iter() {
                let val = serde_json::to_value(msg).unwrap_or_default();
                if let Err(e) = self.store.db.append_message(session_key, &val) {
                    tracing::warn!("compaction: failed to persist message: {e:#}");
                }
            }
        }

        // Invalidate plugins/skills cache so they are rebuilt (sorted) on
        // the next turn, merging any trailing additions into [1]/[2].
        self.invalidate_plugins_skills_cache();

        let new_tokens: usize = self
            .sessions
            .get(session_key)
            .map(|msgs| msgs.iter().map(msg_tokens).sum())
            .unwrap_or(0);
        info!(
            session = session_key,
            tokens_before = total_tokens,
            tokens_after = new_tokens,
            keep_pairs,
            "auto-compaction complete (layered)"
        );

        // If compaction barely helped (still >80% of threshold), inject a
        // system hint so the agent will relay the /reset suggestion to the user.
        if new_tokens > token_threshold * 4 / 5 {
            let zh = crate::i18n::default_lang() == "zh";
            let hint = if zh {
                "[system] 上下文压缩后仍然较大,响应可能变慢。请告知用户发送 /reset 重置会话以恢复正常速度。"
            } else {
                "[system] Context is still large after compaction and responses may slow down. Please tell the user to send /reset to start a fresh session."
            };
            if let Some(sess) = self.sessions.get_mut(session_key) {
                sess.push(Message {
                    role: Role::System,
                    content: MessageContent::Text(hint.to_owned()),
                });
            }
            warn!(
                session = session_key,
                tokens_after = new_tokens,
                threshold = token_threshold,
                "compaction insufficient, /reset recommended"
            );
        }

        // Persist compaction marker to transcript.
        self.append_transcript(
            session_key,
            "[auto-compaction triggered]",
            &format!("[summary: {summary}]"),
        )
        .await;
    }

    /// Render messages as plain text transcript with two-pass budget allocation.
    ///
    /// Total output is capped at `max_total_tokens` to avoid blowing up the
    /// compact LLM's context window. Recent messages get full detail first;
    /// older messages get progressively reduced detail until budget is exhausted.
    pub(crate) fn msgs_to_text_static(msgs: &[Message], max_total_tokens: usize) -> String {
        // Helper: truncate to N chars (UTF-8 safe).
        fn trunc(s: &str, max: usize) -> String {
            match s.char_indices().nth(max) {
                None => s.to_owned(),
                Some((byte_idx, _)) => {
                    let mut t = s[..byte_idx].to_owned();
                    t.push_str("...[truncated]");
                    t
                }
            }
        }

        // Helper: smart-truncate tool_call args.
        fn compact_args(input: &Value) -> String {
            const BULK_FIELDS: &[&str] = &["content", "old_string", "new_string"];
            const MAX_BULK: usize = 300;
            const MAX_CMD: usize = 500;
            const MAX_TOTAL: usize = 2000;

            if let Some(obj) = input.as_object() {
                let needs = obj.iter().any(|(k, v)| {
                    let limit = if BULK_FIELDS.contains(&k.as_str()) { MAX_BULK }
                                else if k == "command" { MAX_CMD }
                                else { return false; };
                    v.as_str().map(|s| s.char_indices().nth(limit).is_some()).unwrap_or(false)
                });
                if needs {
                    let mut compact = serde_json::Map::new();
                    for (k, v) in obj {
                        let limit = if BULK_FIELDS.contains(&k.as_str()) { Some(MAX_BULK) }
                                    else if k == "command" { Some(MAX_CMD) }
                                    else { None };
                        if let (Some(lim), Some(s)) = (limit, v.as_str()) {
                            compact.insert(k.clone(), Value::String(trunc(s, lim)));
                        } else {
                            compact.insert(k.clone(), v.clone());
                        }
                    }
                    let ser = serde_json::to_string(&Value::Object(compact)).unwrap_or_default();
                    return if ser.char_indices().nth(MAX_TOTAL).is_some() { trunc(&ser, MAX_TOTAL) } else { ser };
                }
            }
            let full = serde_json::to_string(input).unwrap_or_default();
            if full.char_indices().nth(MAX_TOTAL).is_some() { trunc(&full, MAX_TOTAL) } else { full }
        }

        // Render a single message at the given detail level:
        //   2 = full (tool args + results), 1 = medium, 0 = minimal
        let render_msg = |m: &Message, detail: u8| -> String {
            let role = format!("{:?}", m.role).to_lowercase();
            let body = match &m.content {
                MessageContent::Text(t) => {
                    if detail == 0 { trunc(t, 200) } else { t.clone() }
                }
                MessageContent::Parts(parts) => parts
                    .iter()
                    .filter_map(|p| match p {
                        ContentPart::Text { text } => Some(
                            if detail == 0 { trunc(text, 200) } else { text.clone() }
                        ),
                        ContentPart::ToolUse { name, input, .. } => match detail {
                            2 => Some(format!("[tool_call: {name}({})]", compact_args(input))),
                            1 => Some(format!("[tool_call: {name}({})]",
                                trunc(&serde_json::to_string(input).unwrap_or_default(), 100))),
                            _ => Some(format!("[tool_call: {name}]")),
                        },
                        ContentPart::ToolResult { tool_use_id: _, content, .. } => match detail {
                            2 => Some(format!("[tool_result: {}]", trunc(content, 800))),
                            1 => Some(format!("[tool_result: {}]", trunc(content, 150))),
                            _ => None,
                        },
                        ContentPart::Image { .. } => Some("[image]".to_owned()),
                        #[allow(unreachable_patterns)]
                        _ => None,
                    })
                    .collect::<Vec<_>>()
                    .join(" "),
            };
            format!("{role}: {body}")
        };

        // Pass 1: full detail, check if within budget.
        let full: Vec<String> = msgs.iter().map(|m| render_msg(m, 2)).collect();
        let full_tokens: Vec<usize> = full.iter().map(|s| estimate_tokens(s)).collect();
        let total: usize = full_tokens.iter().sum();
        if total <= max_total_tokens {
            return full.join("\n");
        }

        // Pass 2: allocate budget from newest to oldest.
        let n = msgs.len();
        let mut detail_levels = vec![0u8; n];
        let mut budget_used = 0usize;
        for i in (0..n).rev() {
            if budget_used + full_tokens[i] <= max_total_tokens {
                detail_levels[i] = 2;
                budget_used += full_tokens[i];
            } else {
                let m = &msgs[i];
                for &d in &[1u8, 0] {
                    let rendered = render_msg(m, d);
                    let cost = estimate_tokens(&rendered);
                    if budget_used + cost <= max_total_tokens || d == 0 {
                        detail_levels[i] = d;
                        budget_used += cost.min(max_total_tokens.saturating_sub(budget_used));
                        break;
                    }
                }
            }
            if budget_used >= max_total_tokens {
                break;
            }
        }

        // Final render in order.
        let mut result = String::new();
        let mut tokens_used = 0usize;
        for (i, m) in msgs.iter().enumerate() {
            let line = if detail_levels[i] == 2 {
                full[i].clone()
            } else {
                render_msg(m, detail_levels[i])
            };
            let line_tokens = estimate_tokens(&line);
            if tokens_used + line_tokens > max_total_tokens {
                result.push_str("\n...[context truncated]");
                break;
            }
            result.push_str(&line);
            result.push('\n');
            tokens_used += line_tokens;
        }
        result
    }

    /// Compact using existing session messages to reuse KV cache prefix.
    ///
    /// Instead of sending a standalone request, appends a summary instruction
    /// to the current session's messages. The system prompt + tools + history
    /// are already cached in the LLM slot, so only the final summary prompt
    /// needs to be computed.
    pub(crate) async fn compact_with_kv_cache(
        &mut self,
        session_key: &str,
        model: &str,
        _old_text: &str,
        previous_summary: Option<&str>,
    ) -> Option<String> {
        let system_prompt = self.cached_system_prompt.clone()?;

        // Clone session messages — we'll append the summary instruction
        // to the API copy only, not to the stored session.
        let mut messages = self
            .sessions
            .get(session_key)
            .cloned()
            .unwrap_or_default();

        if messages.is_empty() {
            return None;
        }

        // Build the summary instruction
        let template = Self::summary_template();
        let instruction = if let Some(prev) = previous_summary {
            format!(
                "Ignore all previous instructions. You are now a summarization agent. \
                 Do NOT call any tools. Do NOT answer questions. Output ONLY a structured summary.\n\n\
                 Update the previous compaction summary with the new conversation turns above.\n\n\
                 PREVIOUS SUMMARY:\n{prev}\n\n\
                 PRESERVE existing info, ADD new actions, update Active Task.\n\n{template}"
            )
        } else {
            format!(
                "Ignore all previous instructions. You are now a summarization agent. \
                 Do NOT call any tools. Do NOT answer questions. Output ONLY a structured summary \
                 of the entire conversation above.\n\n{template}"
            )
        };

        // Append summary instruction as the last user message
        messages.push(Message {
            role: Role::User,
            content: MessageContent::Text(instruction),
        });

        // Reuse the cached tools from the last run_turn for exact prefix match.
        // The summary instruction already says "Do NOT call any tools".
        // If the LLM still tries, we ignore the tool call in the stream handler.
        let tools = self.cached_tools.clone();

        let req = LlmRequest {
            model: model.to_owned(),
            messages,
            tools,
            system: Some(system_prompt),
            max_tokens: Some(4096),
            temperature: None,
            frequency_penalty: None,
            thinking_budget: None, kv_cache_mode: 0, session_key: None,
        };

        let providers = Arc::clone(&self.providers);
        let mut stream = match self.failover.call(req, &providers).await {
            Ok(s) => s,
            Err(e) => {
                warn!("KV cache compact LLM call failed: {e:#}");
                return None;
            }
        };

        let mut summary = String::new();
        while let Some(event) = stream.next().await {
            match event {
                Ok(StreamEvent::TextDelta(d)) => summary.push_str(&d),
                Ok(StreamEvent::ReasoningDelta(_)) => {}
                Ok(StreamEvent::Done { .. }) | Ok(StreamEvent::Error(_)) => break,
                Ok(StreamEvent::ToolCall { .. }) => {
                    // LLM tried to call a tool despite empty tools list — skip
                    warn!("compact_with_kv_cache: unexpected tool call, ignoring");
                }
                Err(e) => {
                    warn!("KV cache compact stream error: {e:#}");
                    return None;
                }
            }
        }

        if summary.is_empty() {
            None
        } else {
            info!("compact_with_kv_cache: summary generated ({} chars)", summary.len());
            Some(summary)
        }
    }

    /// Shared summary template used by both standalone and KV cache modes.
    fn summary_template() -> &'static str {
        "Use this exact structure:\n\n\
         ## Active Task\n\
         [THE MOST IMPORTANT FIELD. Copy the user's most recent unfulfilled request \
         verbatim. If no outstanding task, write \"None.\"]\n\n\
         ## Goal\n[Overall goal]\n\n\
         ## Completed\n[Numbered list: N. ACTION target - outcome]\n\n\
         ## Active State\n[Modified files, test status, running processes, branch]\n\n\
         ## In Progress\n[Work underway when compaction fired]\n\n\
         ## Key Data\n[Exact values verbatim: file paths, URLs, IDs, phone numbers]\n\n\
         ## Decisions\n[Technical decisions and WHY]\n\n\
         ## Pending\n[Blocked items or awaiting user]\n\n\
         ## Resolved Questions\n[Already answered — include the answer]\n\n\
         ## Files\n[Files read/modified/created]\n\n\
         ## Entities\n[kind=value per line. Kinds: name, phone, id_card, email, birthday, \
         age, zodiac, address, relationship, preference. If none: (none)]\n\n\
         CRITICAL: Copy ALL values character-for-character. Be CONCRETE."
    }

    /// Call the LLM once with a summarization prompt and return the text.
    ///
    /// Supports iterative updates: if `previous_summary` is provided (from a
    /// prior compaction), the LLM updates it with new turns instead of
    /// starting from scratch. This preserves information across multiple
    /// compactions.
    pub(crate) async fn compact_single(
        &mut self,
        model: &str,
        history: &str,
        previous_summary: Option<&str>,
    ) -> Option<String> {
        let preamble = "You are a summarization agent creating a context checkpoint. \
            Your output will be injected as reference for a DIFFERENT assistant that \
            continues the conversation. Do NOT respond to any questions or requests \
            in the conversation - only output the structured summary. \
            Do NOT include any preamble, greeting, or prefix.";

        let template = Self::summary_template();

        let prompt = if let Some(prev) = previous_summary {
            format!(
                "{preamble}\n\n\
                 You are updating a context compaction summary. A previous compaction \
                 produced the summary below. New conversation turns have occurred \
                 since then and need to be incorporated.\n\n\
                 PREVIOUS SUMMARY:\n{prev}\n\n\
                 NEW TURNS TO INCORPORATE:\n{history}\n\n\
                 Update the summary using this exact structure. PRESERVE all existing \
                 information that is still relevant. ADD new completed actions \
                 (continue numbering). Move items from \"In Progress\" to \"Completed\" \
                 when done. Update \"Active State\" to reflect current state. \
                 Remove information only if clearly obsolete. \
                 CRITICAL: Update \"## Active Task\" to the user's most recent \
                 unfulfilled request.\n\n{template}"
            )
        } else {
            format!(
                "{preamble}\n\n\
                 Create a structured handoff summary. The next assistant should \
                 understand what happened without re-reading the original turns.\n\n\
                 TURNS TO SUMMARIZE:\n{history}\n\n\
                 Use this exact structure:\n\n{template}"
            )
        };

        let req = LlmRequest {
            model: model.to_owned(),
            messages: vec![Message {
                role: Role::User,
                content: MessageContent::Text(prompt),
            }],
            tools: vec![], // no tools — compact must only produce text
            system: None, // preamble is in the user message
            max_tokens: Some(4096),
            temperature: None,
            frequency_penalty: None,
            thinking_budget: None, kv_cache_mode: 0, session_key: None,
        };

        let providers = Arc::clone(&self.providers);
        let mut stream = match self.failover.call(req, &providers).await {
            Ok(s) => s,
            Err(e) => {
                warn!("compaction LLM call failed: {e:#}");
                return None;
            }
        };

        let mut summary = String::new();
        while let Some(event) = stream.next().await {
            match event {
                Ok(StreamEvent::TextDelta(d)) => summary.push_str(&d),
                Ok(StreamEvent::ReasoningDelta(_)) => {} // ignore reasoning in compaction
                Ok(StreamEvent::Done { .. }) | Ok(StreamEvent::Error(_)) => break,
                Ok(StreamEvent::ToolCall { .. }) => {} // unexpected in summarization
                Err(e) => {
                    warn!("compaction stream error: {e:#}");
                    return None;
                }
            }
        }

        if summary.is_empty() {
            None
        } else {
            Some(summary)
        }
    }

    /// Extract key facts (names, IDs, decisions, file paths) from a
    /// conversation transcript for long-term memory storage.
    pub(crate) async fn extract_key_facts(&mut self, model: &str, history: &str) -> Option<String> {
        // Limit input to avoid huge summarisation calls.
        let input = if history.len() > 60_000 {
            let mut end = 60_000;
            while end < history.len() && !history.is_char_boundary(end) {
                end += 1;
            }
            &history[..end]
        } else {
            history
        };
        let req = LlmRequest {
            model: model.to_owned(),
            messages: vec![Message {
                role: Role::User,
                content: MessageContent::Text(format!(
                    "Extract the key facts from this conversation that should be remembered \
                     long-term. Output ONLY a bullet list (one fact per line, prefixed with \
                     '- '). Include: names, user IDs, chat IDs, phone numbers, account numbers, \
                     any numeric sequences that were looked up or confirmed, important decisions, \
                     file paths, URLs, preferences, and action items. \
                     IMPORTANT: copy numeric values (phone numbers, IDs) character-for-character — \
                     never truncate or paraphrase them. Be concise. Skip ephemeral chit-chat.\n\n{input}"
                )),
            }],
            tools: vec![],
            system: Some(
                "You extract key facts from conversations. Output only a bullet list.".to_owned(),
            ),
            max_tokens: Some(1024),
            temperature: None,
            frequency_penalty: None,
            thinking_budget: None, kv_cache_mode: 0, session_key: None,
        };

        let providers = Arc::clone(&self.providers);
        let mut stream = match self.failover.call(req, &providers).await {
            Ok(s) => s,
            Err(e) => {
                warn!("key fact extraction failed: {e:#}");
                return None;
            }
        };

        let mut result = String::new();
        while let Some(event) = stream.next().await {
            match event {
                Ok(StreamEvent::TextDelta(d)) => result.push_str(&d),
                Ok(StreamEvent::Done { .. }) | Ok(StreamEvent::Error(_)) => break,
                _ => {}
            }
        }

        if result.is_empty() {
            None
        } else {
            Some(result)
        }
    }

    // -----------------------------------------------------------------------
    // JSONL transcript (AGENTS.md $20 step 11)
    // -----------------------------------------------------------------------

    /// Append user + assistant messages to `~/.rsclaw/transcripts/<key>.jsonl`.
    pub(crate) async fn append_transcript(&self, session_key: &str, user_text: &str, assistant_text: &str) {
        let transcripts_dir = dirs_next::home_dir()
            .unwrap_or_default()
            .join(".rsclaw/transcripts");

        // Sanitize session key for use as a filename.
        let safe_key: String = session_key
            .chars()
            .map(|c| {
                if c.is_alphanumeric() || c == '-' {
                    c
                } else {
                    '_'
                }
            })
            .collect();
        let path = transcripts_dir.join(format!("{safe_key}.jsonl"));

        if let Err(e) = tokio::fs::create_dir_all(&transcripts_dir).await {
            warn!("transcript mkdir: {e:#}");
            return;
        }

        let ts = Utc::now().to_rfc3339();
        let mut lines = String::new();
        for (role, content) in [("user", user_text), ("assistant", assistant_text)] {
            let entry = json!({
                "role": role,
                "content": content,
                "session": session_key,
                "agent": self.handle.id,
                "ts": ts,
            });
            if let Ok(s) = serde_json::to_string(&entry) {
                lines.push_str(&s);
                lines.push('\n');
            }
        }

        match tokio::fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(&path)
            .await
        {
            Ok(mut f) => {
                if let Err(e) = f.write_all(lines.as_bytes()).await {
                    warn!("transcript write: {e:#}");
                }
            }
            Err(e) => warn!("transcript open: {e:#}"),
        }
    }
}

/// Parse `## Entities` section from a compaction summary.
///
/// Expected format (one per line):
/// ```text
/// ## Entities
/// name=小王
/// phone=18674030927
/// birthday=1995年3月15日
/// ```
fn parse_entities_from_summary(summary: &str) -> Vec<crate::agent::context_mgr::KeyEntity> {
    let mut entities = Vec::new();

    // Find the Entities section
    let section_start = summary.find("## Entities");
    let Some(start) = section_start else {
        return entities;
    };
    let content = &summary[start..];
    // Take lines until next ## section or end of string
    let section_end = content[3..].find("\n## ").map(|i| i + 3).unwrap_or(content.len());
    let section = &content[..section_end];

    let kind_to_label: &[(&str, &str, &'static str)] = &[
        ("name", "用户姓名", "name"),
        ("phone", "用户手机号", "phone_number"),
        ("id_card", "用户身份证", "id_card"),
        ("email", "用户邮箱", "email"),
        ("birthday", "用户生日", "birthday"),
        ("age", "用户年龄", "age"),
        ("zodiac", "用户星座", "zodiac"),
        ("lucky_number", "用户幸运数字", "lucky_number"),
        ("address", "用户地址", "address"),
        ("relationship", "用户关系", "relationship"),
        ("preference", "用户偏好", "preference"),
    ];

    for line in section.lines().skip(1) {
        let line = line.trim();
        if line.is_empty() || line == "(none)" || line.starts_with("##") {
            continue;
        }
        // Parse kind=value
        if let Some((kind, value)) = line.split_once('=') {
            let kind = kind.trim().to_lowercase();
            let value = value.trim();
            if value.is_empty() {
                continue;
            }
            if let Some((_, label, static_kind)) = kind_to_label.iter().find(|(k, _, _)| *k == kind) {
                entities.push(crate::agent::context_mgr::KeyEntity {
                    kind: static_kind,
                    value: value.to_owned(),
                    memory_text: format!("{label}: {value}"),
                });
            }
        }
    }

    entities
}