Skip to main content

sparrow/
redaction.rs

1use crate::event::Event;
2use std::collections::HashSet;
3
4// ─── Redaction filter ───────────────────────────────────────────────────────────
5
/// Filters secrets from events, transcripts, logs, and model context.
/// §5: "Secrets never enter transcripts, logs, or model context unless explicitly a tool argument."
///
/// Two kinds of text are replaced with `replacement`: exact strings held in
/// `secrets`, and text that starts at one of the `patterns` prefixes and runs
/// up to the next whitespace or quote character.
///
/// NOTE(review): this type does not derive `Debug`; a derived impl would print
/// the raw secret values, so think twice before adding one.
pub struct RedactionFilter {
    /// Known secrets to redact (loaded from auth store)
    secrets: HashSet<String>,
    /// Literal prefixes that mark the start of a secret (e.g. `sk-`, `ghp_`).
    /// The code in this file matches them case-insensitively as plain
    /// substrings; they are not interpreted as regular expressions.
    patterns: Vec<String>,
    /// Replacement string
    replacement: String,
}
16
17impl RedactionFilter {
18    pub fn new() -> Self {
19        Self {
20            secrets: HashSet::new(),
21            patterns: vec![
22                // Common secret patterns
23                "sk-".into(),
24                "sk-ant-".into(),
25                "xai-".into(),
26                "ghp_".into(),
27                "gho_".into(),
28                "ghu_".into(),
29                "ghs_".into(),
30                "ghr_".into(),
31                "hf_".into(),
32                "nvapi-".into(),
33                "gsk_".into(),
34                "org-".into(),
35                "proj-".into(),
36            ],
37            replacement: "[REDACTED]".into(),
38        }
39    }
40
41    /// Load secrets from the auth store
42    pub fn load_secrets(&mut self, secrets: Vec<String>) {
43        for s in secrets {
44            if !s.is_empty() {
45                self.secrets.insert(s);
46            }
47        }
48    }
49
50    /// Redact a string, replacing all known secrets with the replacement
51    pub fn redact_str(&self, text: &str) -> String {
52        let mut result = text.to_string();
53
54        // Exact match redaction
55        for secret in &self.secrets {
56            if !secret.is_empty() {
57                result = result.replace(secret.as_str(), &self.replacement);
58            }
59        }
60
61        // Pattern-based redaction (secrets starting with known prefixes)
62        for pattern in &self.patterns {
63            let lower = result.to_lowercase();
64            if let Some(pos) = lower.find(pattern) {
65                // Find the end of the secret (until whitespace or end)
66                let end = result[pos..]
67                    .find(|c: char| c.is_whitespace() || c == '"' || c == '\'')
68                    .map(|e| pos + e)
69                    .unwrap_or(result.len());
70                result.replace_range(pos..end, &self.replacement);
71            }
72        }
73
74        result
75    }
76
77    /// Redact an Event, returning a new Event with secrets removed
78    pub fn redact_event(&self, event: &Event) -> Event {
79        let mut e = event.clone();
80        match &mut e {
81            Event::ThinkingDelta { text, .. } => {
82                *text = self.redact_str(text);
83            }
84            Event::ReasoningDelta { text, .. } => {
85                *text = self.redact_str(text);
86            }
87            Event::Message { text, .. } => {
88                *text = self.redact_str(text);
89            }
90            Event::ApprovalRequested { summary, .. } => {
91                *summary = self.redact_str(summary);
92            }
93            Event::ToolOutput { blocks, .. } => {
94                for block in blocks {
95                    match block {
96                        crate::event::Block::Text(t) => {
97                            *t = self.redact_str(t);
98                        }
99                        _ => {}
100                    }
101                }
102            }
103            Event::Error { message, .. } => {
104                *message = self.redact_str(message);
105            }
106            _ => {}
107        }
108        e
109    }
110
111    /// Check if a string contains any secrets
112    pub fn contains_secret(&self, text: &str) -> bool {
113        for secret in &self.secrets {
114            if !secret.is_empty() && text.contains(secret.as_str()) {
115                return true;
116            }
117        }
118        for pattern in &self.patterns {
119            if text.to_lowercase().contains(pattern) {
120                return true;
121            }
122        }
123        false
124    }
125}
126
127impl Default for RedactionFilter {
128    fn default() -> Self {
129        Self::new()
130    }
131}
132
133// ─── Context Manager ────────────────────────────────────────────────────────────
134
135use crate::memory::RepoMap;
136use crate::provider::Msg;
137
/// Manages context window by summarizing/compacting when approaching limits.
/// §3.7: "The Context Manager enforces the model's window via summarization/compaction
/// and a repo-map instead of dumping files."
///
/// The type holds only two plain numbers, so it derives `Debug`, `Clone` and
/// `PartialEq` to be loggable, copyable into other components, and comparable
/// in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ContextManager {
    /// Maximum context tokens before compaction
    max_tokens: u64,
    /// Approximate tokens per character (conservative)
    tokens_per_char: f64,
}
147
148impl ContextManager {
149    pub fn new(max_tokens: u64) -> Self {
150        Self {
151            max_tokens,
152            tokens_per_char: 0.25, // ~4 chars per token
153        }
154    }
155
156    /// Estimate token count for a string
157    pub fn estimate_tokens(&self, text: &str) -> u64 {
158        (text.len() as f64 * self.tokens_per_char) as u64
159    }
160
161    /// Check if we're approaching the context limit
162    pub fn needs_compaction(&self, total_chars: usize, reserve_tokens: u64) -> bool {
163        let used = self.estimate_tokens(&"x".repeat(total_chars));
164        used + reserve_tokens > self.max_tokens
165    }
166
167    /// Compact messages: keep system + last N messages, summarize earlier ones
168    pub fn compact_messages(
169        &self,
170        messages: &[Msg],
171        system_prompt_len: usize,
172        keep_last: usize,
173    ) -> Vec<Msg> {
174        let system_tokens = self.estimate_tokens(&"x".repeat(system_prompt_len));
175        let available = self.max_tokens.saturating_sub(system_tokens);
176
177        if messages.len() <= keep_last {
178            return messages.to_vec();
179        }
180
181        let mut compacted = Vec::new();
182        let mut used = 0u64;
183
184        // Always keep the first user message
185        if let Some(first) = messages.first() {
186            compacted.push(first.clone());
187            used += self.estimate_tokens(&serde_json::to_string(first).unwrap_or_default());
188        }
189
190        // Summarize middle section: extract real key topics
191        let middle: Vec<&Msg> = messages[1..messages.len() - keep_last].iter().collect();
192        if !middle.is_empty() {
193            // Extract key topics from actual message content
194            let mut tools_used = std::collections::HashSet::new();
195            let mut files_mentioned = std::collections::HashSet::new();
196            let mut error_count = 0u32;
197
198            for msg in &middle {
199                for block in &msg.content {
200                    if let crate::provider::ContentBlock::Text { text } = block {
201                        // Extract tool names
202                        for tool in &[
203                            "fs_read", "fs_write", "edit", "exec", "git", "search", "test",
204                        ] {
205                            if text.contains(tool) {
206                                tools_used.insert(*tool);
207                            }
208                        }
209                        // Extract file mentions
210                        for word in text.split_whitespace() {
211                            if word.ends_with(".rs")
212                                || word.ends_with(".toml")
213                                || word.ends_with(".md")
214                                || word.ends_with(".py")
215                                || word.ends_with(".js")
216                                || word.ends_with(".ts")
217                            {
218                                files_mentioned.insert(word.to_string());
219                            }
220                        }
221                        if text.contains("error")
222                            || text.contains("Error")
223                            || text.contains("FAILED")
224                        {
225                            error_count += 1;
226                        }
227                    }
228                }
229            }
230
231            let mut topics = Vec::new();
232            if !tools_used.is_empty() {
233                let mut tools: Vec<_> = tools_used.into_iter().collect();
234                tools.sort();
235                topics.push(format!("tools: {}", tools.join(", ")));
236            }
237            if !files_mentioned.is_empty() {
238                let mut files: Vec<_> = files_mentioned.into_iter().collect();
239                files.sort();
240                topics.push(format!("files: {}", files.join(", ")));
241            }
242            if error_count > 0 {
243                topics.push(format!("errors encountered: {}", error_count));
244            }
245
246            let summary_str = if topics.is_empty() {
247                format!("[{} messages summarized]", middle.len())
248            } else {
249                format!(
250                    "[{} messages summarized. {}]",
251                    middle.len(),
252                    topics.join("; ")
253                )
254            };
255
256            compacted.push(Msg {
257                role: "user".into(),
258                content: vec![crate::provider::ContentBlock::Text {
259                    text: summary_str.clone(),
260                }],
261            });
262            used += self.estimate_tokens(&summary_str);
263        }
264
265        // Keep last N messages
266        for msg in messages.iter().rev().take(keep_last).rev() {
267            let tokens = self.estimate_tokens(&serde_json::to_string(msg).unwrap_or_default());
268            if used + tokens > available {
269                break;
270            }
271            compacted.push(msg.clone());
272            used += tokens;
273        }
274
275        compacted
276    }
277
278    /// Build a compact repo map representation for context
279    pub fn repo_map_summary(&self, map: &RepoMap, max_files: usize) -> String {
280        let mut lines = vec![format!(
281            "Workspace: {} files, {} symbols",
282            map.files.len(),
283            map.symbols.len()
284        )];
285
286        // Show directory tree (top-level files/dirs)
287        let mut dirs: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
288        for f in &map.files {
289            if let Some(first) = f.path.split('/').next() {
290                dirs.insert(first.to_string());
291            }
292        }
293
294        lines.push("Top-level:".into());
295        for d in dirs.iter().take(max_files) {
296            lines.push(format!("  {}", d));
297        }
298
299        // Show key symbols
300        if !map.symbols.is_empty() {
301            lines.push("Key symbols:".into());
302            for s in map.symbols.iter().take(20) {
303                lines.push(format!("  {} ({}) in {}", s.name, s.kind, s.file));
304            }
305        }
306
307        lines.join("\n")
308    }
309}
310
311impl Default for ContextManager {
312    fn default() -> Self {
313        Self::new(128_000)
314    }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    /// A secret loaded into the filter must not survive in redacted text.
    #[test]
    fn test_redact_api_key() {
        let secret = "sk-ant-api03-abcdef123456";
        let mut filter = RedactionFilter::new();
        filter.load_secrets(vec![secret.into()]);

        let redacted = filter.redact_str("Using key sk-ant-api03-abcdef123456 for auth");

        assert!(!redacted.contains(secret));
        assert!(redacted.contains("[REDACTED]"));
    }

    /// Redacting an event rewrites its text and keeps the variant.
    #[test]
    fn test_redact_event() {
        let mut filter = RedactionFilter::new();
        filter.load_secrets(vec!["mysecret123".into()]);

        let original = Event::ThinkingDelta {
            run: crate::event::RunId("test".into()),
            text: "The secret is mysecret123".into(),
        };

        if let Event::ThinkingDelta { text, .. } = filter.redact_event(&original) {
            assert!(!text.contains("mysecret123"));
            assert!(text.contains("[REDACTED]"));
        } else {
            panic!("wrong event type");
        }
    }

    /// Five messages with `keep_last = 2` compact to at most four entries.
    #[test]
    fn test_context_compaction() {
        let roles = ["user", "assistant", "user", "assistant", "user"];
        let messages: Vec<Msg> = roles
            .iter()
            .map(|role| Msg {
                role: (*role).into(),
                content: Vec::new(),
            })
            .collect();

        let manager = ContextManager::new(1000);
        let compacted = manager.compact_messages(&messages, 100, 2);

        // Should have: first msg + summary + last 2
        assert!(compacted.len() <= 4);
    }
}