Skip to main content

forge_guardrails/tool_output/
config.rs

1use std::fmt;
2use std::str::FromStr;
3
/// Default byte budget for a tool output before safe capping applies (64 KiB).
pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 64 * 1024;
/// Default cap on the number of dedup records retained for a single session.
pub const DEFAULT_MAX_DEDUP_ENTRIES_PER_SESSION: usize = 128;
/// Default cap on the number of sessions held in dedup memory.
pub const DEFAULT_MAX_DEDUP_SESSIONS: usize = 64;
10
/// Marker that begins a tool output compressed with the LZW dictionary scheme.
///
/// Recognised by [`is_dictionary_compressed_output`].
pub const LZW_DICTIONARY_HEADER: &str = "[Forge LZW Dictionary]";
/// Marker that begins a tool output compressed with the RePair dictionary scheme.
///
/// Recognised by [`is_dictionary_compressed_output`].
pub const REPAIR_DICTIONARY_HEADER: &str = "[Forge RePair Dictionary]";
/// Upper bound on the size of a compression dictionary.
pub const DICTIONARY_MAX_DICT_SIZE: usize = 20;
/// Largest input, in bytes, that dictionary compression will accept.
pub const DICTIONARY_MAX_INPUT_BYTES: usize = 50_000;
/// How many times a pattern must occur before it may enter the dictionary.
pub const DICTIONARY_MIN_OCCURRENCES: usize = 3;
/// Absolute floor, in bytes, on net savings for dictionary compression to be accepted.
///
/// Checked by [`dictionary_has_meaningful_savings`].
pub const DICTIONARY_MIN_NET_SAVINGS_BYTES: usize = 32;
/// Per-entry floor, in bytes, on savings for a dictionary entry to be committed.
pub const DICTIONARY_MIN_ENTRY_SAVINGS_BYTES: usize = 16;
/// Relative floor, in whole percent, on net savings for dictionary compression to be accepted.
///
/// Checked by [`dictionary_has_meaningful_savings`].
pub const DICTIONARY_MIN_NET_SAVINGS_PERCENT: usize = 3;
27
28/// Returns true if the output has been compressed with an LZW or RePair dictionary.
29pub fn is_dictionary_compressed_output(output: &str) -> bool {
30    output.starts_with(LZW_DICTIONARY_HEADER) || output.starts_with(REPAIR_DICTIONARY_HEADER)
31}
32
33/// Returns true if the dictionary compression yields meaningful size savings.
34pub fn dictionary_has_meaningful_savings(original_len: usize, savings: usize) -> bool {
35    savings >= DICTIONARY_MIN_NET_SAVINGS_BYTES
36        && savings.saturating_mul(100) / original_len.max(1) >= DICTIONARY_MIN_NET_SAVINGS_PERCENT
37}
38
/// Compression mode for tool outputs.
///
/// Variants are declared from least to most invasive; the derived ordering
/// follows that declaration order. [`Standard`](Self::Standard) is the default.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub enum ToolOutputCompressionMode {
    /// Output is passed through without compression or mutation.
    Disabled,
    /// Safety-only transforms: redaction, ANSI stripping, binary suppression, capping.
    Safe,
    /// Everything in `Safe`, plus conservative structural and tool-family compaction.
    #[default]
    Standard,
    /// Everything in `Standard`, plus explicitly lossy/high-aggression transforms.
    Aggressive,
}

impl ToolOutputCompressionMode {
    /// Stable lowercase name of the mode; this is also what `Display` prints.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Aggressive => "aggressive",
            Self::Standard => "standard",
            Self::Safe => "safe",
            Self::Disabled => "disabled",
        }
    }

    /// Whether this mode can change tool output, i.e. it is anything but `Disabled`.
    pub fn enabled(self) -> bool {
        !matches!(self, Self::Disabled)
    }
}

impl fmt::Display for ToolOutputCompressionMode {
    /// Writes the name from [`ToolOutputCompressionMode::as_str`] verbatim.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = self.as_str();
        formatter.write_str(name)
    }
}

impl FromStr for ToolOutputCompressionMode {
    type Err = String;

    /// Parses a mode name, ignoring surrounding whitespace and ASCII case.
    ///
    /// Besides the four canonical names, `off`, `false`, and `none` map to
    /// `Disabled`, while `on` and `true` map to `Standard`.
    ///
    /// # Errors
    ///
    /// Returns a message listing the canonical names together with the
    /// normalized (trimmed, lowercased) input when nothing matches.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let normalized = value.trim().to_ascii_lowercase();
        let mode = match normalized.as_str() {
            "disabled" | "off" | "false" | "none" => Self::Disabled,
            "safe" => Self::Safe,
            "standard" | "on" | "true" => Self::Standard,
            "aggressive" => Self::Aggressive,
            _ => {
                return Err(format!(
                    "tool output compression must be disabled, safe, standard, or aggressive, got '{normalized}'"
                ));
            }
        };
        Ok(mode)
    }
}
91
/// Dictionary algorithm selected for aggressive tool-output compression.
///
/// [`Lzw`](Self::Lzw) is the default.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ToolOutputCompressionMethod {
    /// LZW-style repeated substring dictionary compression.
    #[default]
    Lzw,
    /// RePair-style repeated adjacent token-pair grammar compression.
    Repair,
    /// Run bounded dictionary methods and keep the smallest valid result.
    Auto,
}

impl ToolOutputCompressionMethod {
    /// Stable lowercase name of the method; this is also what `Display` prints.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Auto => "auto",
            Self::Repair => "repair",
            Self::Lzw => "lzw",
        }
    }
}

impl fmt::Display for ToolOutputCompressionMethod {
    /// Writes the name from [`ToolOutputCompressionMethod::as_str`] verbatim.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = self.as_str();
        formatter.write_str(name)
    }
}

impl FromStr for ToolOutputCompressionMethod {
    type Err = String;

    /// Parses a method name, ignoring surrounding whitespace and ASCII case.
    ///
    /// `re-pair` is accepted as an alternative spelling of `repair`.
    ///
    /// # Errors
    ///
    /// Returns a message listing the accepted names together with the
    /// normalized (trimmed, lowercased) input when nothing matches.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let normalized = value.trim().to_ascii_lowercase();
        let method = match normalized.as_str() {
            "lzw" => Self::Lzw,
            "repair" | "re-pair" => Self::Repair,
            "auto" => Self::Auto,
            _ => {
                return Err(format!(
                    "tool output compression method must be lzw, repair, or auto, got '{normalized}'"
                ));
            }
        };
        Ok(method)
    }
}
135
136/// Configuration for one tool-output compression pass.
137#[derive(Debug, Clone, PartialEq, Eq)]
138pub struct ToolOutputCompressionConfig {
139    /// Compression mode.
140    pub mode: ToolOutputCompressionMode,
141    /// Dictionary method used by aggressive compression.
142    pub method: ToolOutputCompressionMethod,
143    /// Whether secret-looking values are redacted before other transforms.
144    pub redact_secrets: bool,
145    /// Whether repeated compressed outputs are replaced with bounded references.
146    pub enable_dedup: bool,
147    /// Whether per-call compressed output is memoized for byte-stable resends.
148    pub enable_memo: bool,
149    /// Optional client/session key used for dedup.
150    pub session_id: Option<String>,
151    /// Maximum bytes retained before safe capping.
152    pub max_output_bytes: usize,
153    /// Maximum dedup records per session.
154    pub max_dedup_entries_per_session: usize,
155    /// Maximum dedup sessions retained.
156    pub max_dedup_sessions: usize,
157}
158
159impl Default for ToolOutputCompressionConfig {
160    fn default() -> Self {
161        Self {
162            mode: ToolOutputCompressionMode::Standard,
163            method: ToolOutputCompressionMethod::Lzw,
164            redact_secrets: true,
165            enable_dedup: true,
166            enable_memo: true,
167            session_id: None,
168            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
169            max_dedup_entries_per_session: DEFAULT_MAX_DEDUP_ENTRIES_PER_SESSION,
170            max_dedup_sessions: DEFAULT_MAX_DEDUP_SESSIONS,
171        }
172    }
173}
174
175impl ToolOutputCompressionConfig {
176    /// Return a disabled compression configuration.
177    pub fn disabled() -> Self {
178        Self {
179            mode: ToolOutputCompressionMode::Disabled,
180            ..Self::default()
181        }
182    }
183
184    /// Build a configuration from a mode with safe defaults.
185    pub fn from_mode(mode: ToolOutputCompressionMode) -> Self {
186        Self {
187            mode,
188            ..Self::default()
189        }
190    }
191
192    /// Return true if this configuration can change tool output.
193    pub fn enabled(&self) -> bool {
194        self.mode.enabled()
195    }
196}
197
198/// Internal metrics and output for a compression pass.
199#[derive(Debug, Clone, PartialEq)]
200pub struct ToolOutputCompressionResult {
201    /// Final output to send to the upstream model.
202    pub output: String,
203    /// Heuristic token estimate before compression.
204    pub before_tokens: i64,
205    /// Heuristic token estimate after compression.
206    pub after_tokens: i64,
207    /// Estimated tokens saved.
208    pub saved_tokens: i64,
209    /// Estimated percentage saved.
210    pub saved_pct: i64,
211    /// Canonical tool family: bash, read, grep, glob, or generic.
212    pub canonical_tool: String,
213    /// Content or command family used by filters.
214    pub family: String,
215    /// Compression mode used.
216    pub mode: ToolOutputCompressionMode,
217    /// Whether a secret redaction changed output.
218    pub redacted: bool,
219    /// Whether binary or oversized output was suppressed/capped.
220    pub capped: bool,
221    /// Whether output was replaced by a dedup reference.
222    pub deduped: bool,
223    /// Whether memoized compressed bytes were reused unchanged.
224    pub memo_reused: bool,
225    /// Whether an existing memo entry was invalidated by changed input or config.
226    pub memo_changed: bool,
227    /// Names of transforms that changed output.
228    pub strategies: Vec<String>,
229}