// m2m/codec/tables.rs

//! Abbreviation tables for token compression.
//!
//! These tables map common JSON keys, roles, and values to shorter forms
//! **optimized for LLM tokenizer efficiency** (not just byte reduction).
//!
//! # Token-Optimized Design
//!
//! All abbreviations in this module have been empirically validated to reduce
//! token count using the tiktoken cl100k_base encoding. Abbreviations that only
//! save bytes but not tokens have been removed.
//!
//! Run `cargo run --bin token_analysis` to verify token savings.
14use phf::phf_map;
15
/// Key abbreviations (JSON keys -> short form)
///
/// **ONLY includes abbreviations that save tokens** (validated via token_analysis)
///
/// Invariant: every entry must have an exact inverse in [`KEY_EXPAND`], and no
/// two keys may share a short form (enforced by `test_key_roundtrip`).
/// Per-entry comments record "full->abbrev token counts (tokens saved)" as
/// measured with tiktoken cl100k_base.
///
/// Removed (cost same or more tokens):
/// - user, description, type, text (WORSE - cost MORE tokens)
/// - messages, message, role, usage, name, code, functions, parameters,
///   object, function, properties, index, required, id, n, seed, tools, created (NO SAVE)
pub static KEY_ABBREV: phf::Map<&'static str, &'static str> = phf_map! {
    // Token-saving request keys (verified +1 to +2 tokens saved)
    "content" => "c",           // 3->2 tokens (+1)
    "model" => "M",             // 3->2 tokens (+1)
    "temperature" => "T",       // 3->2 tokens (+1)
    "max_tokens" => "x",        // 4->2 tokens (+2)
    "stream" => "s",            // 3->2 tokens (+1)
    "stop" => "S",              // 3->2 tokens (+1)
    "top_p" => "p",             // 4->3 tokens (+1)
    "frequency_penalty" => "f", // 4->2 tokens (+2)
    "presence_penalty" => "P",  // 4->2 tokens (+2)
    "function_call" => "fc",    // 4->3 tokens (+1)
    "arguments" => "a",         // 3->2 tokens (+1)
    "tool_calls" => "tc",       // 4->3 tokens (+1)
    "tool_choice" => "tx",      // 4->3 tokens (+1)
    "response_format" => "rf",  // 4->3 tokens (+1)
    "logit_bias" => "lb",       // 4->3 tokens (+1)
    "logprobs" => "lp",         // 4->3 tokens (+1)
    "top_logprobs" => "tlp",    // 6->4 tokens (+2)
    // Token-saving response keys
    "choices" => "C",           // 3->2 tokens (+1)
    "finish_reason" => "fr",    // 4->3 tokens (+1)
    "prompt_tokens" => "pt",    // 4->3 tokens (+1)
    "completion_tokens" => "ct",// 4->3 tokens (+1)
    "total_tokens" => "tt",     // 4->3 tokens (+1)
    "delta" => "D",             // 3->2 tokens (+1)
    "system_fingerprint" => "sf", // 5->3 tokens (+2)
    "error" => "E",             // 3->2 tokens (+1)
};
53
/// Reverse key mapping (short form -> full key)
///
/// Must remain the exact inverse of [`KEY_ABBREV`]; `test_key_roundtrip`
/// verifies the pairing. When adding an abbreviation, add both directions.
pub static KEY_EXPAND: phf::Map<&'static str, &'static str> = phf_map! {
    "c" => "content",
    "M" => "model",
    "T" => "temperature",
    "x" => "max_tokens",
    "s" => "stream",
    "S" => "stop",
    "p" => "top_p",
    "f" => "frequency_penalty",
    "P" => "presence_penalty",
    "fc" => "function_call",
    "a" => "arguments",
    "tc" => "tool_calls",
    "tx" => "tool_choice",
    "rf" => "response_format",
    "lb" => "logit_bias",
    "lp" => "logprobs",
    "tlp" => "top_logprobs",
    "C" => "choices",
    "fr" => "finish_reason",
    "pt" => "prompt_tokens",
    "ct" => "completion_tokens",
    "tt" => "total_tokens",
    "D" => "delta",
    "sf" => "system_fingerprint",
    "E" => "error",
};
82
/// Role abbreviations
///
/// All role abbreviations save tokens (verified +1 each).
/// Note: "user" is NOT abbreviated (analysis shows it costs MORE tokens as "u")
///
/// NOTE(review): short forms "S" and "T" also appear in [`KEY_ABBREV`]
/// ("stop", "temperature"). That is safe only because roles and keys are
/// looked up in separate tables — keep the namespaces separate.
pub static ROLE_ABBREV: phf::Map<&'static str, &'static str> = phf_map! {
    "system" => "S",     // 3->2 tokens (+1)
    "assistant" => "A",  // 3->2 tokens (+1)
    "function" => "F",   // 3->2 tokens (+1)
    "tool" => "T",       // 3->2 tokens (+1)
};
93
/// Reverse role mapping
///
/// Exact inverse of [`ROLE_ABBREV`] (verified by `test_role_roundtrip`).
pub static ROLE_EXPAND: phf::Map<&'static str, &'static str> = phf_map! {
    "S" => "system",
    "A" => "assistant",
    "F" => "function",
    "T" => "tool",
};
101
/// Model name abbreviations
///
/// Only models with publicly accessible tokenizers are included.
/// All abbreviations save 2-9 tokens (verified via token_analysis).
///
/// Invariant: exact inverse of [`MODEL_EXPAND`] (verified by
/// `test_model_roundtrip`). Unknown model strings pass through unabbreviated.
///
/// Removed (no token savings): o1, o3, o1-mini, o3-mini
pub static MODEL_ABBREV: phf::Map<&'static str, &'static str> = phf_map! {
    // OpenAI (tokenizer available via tiktoken)
    "gpt-4o" => "g4o",           // 6->4 tokens (+2)
    "gpt-4o-mini" => "g4om",     // 7->4 tokens (+3)
    "gpt-4-turbo" => "g4t",      // 8->4 tokens (+4)
    "gpt-4" => "g4",             // 5->3 tokens (+2)
    "gpt-3.5-turbo" => "g35t",   // 10->4 tokens (+6)
    // Meta Llama (open source tokenizer)
    "llama-3.1-405b" => "l31405", // 11->5 tokens (+6)
    "llama-3.1-70b" => "l3170",   // 11->5 tokens (+6)
    "llama-3.1-8b" => "l318",     // 11->4 tokens (+7)
    "llama-3.3-70b" => "l3370",   // 11->5 tokens (+6)
    // Mistral (open source tokenizer)
    "mistral-large-latest" => "mll",  // 7->4 tokens (+3)
    "mistral-small-latest" => "msl",  // 7->4 tokens (+3)
    "mixtral-8x7b" => "mx87",         // 10->4 tokens (+6)
    "mixtral-8x22b" => "mx822",       // 10->4 tokens (+6)
    // DeepSeek (open source tokenizer)
    "deepseek-v3" => "dv3",      // 6->4 tokens (+2)
    "deepseek-r1" => "dr1",      // 6->4 tokens (+2)
    "deepseek-coder" => "dc",    // 6->3 tokens (+3)
    // Qwen (open source tokenizer)
    "qwen-2.5-72b" => "q2572",   // 11->5 tokens (+6)
    "qwen-2.5-32b" => "q2532",   // 11->5 tokens (+6)
    "qwen-2.5-coder-32b" => "qc32", // 13->4 tokens (+9)
};
134
/// Reverse model mapping
///
/// Exact inverse of [`MODEL_ABBREV`] (verified by `test_model_roundtrip`).
pub static MODEL_EXPAND: phf::Map<&'static str, &'static str> = phf_map! {
    // OpenAI
    "g4o" => "gpt-4o",
    "g4om" => "gpt-4o-mini",
    "g4t" => "gpt-4-turbo",
    "g4" => "gpt-4",
    "g35t" => "gpt-3.5-turbo",
    // Meta Llama
    "l31405" => "llama-3.1-405b",
    "l3170" => "llama-3.1-70b",
    "l318" => "llama-3.1-8b",
    "l3370" => "llama-3.3-70b",
    // Mistral
    "mll" => "mistral-large-latest",
    "msl" => "mistral-small-latest",
    "mx87" => "mixtral-8x7b",
    "mx822" => "mixtral-8x22b",
    // DeepSeek
    "dv3" => "deepseek-v3",
    "dr1" => "deepseek-r1",
    "dc" => "deepseek-coder",
    // Qwen
    "q2572" => "qwen-2.5-72b",
    "q2532" => "qwen-2.5-32b",
    "qc32" => "qwen-2.5-coder-32b",
};
162
/// High-frequency patterns for token-efficient compression
///
/// These patterns are 5-8 tokens each and can be replaced with single-token
/// escape sequences for significant savings.
///
/// Pattern encoding: control characters U+0001 to U+001F.
///
/// NOTE(review): raw control characters are NOT valid unescaped inside JSON
/// strings (RFC 8259 requires `\u00XX` escaping), so the compressed output is
/// an opaque byte string rather than valid JSON — confirm the transport and
/// decoder treat it as such. Also note U+0009/U+000A/U+000D are TAB/LF/CR;
/// verify these markers cannot collide with framing whitespace (e.g. SSE
/// `\n\n` delimiters) before decompression runs.
pub static PATTERN_ABBREV: &[(&str, &str)] = &[
    // Role patterns (7 tokens each -> 1 token)
    (r#"{"role":"user","content":""#, "\u{0001}"),
    (r#"{"role":"assistant","content":""#, "\u{0002}"),
    (r#"{"role":"system","content":""#, "\u{0003}"),
    (r#"{"role":"tool","content":""#, "\u{0004}"),
    // Streaming patterns (8 tokens each -> 1 token)
    (r#"{"index":0,"delta":{"#, "\u{0005}"),
    (r#"{"index":0,"message":{"#, "\u{0006}"),
    // Finish reason patterns (6-7 tokens each -> 1 token)
    (r#""finish_reason":"stop""#, "\u{0007}"),
    (r#""finish_reason":"length""#, "\u{0008}"),
    (r#""finish_reason":"tool_calls""#, "\u{0009}"),
    // Tool patterns (8 tokens -> 1 token)
    (r#"{"type":"function","function":{"#, "\u{000A}"),
    // Common structural patterns (3-4 tokens each -> 1 token)
    (r#""choices":[{"#, "\u{000B}"),
    (r#"{"messages":["#, "\u{000C}"),
    (r#"],"model":""#, "\u{000D}"),
];
189
/// Reverse pattern mapping for decompression
///
/// Must stay in one-to-one sync with [`PATTERN_ABBREV`]
/// (verified by `test_pattern_roundtrip`).
pub static PATTERN_EXPAND: &[(&str, &str)] = &[
    ("\u{0001}", r#"{"role":"user","content":""#),
    ("\u{0002}", r#"{"role":"assistant","content":""#),
    ("\u{0003}", r#"{"role":"system","content":""#),
    ("\u{0004}", r#"{"role":"tool","content":""#),
    ("\u{0005}", r#"{"index":0,"delta":{"#),
    ("\u{0006}", r#"{"index":0,"message":{"#),
    ("\u{0007}", r#""finish_reason":"stop""#),
    ("\u{0008}", r#""finish_reason":"length""#),
    ("\u{0009}", r#""finish_reason":"tool_calls""#),
    ("\u{000A}", r#"{"type":"function","function":{"#),
    ("\u{000B}", r#""choices":[{"#),
    ("\u{000C}", r#"{"messages":["#),
    ("\u{000D}", r#"],"model":""#),
];
206
207/// Check if a value is a default that can be removed
208pub fn is_default_value(key: &str, value: &serde_json::Value) -> bool {
209    use serde_json::Value;
210
211    match (key, value) {
212        ("temperature" | "T", Value::Number(n)) => {
213            n.as_f64().map(|f| (f - 1.0).abs() < 0.001).unwrap_or(false)
214        },
215        ("top_p" | "p", Value::Number(n)) => {
216            n.as_f64().map(|f| (f - 1.0).abs() < 0.001).unwrap_or(false)
217        },
218        ("n", Value::Number(n)) => n.as_i64() == Some(1),
219        ("stream" | "s", Value::Bool(b)) => !b,
220        ("frequency_penalty" | "f", Value::Number(n)) => {
221            n.as_i64() == Some(0) || n.as_f64() == Some(0.0)
222        },
223        ("presence_penalty" | "P", Value::Number(n)) => {
224            n.as_i64() == Some(0) || n.as_f64() == Some(0.0)
225        },
226        ("logit_bias" | "lb", Value::Object(m)) => m.is_empty(),
227        ("stop" | "S", Value::Null) => true,
228        _ => false,
229    }
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Forward key table must invert cleanly through the reverse table.
    #[test]
    fn test_key_roundtrip() {
        KEY_ABBREV.entries().for_each(|(full, abbrev)| {
            assert_eq!(
                KEY_EXPAND.get(*abbrev),
                Some(full),
                "Key '{full}' -> '{abbrev}' doesn't round-trip"
            );
        });
    }

    /// Forward role table must invert cleanly through the reverse table.
    #[test]
    fn test_role_roundtrip() {
        ROLE_ABBREV.entries().for_each(|(full, abbrev)| {
            assert_eq!(
                ROLE_EXPAND.get(*abbrev),
                Some(full),
                "Role '{full}' -> '{abbrev}' doesn't round-trip"
            );
        });
    }

    /// Forward model table must invert cleanly through the reverse table.
    #[test]
    fn test_model_roundtrip() {
        MODEL_ABBREV.entries().for_each(|(full, abbrev)| {
            assert_eq!(
                MODEL_EXPAND.get(*abbrev),
                Some(full),
                "Model '{full}' -> '{abbrev}' doesn't round-trip"
            );
        });
    }

    /// Every compression pattern must have a matching expansion entry.
    #[test]
    fn test_pattern_roundtrip() {
        for (pattern, abbrev) in PATTERN_ABBREV {
            let expanded = PATTERN_EXPAND
                .iter()
                .find_map(|(a, p)| if a == abbrev { Some(*p) } else { None });
            assert_eq!(
                expanded,
                Some(*pattern),
                "Pattern '{pattern}' -> '{abbrev:?}' doesn't round-trip"
            );
        }
    }

    /// Spot-check default-value detection for removable request fields.
    #[test]
    fn test_default_detection() {
        use serde_json::json;

        let defaults = [
            ("temperature", json!(1.0)),
            ("stream", json!(false)),
            ("n", json!(1)),
        ];
        let non_defaults = [
            ("temperature", json!(0.7)),
            ("stream", json!(true)),
            ("n", json!(2)),
        ];
        for (key, value) in &defaults {
            assert!(is_default_value(key, value));
        }
        for (key, value) in &non_defaults {
            assert!(!is_default_value(key, value));
        }
    }
}
295}