1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
//! Unified hardcoded value configuration - eliminates all magic numbers.
//!
//! This module centralizes all hardcoded thresholds, limits, and constants
//! used across the compression system for easy tuning and consistency.
use serde::{Deserialize, Serialize};
/// Unified configuration for all hardcoded values in compression system.
///
/// Every field is a plain `usize`. The `Default` implementation supplies the
/// baseline numbers, `simple_conversation()` and `complex_technical()` override a
/// subset of them, and `validate()` checks a handful of positivity and ordering
/// constraints (noted on the affected fields below).
///
/// The serde derives carry no `#[serde(default)]` attribute, so a deserialized
/// document is expected to spell out every field.
///
/// NOTE(review): whether the "length" fields count bytes or chars is decided by
/// the callers that measure the text — confirm at the call sites.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardcodeConfig {
// ============================================================================
// Text Length Thresholds
// ============================================================================
/// Minimum word length to consider as meaningful (for coherence, focus).
///
/// `is_meaningful_word` compares with a strict `>`, so a word whose length is
/// exactly this value is NOT reported as meaningful. `validate` requires `> 0`.
pub min_word_length: usize,
/// Minimum text length to consider as substantial message.
///
/// `is_short_text` reports every length strictly below this value as short.
/// `validate` requires `> 0` and strictly less than `long_text_threshold`.
pub min_substantial_text_length: usize,
/// Threshold for "long text" that needs compression.
///
/// `is_long_text` is true only for lengths strictly above this value.
/// `validate` requires `> 0` and strictly less than `very_long_text_threshold`.
pub long_text_threshold: usize,
/// Threshold for "very long text" (needs aggressive compression).
///
/// `is_very_long_text` is true only for lengths strictly above this value.
pub very_long_text_threshold: usize,
/// Maximum text length before truncation in simple mode
pub max_simple_truncation_length: usize,
/// Threshold for considering content as "code-heavy"
pub code_content_threshold: usize,
/// Maximum context length before aggressive truncation
pub max_context_length: usize,
/// Threshold for message content to be preserved
pub preserve_content_threshold: usize,
// ============================================================================
// Extraction Limits
// ============================================================================
/// Number of words to extract as fallback topic. `validate` requires `> 0`.
pub fallback_topic_word_count: usize,
/// Number of sentences to extract in brief summary.
///
/// `validate` requires this to be strictly less than
/// `detailed_summary_sentence_count`.
pub brief_summary_sentence_count: usize,
/// Number of sentences to extract in detailed summary
pub detailed_summary_sentence_count: usize,
/// Maximum characters in extracted question. `validate` requires `> 0`.
pub max_question_extract_length: usize,
/// Maximum sentences to keep in compressed output
pub max_compressed_sentence_count: usize,
/// Number of words to keep in short summary
pub short_summary_word_count: usize,
// ============================================================================
// Message Count Thresholds
// ============================================================================
/// Minimum messages to trigger compression
pub min_messages_for_compression: usize,
/// Large conversation threshold (many messages).
///
/// `is_large_conversation` is true only for counts strictly above this value.
pub large_conversation_threshold: usize,
/// Medium conversation threshold.
///
/// `is_medium_conversation` is true only for counts strictly above this value.
pub medium_conversation_threshold: usize,
/// Maximum recent context snippets to keep
pub max_recent_context_count: usize,
/// Minimum focus history size to consider
pub min_focus_history_size: usize,
// ============================================================================
// Question/Query Thresholds
// ============================================================================
/// Minimum question length (chars).
///
/// Exclusive lower bound in `is_valid_question_length`; `validate` requires it
/// to be strictly less than `max_question_length`.
pub min_question_length: usize,
/// Maximum question length (chars).
///
/// Exclusive upper bound in `is_valid_question_length`.
pub max_question_length: usize,
/// Minimum sentence length to keep
pub min_sentence_length: usize,
/// Maximum compressed output length
pub max_compressed_output_length: usize,
// ============================================================================
// Special Thresholds
// ============================================================================
/// Threshold for detecting code blocks (combined with keyword check)
pub code_detection_length_threshold: usize,
/// Maximum truncated context length
pub max_truncated_context_length: usize,
/// Maximum trimmed content length
pub max_trimmed_content_length: usize,
/// Summary length threshold
pub summary_length_threshold: usize,
/// Cache capacity for focus tracking
pub focus_cache_capacity: usize,
}
impl Default for HardcodeConfig {
fn default() -> Self {
Self {
// Text Length Thresholds
min_word_length: 3,
min_substantial_text_length: 20,
long_text_threshold: 200,
very_long_text_threshold: 500,
max_simple_truncation_length: 200,
code_content_threshold: 1000,
max_context_length: 3000,
preserve_content_threshold: 500,
// Extraction Limits
fallback_topic_word_count: 3,
brief_summary_sentence_count: 2,
detailed_summary_sentence_count: 5,
max_question_extract_length: 100,
max_compressed_sentence_count: 30,
short_summary_word_count: 10,
// Message Count Thresholds
min_messages_for_compression: 1,
large_conversation_threshold: 30,
medium_conversation_threshold: 20,
max_recent_context_count: 5,
min_focus_history_size: 1,
// Question/Query Thresholds
min_question_length: 2,
max_question_length: 30,
min_sentence_length: 20,
max_compressed_output_length: 30,
// Special Thresholds
code_detection_length_threshold: 1000,
max_truncated_context_length: 3000,
max_trimmed_content_length: 300,
summary_length_threshold: 200,
focus_cache_capacity: 100,
}
}
}
impl HardcodeConfig {
    /// Create config for simple conversations (more aggressive compression).
    ///
    /// Starts from [`Default`] and lowers the text-length thresholds and the
    /// extraction limits listed below; every other field keeps its default.
    pub fn simple_conversation() -> Self {
        Self {
            // Lower thresholds so compression kicks in earlier.
            min_substantial_text_length: 10,
            long_text_threshold: 100,
            very_long_text_threshold: 300,
            // Fewer items to extract.
            max_recent_context_count: 3,
            detailed_summary_sentence_count: 3,
            // Smaller limits.
            max_question_extract_length: 80,
            fallback_topic_word_count: 2,
            ..Self::default()
        }
    }

    /// Create config for complex technical discussions (conservative compression).
    ///
    /// Starts from [`Default`] and raises the text-length thresholds, extraction
    /// limits and conversation-size thresholds listed below.
    pub fn complex_technical() -> Self {
        Self {
            // Higher thresholds so more content is preserved.
            min_substantial_text_length: 30,
            long_text_threshold: 300,
            very_long_text_threshold: 800,
            // More items to extract.
            max_recent_context_count: 7,
            detailed_summary_sentence_count: 8,
            // Larger limits.
            max_question_extract_length: 150,
            fallback_topic_word_count: 5,
            max_compressed_sentence_count: 50,
            // Higher message thresholds.
            large_conversation_threshold: 50,
            medium_conversation_threshold: 30,
            ..Self::default()
        }
    }

    /// Create config from complexity level.
    ///
    /// `Low` maps to [`Self::simple_conversation`], `Medium` to the default
    /// configuration and `High` to [`Self::complex_technical`].
    pub fn from_complexity(level: crate::compress::complexity::ComplexityLevel) -> Self {
        use crate::compress::complexity::ComplexityLevel;

        // Exhaustive on purpose: adding a variant must force a decision here.
        match level {
            ComplexityLevel::Low => Self::simple_conversation(),
            ComplexityLevel::Medium => Self::default(),
            ComplexityLevel::High => Self::complex_technical(),
        }
    }

    /// Validate configuration.
    ///
    /// Returns `true` only when all of the following hold:
    ///
    /// * `min_word_length`, `min_substantial_text_length`, `long_text_threshold`,
    ///   `fallback_topic_word_count` and `max_question_extract_length` are non-zero;
    /// * `min_substantial_text_length < long_text_threshold < very_long_text_threshold`;
    /// * `min_question_length < max_question_length`;
    /// * `brief_summary_sentence_count < detailed_summary_sentence_count`;
    /// * `medium_conversation_threshold < large_conversation_threshold`.
    ///
    /// Fields not listed above are not checked.
    pub fn validate(&self) -> bool {
        let must_be_positive = [
            self.min_word_length,
            self.min_substantial_text_length,
            self.long_text_threshold,
            self.fallback_topic_word_count,
            self.max_question_extract_length,
        ];
        let all_positive = must_be_positive.iter().all(|&value| value > 0);

        let text_tiers_ordered = self.min_substantial_text_length < self.long_text_threshold
            && self.long_text_threshold < self.very_long_text_threshold;
        let question_range_ordered = self.min_question_length < self.max_question_length;
        let summaries_ordered =
            self.brief_summary_sentence_count < self.detailed_summary_sentence_count;
        // Without this ordering a conversation could be classified as "large"
        // while not being "medium", which makes the two size helpers incoherent.
        let conversation_tiers_ordered =
            self.medium_conversation_threshold < self.large_conversation_threshold;

        all_positive
            && text_tiers_ordered
            && question_range_ordered
            && summaries_ordered
            && conversation_tiers_ordered
    }

    /// Check if text is "short": `len` is strictly below `min_substantial_text_length`.
    pub fn is_short_text(&self, len: usize) -> bool {
        len < self.min_substantial_text_length
    }

    /// Check if text is "long": `len` is strictly above `long_text_threshold`.
    pub fn is_long_text(&self, len: usize) -> bool {
        len > self.long_text_threshold
    }

    /// Check if text is "very long": `len` is strictly above `very_long_text_threshold`.
    pub fn is_very_long_text(&self, len: usize) -> bool {
        len > self.very_long_text_threshold
    }

    /// Check if conversation is "large": `message_count` is strictly above
    /// `large_conversation_threshold`.
    pub fn is_large_conversation(&self, message_count: usize) -> bool {
        message_count > self.large_conversation_threshold
    }

    /// Check if conversation is "medium": `message_count` is strictly above
    /// `medium_conversation_threshold`.
    ///
    /// There is no upper bound, so a "large" conversation is also reported as
    /// "medium" whenever the thresholds are ordered as `validate` requires.
    pub fn is_medium_conversation(&self, message_count: usize) -> bool {
        message_count > self.medium_conversation_threshold
    }

    /// Check if word is meaningful: `word_len` is strictly above `min_word_length`.
    ///
    /// NOTE(review): the bound is exclusive even though the field is named a
    /// minimum (a 3-letter word is rejected with the default of 3). Kept as-is
    /// because callers may rely on it — confirm the intended boundary.
    pub fn is_meaningful_word(&self, word_len: usize) -> bool {
        word_len > self.min_word_length
    }

    /// Check if question is valid length: `len` lies strictly between
    /// `min_question_length` and `max_question_length` (both bounds exclusive).
    pub fn is_valid_question_length(&self, len: usize) -> bool {
        len > self.min_question_length && len < self.max_question_length
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compress::complexity::ComplexityLevel;

    /// The baseline configuration must satisfy its own validation rules.
    #[test]
    fn test_default_config() {
        let config = HardcodeConfig::default();
        assert!(config.validate());
        assert_eq!(config.min_word_length, 3);
        assert_eq!(config.long_text_threshold, 200);
    }

    /// The simple preset overrides the expected fields and remains valid.
    #[test]
    fn test_simple_conversation_config() {
        let config = HardcodeConfig::simple_conversation();
        assert!(config.validate());
        assert_eq!(config.min_substantial_text_length, 10);
        assert_eq!(config.max_recent_context_count, 3);
        // Fields that the preset does not mention keep their default value.
        assert_eq!(config.max_context_length, HardcodeConfig::default().max_context_length);
    }

    /// The technical preset overrides the expected fields and remains valid.
    #[test]
    fn test_complex_technical_config() {
        let config = HardcodeConfig::complex_technical();
        assert!(config.validate());
        assert_eq!(config.min_substantial_text_length, 30);
        assert_eq!(config.max_recent_context_count, 7);
        assert_eq!(config.max_context_length, HardcodeConfig::default().max_context_length);
    }

    /// Each complexity level selects the matching preset.
    #[test]
    fn test_from_complexity() {
        let low = HardcodeConfig::from_complexity(ComplexityLevel::Low);
        let medium = HardcodeConfig::from_complexity(ComplexityLevel::Medium);
        let high = HardcodeConfig::from_complexity(ComplexityLevel::High);

        assert_eq!(
            low.long_text_threshold,
            HardcodeConfig::simple_conversation().long_text_threshold
        );
        assert_eq!(medium.long_text_threshold, HardcodeConfig::default().long_text_threshold);
        assert_eq!(
            high.long_text_threshold,
            HardcodeConfig::complex_technical().long_text_threshold
        );
    }

    /// Validation must reject zero values and inverted orderings.
    #[test]
    fn test_validate_rejects_inconsistent_config() {
        let zero_word_length = HardcodeConfig {
            min_word_length: 0,
            ..HardcodeConfig::default()
        };
        assert!(!zero_word_length.validate());

        let inverted_text_tiers = HardcodeConfig {
            long_text_threshold: 600,
            very_long_text_threshold: 500,
            ..HardcodeConfig::default()
        };
        assert!(!inverted_text_tiers.validate());

        let inverted_question_range = HardcodeConfig {
            min_question_length: 40,
            max_question_length: 30,
            ..HardcodeConfig::default()
        };
        assert!(!inverted_question_range.validate());

        let inverted_summaries = HardcodeConfig {
            brief_summary_sentence_count: 6,
            detailed_summary_sentence_count: 5,
            ..HardcodeConfig::default()
        };
        assert!(!inverted_summaries.validate());
    }

    #[test]
    fn test_helper_methods() {
        let config = HardcodeConfig::default();
        assert!(config.is_short_text(10));
        assert!(!config.is_short_text(30));
        assert!(config.is_long_text(300));
        assert!(!config.is_long_text(100));
        assert!(config.is_very_long_text(1000));
        assert!(!config.is_very_long_text(300));
        assert!(config.is_meaningful_word(5));
        assert!(!config.is_meaningful_word(2));
        assert!(config.is_valid_question_length(10));
        assert!(!config.is_valid_question_length(1));
    }

    #[test]
    fn test_conversation_size_checks() {
        let config = HardcodeConfig::default();
        assert!(config.is_large_conversation(50));
        assert!(!config.is_large_conversation(20));
        assert!(config.is_medium_conversation(25));
        assert!(!config.is_medium_conversation(15));
    }
}