Skip to main content

matrixcode_core/compress/
hardcode_config.rs

//! Unified hardcoded-value configuration, replacing scattered magic numbers.
//!
//! This module centralizes the hardcoded thresholds, limits, and constants
//! used across the compression system for easy tuning and consistency.
5
6use serde::{Deserialize, Serialize};
7
8/// Unified configuration for all hardcoded values in compression system.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct HardcodeConfig {
11    // ============================================================================
12    // Text Length Thresholds
13    // ============================================================================
14    
15    /// Minimum word length to consider as meaningful (for coherence, focus)
16    pub min_word_length: usize,
17    
18    /// Minimum text length to consider as substantial message
19    pub min_substantial_text_length: usize,
20    
21    /// Threshold for "long text" that needs compression
22    pub long_text_threshold: usize,
23    
24    /// Threshold for "very long text" (needs aggressive compression)
25    pub very_long_text_threshold: usize,
26    
27    /// Maximum text length before truncation in simple mode
28    pub max_simple_truncation_length: usize,
29    
30    /// Threshold for considering content as "code-heavy"
31    pub code_content_threshold: usize,
32    
33    /// Maximum context length before aggressive truncation
34    pub max_context_length: usize,
35    
36    /// Threshold for message content to be preserved
37    pub preserve_content_threshold: usize,
38    
39    // ============================================================================
40    // Extraction Limits
41    // ============================================================================
42    
43    /// Number of words to extract as fallback topic
44    pub fallback_topic_word_count: usize,
45    
46    /// Number of sentences to extract in brief summary
47    pub brief_summary_sentence_count: usize,
48    
49    /// Number of sentences to extract in detailed summary
50    pub detailed_summary_sentence_count: usize,
51    
52    /// Maximum characters in extracted question
53    pub max_question_extract_length: usize,
54    
55    /// Maximum sentences to keep in compressed output
56    pub max_compressed_sentence_count: usize,
57    
58    /// Number of words to keep in short summary
59    pub short_summary_word_count: usize,
60    
61    // ============================================================================
62    // Message Count Thresholds
63    // ============================================================================
64    
65    /// Minimum messages to trigger compression
66    pub min_messages_for_compression: usize,
67    
68    /// Large conversation threshold (many messages)
69    pub large_conversation_threshold: usize,
70    
71    /// Medium conversation threshold
72    pub medium_conversation_threshold: usize,
73    
74    /// Maximum recent context snippets to keep
75    pub max_recent_context_count: usize,
76    
77    /// Minimum focus history size to consider
78    pub min_focus_history_size: usize,
79    
80    // ============================================================================
81    // Question/Query Thresholds
82    // ============================================================================
83    
84    /// Minimum question length (chars)
85    pub min_question_length: usize,
86    
87    /// Maximum question length (chars)
88    pub max_question_length: usize,
89    
90    /// Minimum sentence length to keep
91    pub min_sentence_length: usize,
92    
93    /// Maximum compressed output length
94    pub max_compressed_output_length: usize,
95    
96    // ============================================================================
97    // Special Thresholds
98    // ============================================================================
99    
100    /// Threshold for detecting code blocks (combined with keyword check)
101    pub code_detection_length_threshold: usize,
102    
103    /// Maximum truncated context length
104    pub max_truncated_context_length: usize,
105    
106    /// Maximum trimmed content length
107    pub max_trimmed_content_length: usize,
108    
109    /// Summary length threshold
110    pub summary_length_threshold: usize,
111    
112    /// Cache capacity for focus tracking
113    pub focus_cache_capacity: usize,
114}
115
116impl Default for HardcodeConfig {
117    fn default() -> Self {
118        Self {
119            // Text Length Thresholds
120            min_word_length: 3,
121            min_substantial_text_length: 20,
122            long_text_threshold: 200,
123            very_long_text_threshold: 500,
124            max_simple_truncation_length: 200,
125            code_content_threshold: 1000,
126            max_context_length: 3000,
127            preserve_content_threshold: 500,
128            
129            // Extraction Limits
130            fallback_topic_word_count: 3,
131            brief_summary_sentence_count: 2,
132            detailed_summary_sentence_count: 5,
133            max_question_extract_length: 100,
134            max_compressed_sentence_count: 30,
135            short_summary_word_count: 10,
136            
137            // Message Count Thresholds
138            min_messages_for_compression: 1,
139            large_conversation_threshold: 30,
140            medium_conversation_threshold: 20,
141            max_recent_context_count: 5,
142            min_focus_history_size: 1,
143            
144            // Question/Query Thresholds
145            min_question_length: 2,
146            max_question_length: 30,
147            min_sentence_length: 20,
148            max_compressed_output_length: 30,
149            
150            // Special Thresholds
151            code_detection_length_threshold: 1000,
152            max_truncated_context_length: 3000,
153            max_trimmed_content_length: 300,
154            summary_length_threshold: 200,
155            focus_cache_capacity: 100,
156        }
157    }
158}
159
160impl HardcodeConfig {
161    /// Create config for simple conversations (more aggressive compression)
162    pub fn simple_conversation() -> Self {
163        Self {
164            // Lower thresholds for early compression
165            min_substantial_text_length: 10,
166            long_text_threshold: 100,
167            very_long_text_threshold: 300,
168            
169            // Fewer items to extract
170            max_recent_context_count: 3,
171            detailed_summary_sentence_count: 3,
172            
173            // Smaller limits
174            max_question_extract_length: 80,
175            fallback_topic_word_count: 2,
176            
177            ..Self::default()
178        }
179    }
180    
181    /// Create config for complex technical discussions (conservative compression)
182    pub fn complex_technical() -> Self {
183        Self {
184            // Higher thresholds for preserving more content
185            min_substantial_text_length: 30,
186            long_text_threshold: 300,
187            very_long_text_threshold: 800,
188            
189            // More items to extract
190            max_recent_context_count: 7,
191            detailed_summary_sentence_count: 8,
192            
193            // Larger limits
194            max_question_extract_length: 150,
195            fallback_topic_word_count: 5,
196            max_compressed_sentence_count: 50,
197            
198            // Higher message thresholds
199            large_conversation_threshold: 50,
200            medium_conversation_threshold: 30,
201            
202            ..Self::default()
203        }
204    }
205    
206    /// Create config from complexity level
207    pub fn from_complexity(level: crate::compress::complexity::ComplexityLevel) -> Self {
208        match level {
209            crate::compress::complexity::ComplexityLevel::High => Self::complex_technical(),
210            crate::compress::complexity::ComplexityLevel::Medium => Self::default(),
211            crate::compress::complexity::ComplexityLevel::Low => Self::simple_conversation(),
212        }
213    }
214    
215    /// Validate configuration
216    pub fn validate(&self) -> bool {
217        // All thresholds should be positive
218        self.min_word_length > 0 &&
219        self.min_substantial_text_length > 0 &&
220        self.long_text_threshold > 0 &&
221        self.fallback_topic_word_count > 0 &&
222        self.max_question_extract_length > 0 &&
223        
224        // Logical constraints
225        self.min_substantial_text_length < self.long_text_threshold &&
226        self.long_text_threshold < self.very_long_text_threshold &&
227        self.min_question_length < self.max_question_length &&
228        self.brief_summary_sentence_count < self.detailed_summary_sentence_count
229    }
230    
231    /// Check if text is "short" (below substantial threshold)
232    pub fn is_short_text(&self, len: usize) -> bool {
233        len < self.min_substantial_text_length
234    }
235    
236    /// Check if text is "long" (needs compression)
237    pub fn is_long_text(&self, len: usize) -> bool {
238        len > self.long_text_threshold
239    }
240    
241    /// Check if text is "very long" (needs aggressive compression)
242    pub fn is_very_long_text(&self, len: usize) -> bool {
243        len > self.very_long_text_threshold
244    }
245    
246    /// Check if conversation is "large"
247    pub fn is_large_conversation(&self, message_count: usize) -> bool {
248        message_count > self.large_conversation_threshold
249    }
250    
251    /// Check if conversation is "medium"
252    pub fn is_medium_conversation(&self, message_count: usize) -> bool {
253        message_count > self.medium_conversation_threshold
254    }
255    
256    /// Check if word is meaningful
257    pub fn is_meaningful_word(&self, word_len: usize) -> bool {
258        word_len > self.min_word_length
259    }
260    
261    /// Check if question is valid length
262    pub fn is_valid_question_length(&self, len: usize) -> bool {
263        len > self.min_question_length && len < self.max_question_length
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// The baseline preset must satisfy its own validation rules.
    #[test]
    fn test_default_config() {
        let config = HardcodeConfig::default();
        assert!(config.validate());
        assert_eq!(config.min_word_length, 3);
        assert_eq!(config.long_text_threshold, 200);
    }

    /// The simple preset overrides its fields, inherits the rest, and is
    /// still internally consistent.
    #[test]
    fn test_simple_conversation_config() {
        let config = HardcodeConfig::simple_conversation();
        assert!(config.validate());
        assert_eq!(config.min_substantial_text_length, 10);
        assert_eq!(config.max_recent_context_count, 3);
        // Not overridden by the preset, so it must match the baseline.
        assert_eq!(
            config.min_word_length,
            HardcodeConfig::default().min_word_length
        );
    }

    /// The technical preset overrides its fields, inherits the rest, and is
    /// still internally consistent.
    #[test]
    fn test_complex_technical_config() {
        let config = HardcodeConfig::complex_technical();
        assert!(config.validate());
        assert_eq!(config.min_substantial_text_length, 30);
        assert_eq!(config.max_recent_context_count, 7);
        // Not overridden by the preset, so it must match the baseline.
        assert_eq!(
            config.min_word_length,
            HardcodeConfig::default().min_word_length
        );
    }

    /// Each complexity level selects the expected preset.
    #[test]
    fn test_from_complexity_selects_preset() {
        use crate::compress::complexity::ComplexityLevel;

        let high = HardcodeConfig::from_complexity(ComplexityLevel::High);
        let medium = HardcodeConfig::from_complexity(ComplexityLevel::Medium);
        let low = HardcodeConfig::from_complexity(ComplexityLevel::Low);

        assert_eq!(high.min_substantial_text_length, 30);
        assert_eq!(medium.min_substantial_text_length, 20);
        assert_eq!(low.min_substantial_text_length, 10);
    }

    /// `validate` rejects zero-valued and mis-ordered settings.
    #[test]
    fn test_validate_rejects_inconsistent_values() {
        let zero_word_length = HardcodeConfig {
            min_word_length: 0,
            ..HardcodeConfig::default()
        };
        assert!(!zero_word_length.validate());

        // long == very_long leaves no room between the two tiers.
        let collapsed_text_tiers = HardcodeConfig {
            long_text_threshold: 500,
            ..HardcodeConfig::default()
        };
        assert!(!collapsed_text_tiers.validate());

        // min == max makes the question-length range empty.
        let empty_question_range = HardcodeConfig {
            min_question_length: 30,
            ..HardcodeConfig::default()
        };
        assert!(!empty_question_range.validate());
    }

    #[test]
    fn test_helper_methods() {
        let config = HardcodeConfig::default();

        assert!(config.is_short_text(10));
        assert!(!config.is_short_text(30));

        assert!(config.is_long_text(300));
        assert!(!config.is_long_text(100));

        assert!(config.is_very_long_text(1000));
        assert!(!config.is_very_long_text(300));

        assert!(config.is_meaningful_word(5));
        assert!(!config.is_meaningful_word(2));

        assert!(config.is_valid_question_length(10));
        assert!(!config.is_valid_question_length(1));
    }

    /// Values exactly on a threshold fall on the "not above" side, because
    /// the helpers use strict comparisons.
    #[test]
    fn test_threshold_boundaries_are_exclusive() {
        let config = HardcodeConfig::default();

        assert!(config.is_short_text(19));
        assert!(!config.is_short_text(20));

        assert!(!config.is_long_text(200));
        assert!(config.is_long_text(201));

        assert!(!config.is_very_long_text(500));
        assert!(config.is_very_long_text(501));

        assert!(!config.is_meaningful_word(3));
        assert!(config.is_meaningful_word(4));

        assert!(!config.is_valid_question_length(2));
        assert!(config.is_valid_question_length(3));
        assert!(config.is_valid_question_length(29));
        assert!(!config.is_valid_question_length(30));
    }

    #[test]
    fn test_conversation_size_checks() {
        let config = HardcodeConfig::default();

        assert!(config.is_large_conversation(50));
        assert!(!config.is_large_conversation(20));
        assert!(!config.is_large_conversation(30));
        assert!(config.is_large_conversation(31));

        assert!(config.is_medium_conversation(25));
        assert!(!config.is_medium_conversation(15));
        assert!(!config.is_medium_conversation(20));
        assert!(config.is_medium_conversation(21));
    }
}