html_translation_lib/storage/
memory_pool.rs

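//! Memory pool management module.
//!
//! Provides object pools and memory-saving helpers that reduce repeated heap
//! allocation and deallocation by reusing `String` and `Vec` buffers.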
4
5use std::collections::VecDeque;
6use std::sync::{Arc, Mutex};
7use std::borrow::Cow;
8
9/// 字符串池,重用字符串对象
10pub struct StringPool {
11    /// 小字符串池 (<=64字符)
12    small_strings: VecDeque<String>,
13    /// 大字符串池 (>64字符)
14    large_strings: VecDeque<String>,
15    /// 最大池大小
16    max_size: usize,
17}
18
19impl StringPool {
20    /// 创建新的字符串池
21    pub fn new(max_size: usize) -> Self {
22        Self {
23            small_strings: VecDeque::with_capacity(max_size / 2),
24            large_strings: VecDeque::with_capacity(max_size / 2),
25            max_size,
26        }
27    }
28    
29    /// 获取字符串(从池中取出或创建新的)
30    pub fn acquire(&mut self, initial_capacity: usize) -> String {
31        let pool = if initial_capacity <= 64 {
32            &mut self.small_strings
33        } else {
34            &mut self.large_strings
35        };
36        
37        if let Some(mut string) = pool.pop_front() {
38            string.clear();
39            if string.capacity() < initial_capacity {
40                string.reserve(initial_capacity - string.capacity());
41            }
42            string
43        } else {
44            String::with_capacity(initial_capacity)
45        }
46    }
47    
48    /// 释放字符串回池中
49    pub fn release(&mut self, string: String) {
50        if string.capacity() == 0 {
51            return;
52        }
53        
54        let pool = if string.capacity() <= 64 {
55            &mut self.small_strings
56        } else {
57            &mut self.large_strings
58        };
59        
60        if pool.len() < self.max_size / 2 {
61            pool.push_back(string);
62        }
63    }
64    
65    /// 清理池,释放过多的内存
66    pub fn cleanup(&mut self) {
67        let keep_size = self.max_size / 4;
68        
69        while self.small_strings.len() > keep_size {
70            self.small_strings.pop_back();
71        }
72        
73        while self.large_strings.len() > keep_size {
74            self.large_strings.pop_back();
75        }
76    }
77    
78    /// 获取池状态信息
79    pub fn stats(&self) -> PoolStats {
80        PoolStats {
81            small_pool_size: self.small_strings.len(),
82            large_pool_size: self.large_strings.len(),
83            total_capacity: self.small_strings.iter().map(|s| s.capacity()).sum::<usize>()
84                + self.large_strings.iter().map(|s| s.capacity()).sum::<usize>(),
85        }
86    }
87}
88
/// Pool of reusable `Vec<T>` buffers, bucketed by capacity.
///
/// Four buckets are used, covering capacities `0..=16`, `17..=64`, `65..=256`
/// and `257+`. Each bucket is a FIFO queue of released vectors.
pub struct VecPool<T> {
    /// One queue per capacity bucket, indexed by `capacity_to_pool_index`.
    pools: Vec<VecDeque<Vec<T>>>,
    /// Maximum number of vectors retained in each bucket.
    max_size_per_pool: usize,
}

impl<T> VecPool<T> {
    /// Creates an empty pool that keeps at most `max_size_per_pool` vectors in
    /// each capacity bucket.
    pub fn new(max_size_per_pool: usize) -> Self {
        Self {
            pools: vec![
                VecDeque::new(), // capacity 0..=16
                VecDeque::new(), // capacity 17..=64
                VecDeque::new(), // capacity 65..=256
                VecDeque::new(), // capacity 257 and above
            ],
            max_size_per_pool,
        }
    }

    /// Returns an empty vector whose capacity is at least `initial_capacity`.
    ///
    /// The bucket is chosen from `initial_capacity`. A pooled vector from that
    /// bucket is reused when available; otherwise a new one is allocated.
    pub fn acquire(&mut self, initial_capacity: usize) -> Vec<T> {
        let pool_index = self.capacity_to_pool_index(initial_capacity);

        match self.pools[pool_index].pop_front() {
            Some(mut vec) => {
                vec.clear();
                // `reserve` is relative to `len`, which is 0 after `clear`, so
                // passing the full request guarantees `capacity >= request`.
                vec.reserve(initial_capacity);
                vec
            }
            None => Vec::with_capacity(initial_capacity),
        }
    }

    /// Hands `vec` back to the pool for later reuse.
    ///
    /// Any remaining elements are dropped immediately so the pool retains only
    /// the bare allocation. Vectors without an allocation (capacity 0) and
    /// vectors whose bucket is already full are dropped.
    pub fn release(&mut self, mut vec: Vec<T>) {
        // Drop the elements now rather than keeping them alive while pooled.
        vec.clear();

        let capacity = vec.capacity();
        if capacity == 0 {
            return;
        }

        let pool_index = self.capacity_to_pool_index(capacity);
        let bucket = &mut self.pools[pool_index];
        if bucket.len() < self.max_size_per_pool {
            bucket.push_back(vec);
        }
    }

    /// Maps a capacity to the index of the bucket that stores it.
    fn capacity_to_pool_index(&self, capacity: usize) -> usize {
        match capacity {
            0..=16 => 0,
            17..=64 => 1,
            65..=256 => 2,
            _ => 3,
        }
    }

    /// Shrinks every bucket to at most `max_size_per_pool / 4` vectors,
    /// dropping the most recently released vectors first.
    pub fn cleanup(&mut self) {
        let keep_size = self.max_size_per_pool / 4;

        for pool in &mut self.pools {
            pool.truncate(keep_size);
        }
    }
}
158
/// A text item that borrows its content where possible.
#[derive(Debug, Clone)]
pub struct MemoryOptimizedTextItem<'a> {
    /// The text itself; a `Cow` so borrowed input needs no allocation.
    pub text: Cow<'a, str>,
    /// Category label for the text, stored as a `&'static str`.
    pub text_type: &'static str,
    /// Byte length of the text at construction time (kept for statistics).
    pub original_length: usize,
    /// Whether a translation has been applied.
    pub is_translated: bool,
}

impl<'a> MemoryOptimizedTextItem<'a> {
    /// Builds an untranslated item from anything convertible into
    /// `Cow<'a, str>`, recording the text's byte length.
    pub fn new(text: impl Into<Cow<'a, str>>, text_type: &'static str) -> Self {
        let text: Cow<'a, str> = text.into();

        Self {
            // Measured before `text` is moved into the struct below.
            original_length: text.len(),
            text,
            text_type,
            is_translated: false,
        }
    }

    /// Replaces the text with an owned translation and marks the item as
    /// translated. `original_length` is left untouched.
    pub fn apply_translation(&mut self, translation: String) {
        self.is_translated = true;
        self.text = translation.into();
    }

    /// Returns the heap bytes owned by this item: the capacity of an owned
    /// string, or 0 when the text is borrowed.
    pub fn memory_usage(&self) -> usize {
        if let Cow::Owned(owned) = &self.text {
            owned.capacity() * std::mem::size_of::<u8>()
        } else {
            // Borrowed text lives elsewhere and costs this item nothing extra.
            0
        }
    }
}
200
201/// 全局内存管理器
202pub struct GlobalMemoryManager {
203    string_pool: Arc<Mutex<StringPool>>,
204    vec_pool: Arc<Mutex<VecPool<String>>>,
205    /// 内存使用统计
206    memory_stats: Arc<Mutex<MemoryStats>>,
207}
208
209impl GlobalMemoryManager {
210    /// 创建全局内存管理器
211    pub fn new() -> Self {
212        Self {
213            string_pool: Arc::new(Mutex::new(StringPool::new(1000))),
214            vec_pool: Arc::new(Mutex::new(VecPool::new(100))),
215            memory_stats: Arc::new(Mutex::new(MemoryStats::default())),
216        }
217    }
218    
219    /// 获取字符串
220    pub fn acquire_string(&self, initial_capacity: usize) -> String {
221        let mut pool = self.string_pool.lock().unwrap();
222        let string = pool.acquire(initial_capacity);
223        
224        // 更新统计
225        let mut stats = self.memory_stats.lock().unwrap();
226        stats.strings_acquired += 1;
227        stats.total_string_capacity += string.capacity();
228        
229        string
230    }
231    
232    /// 释放字符串
233    pub fn release_string(&self, string: String) {
234        let capacity = string.capacity();
235        
236        let mut pool = self.string_pool.lock().unwrap();
237        pool.release(string);
238        
239        // 更新统计
240        let mut stats = self.memory_stats.lock().unwrap();
241        stats.strings_released += 1;
242        stats.total_string_capacity = stats.total_string_capacity.saturating_sub(capacity);
243    }
244    
245    /// 获取向量
246    pub fn acquire_vec(&self, initial_capacity: usize) -> Vec<String> {
247        let mut pool = self.vec_pool.lock().unwrap();
248        pool.acquire(initial_capacity)
249    }
250    
251    /// 释放向量
252    pub fn release_vec(&self, vec: Vec<String>) {
253        let mut pool = self.vec_pool.lock().unwrap();
254        pool.release(vec);
255    }
256    
257    /// 执行清理操作
258    pub fn cleanup(&self) {
259        {
260            let mut string_pool = self.string_pool.lock().unwrap();
261            string_pool.cleanup();
262        }
263        
264        {
265            let mut vec_pool = self.vec_pool.lock().unwrap();
266            vec_pool.cleanup();
267        }
268        
269        // 重置统计
270        let mut stats = self.memory_stats.lock().unwrap();
271        *stats = MemoryStats::default();
272    }
273    
274    /// 获取内存统计
275    pub fn get_stats(&self) -> MemoryStats {
276        self.memory_stats.lock().unwrap().clone()
277    }
278    
279    /// 获取池统计
280    pub fn get_pool_stats(&self) -> (PoolStats, usize) {
281        let string_stats = {
282            let pool = self.string_pool.lock().unwrap();
283            pool.stats()
284        };
285        
286        let vec_pools_len = {
287            let pool = self.vec_pool.lock().unwrap();
288            pool.pools.iter().map(|p| p.len()).sum()
289        };
290        
291        (string_stats, vec_pools_len)
292    }
293}
294
295impl Default for GlobalMemoryManager {
296    fn default() -> Self {
297        Self::new()
298    }
299}
300
/// Snapshot of a string pool's occupancy.
///
/// Derives `Default`, `PartialEq` and `Eq` so snapshots can be zero-initialised
/// and compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PoolStats {
    /// Number of buffers currently held in the small-string pool.
    pub small_pool_size: usize,
    /// Number of buffers currently held in the large-string pool.
    pub large_pool_size: usize,
    /// Sum of the capacities, in bytes, of all pooled buffers.
    pub total_capacity: usize,
}
308
/// Counters describing how strings have been acquired and released.
#[derive(Debug, Clone, Default)]
pub struct MemoryStats {
    /// Number of strings handed out.
    pub strings_acquired: usize,
    /// Number of strings given back.
    pub strings_released: usize,
    /// Running total of the capacities of strings currently handed out.
    pub total_string_capacity: usize,
    /// Largest usage value passed to `update_peak_usage` so far.
    pub peak_memory_usage: usize,
}

impl MemoryStats {
    /// Returns the pool "hit rate", computed as `strings_released` divided by
    /// `strings_acquired`, or `0.0` when nothing has been acquired yet.
    pub fn pool_hit_rate(&self) -> f64 {
        match self.strings_acquired {
            // Avoid dividing by zero before the first acquisition.
            0 => 0.0,
            acquired => self.strings_released as f64 / acquired as f64,
        }
    }

    /// Raises the recorded peak to `current_usage` if that value is larger;
    /// otherwise leaves it unchanged.
    pub fn update_peak_usage(&mut self, current_usage: usize) {
        self.peak_memory_usage = self.peak_memory_usage.max(current_usage);
    }
}
334
335/// 内存优化的批处理器
336pub struct MemoryOptimizedBatchProcessor {
337    memory_manager: Arc<GlobalMemoryManager>,
338    batch_size: usize,
339    memory_threshold: usize, // 字节
340}
341
342impl MemoryOptimizedBatchProcessor {
343    /// 创建新的批处理器
344    pub fn new(memory_manager: Arc<GlobalMemoryManager>, batch_size: usize, memory_threshold: usize) -> Self {
345        Self {
346            memory_manager,
347            batch_size,
348            memory_threshold,
349        }
350    }
351    
352    /// 处理文本批次,自动管理内存
353    pub fn process_texts<'a>(&self, texts: &'a [String]) -> Vec<MemoryOptimizedTextItem<'a>> {
354        let items = self.memory_manager.acquire_vec(texts.len());
355        let mut results = Vec::with_capacity(texts.len());
356        
357        for (i, text) in texts.iter().enumerate() {
358            // 使用Cow借用原始字符串,避免复制
359            let item = MemoryOptimizedTextItem::new(
360                Cow::Borrowed(text.as_str()),
361                "content"
362            );
363            results.push(item);
364            
365            // 每处理一定数量的项目后检查内存使用
366            if i % 100 == 0 {
367                let current_memory = self.estimate_memory_usage(&results);
368                if current_memory > self.memory_threshold {
369                    // 触发清理
370                    self.memory_manager.cleanup();
371                }
372            }
373        }
374        
375        // 释放工作向量
376        self.memory_manager.release_vec(items);
377        
378        results
379    }
380    
381    /// 估算当前内存使用
382    fn estimate_memory_usage(&self, items: &[MemoryOptimizedTextItem<'_>]) -> usize {
383        items.iter().map(|item| item.memory_usage()).sum::<usize>()
384            + items.len() * std::mem::size_of::<MemoryOptimizedTextItem<'_>>()
385    }
386}
387
#[cfg(test)]
mod tests {
    use super::*;

    /// Acquiring, releasing and re-acquiring a string yields usable buffers.
    #[test]
    fn test_string_pool_basic() {
        let mut pool = StringPool::new(100);

        // `String::with_capacity` only promises *at least* the requested
        // capacity, so the assertions use `>=` rather than equality.
        let first = pool.acquire(10);
        assert!(first.capacity() >= 10);
        assert!(first.is_empty());

        pool.release(first);

        let second = pool.acquire(10);
        assert!(second.capacity() >= 10);
        assert!(second.is_empty());
    }

    /// Acquiring, releasing and re-acquiring a vector yields usable buffers.
    #[test]
    fn test_vec_pool_basic() {
        let mut pool: VecPool<String> = VecPool::new(50);

        let first = pool.acquire(20);
        assert!(first.capacity() >= 20);
        assert!(first.is_empty());

        pool.release(first);

        let second = pool.acquire(15);
        assert!(second.capacity() >= 15);
        assert!(second.is_empty());
    }

    /// A text item starts out borrowed and becomes owned once translated.
    #[test]
    fn test_memory_optimized_text_item() {
        let text = "Hello World".to_string();
        let mut item = MemoryOptimizedTextItem::new(text.as_str(), "content");

        // Borrowed text owns no heap memory of its own.
        assert_eq!(item.memory_usage(), 0);
        assert_eq!(item.original_length, text.len());
        assert!(!item.is_translated);

        // Applying a translation switches the item to owned storage.
        item.apply_translation("你好世界".to_string());
        assert!(item.memory_usage() > 0);
        assert!(item.is_translated);
    }

    /// The manager counts string acquisitions and releases.
    #[test]
    fn test_global_memory_manager() {
        let manager = GlobalMemoryManager::new();

        let s1 = manager.acquire_string(100);
        assert!(s1.capacity() >= 100);

        let stats_before = manager.get_stats();
        assert_eq!(stats_before.strings_acquired, 1);

        manager.release_string(s1);

        let stats_after = manager.get_stats();
        assert_eq!(stats_after.strings_released, 1);
    }

    /// The hit rate is the ratio of released to acquired strings.
    #[test]
    fn test_memory_stats() {
        let stats = MemoryStats {
            strings_acquired: 100,
            strings_released: 80,
            ..MemoryStats::default()
        };

        assert!((stats.pool_hit_rate() - 0.8).abs() < f64::EPSILON);
        assert_eq!(MemoryStats::default().pool_hit_rate(), 0.0);
    }
}
461}