Skip to main content

ccstat_core/
memory_pool.rs

1//! Memory pool for efficient allocation during parsing
2//!
3//! This module provides arena-based allocation to reduce memory fragmentation
4//! and improve performance when processing large JSONL files.
5
6use typed_arena::Arena;
7
8/// Memory pool context that owns the arena
9pub struct MemoryPool {
10    string_arena: Arena<u8>,
11}
12
13impl Default for MemoryPool {
14    fn default() -> Self {
15        Self {
16            string_arena: Arena::new(),
17        }
18    }
19}
20
21impl MemoryPool {
22    /// Create a new memory pool
23    pub fn new() -> Self {
24        Self::default()
25    }
26
27    /// Allocate a string in the arena
28    pub fn alloc_string(&self, s: &str) -> &str {
29        let bytes = s.as_bytes();
30        let allocated = self.string_arena.alloc_extend(bytes.iter().copied());
31        unsafe {
32            // Safety: we just allocated valid UTF-8 bytes
33            std::str::from_utf8_unchecked(allocated)
34        }
35    }
36}
37
/// Statistics about memory pool usage.
///
/// Both counters are plain `usize` values, so the type is cheap to copy and
/// compare. `Default` yields all-zero statistics.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct PoolStats {
    /// Approximate bytes allocated
    pub bytes_allocated: usize,
    /// Number of allocations
    pub allocation_count: usize,
}

impl PoolStats {
    /// Returns the current pool statistics.
    ///
    /// This is a placeholder: no allocation metrics are tracked yet, so the
    /// result is always zero for both fields and does not reflect any
    /// particular pool.
    #[must_use]
    pub fn current() -> Self {
        // Until real tracking exists, "current" is the all-zero default.
        Self::default()
    }
}
59
60/// A batch processor that uses arena allocation
61#[allow(dead_code)]
62pub struct ArenaProcessor<'a> {
63    arena: Arena<UsageEntryData<'a>>,
64}
65
/// Borrowed string fields of a usage entry, intended as the element type of an
/// arena.
///
/// All fields borrow for `'a`; the struct owns no string data itself.
#[derive(Debug)]
#[allow(dead_code)] // Fields are not read anywhere in the visible code.
struct UsageEntryData<'a> {
    /// Session identifier.
    session_id: &'a str,
    /// Model name.
    model: &'a str,
    /// Project name, when one is present.
    project: Option<&'a str>,
}
74
75impl<'a> Default for ArenaProcessor<'a> {
76    fn default() -> Self {
77        Self {
78            arena: Arena::new(),
79        }
80    }
81}
82
83impl<'a> ArenaProcessor<'a> {
84    /// Create a new arena processor
85    pub fn new() -> Self {
86        Self::default()
87    }
88
89    /// Process a batch of JSONL lines using arena allocation
90    pub fn process_batch(&mut self, lines: &[String]) -> Vec<crate::types::UsageEntry> {
91        let mut entries = Vec::with_capacity(lines.len());
92
93        for line in lines {
94            if line.trim().is_empty() {
95                continue;
96            }
97
98            // Parse directly into the final structure
99            // The arena is used internally by serde for temporary allocations
100            match serde_json::from_str::<crate::types::UsageEntry>(line) {
101                Ok(entry) => entries.push(entry),
102                Err(e) => {
103                    tracing::warn!("Failed to parse JSONL entry: {}", e);
104                }
105            }
106        }
107
108        entries
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal inputs come back with equal contents but from distinct allocations.
    #[test]
    fn test_memory_pool() {
        let pool = MemoryPool::new();

        let first = pool.alloc_string("hello world");
        let second = pool.alloc_string("hello world");

        for copy in [first, second] {
            assert_eq!(copy, "hello world");
        }

        // Each call must have produced its own copy in the arena.
        assert_ne!(first.as_ptr(), second.as_ptr());
    }

    /// Two well-formed JSONL lines yield two entries, in input order.
    #[test]
    fn test_arena_processor() {
        let lines = vec![
            String::from(
                r#"{"session_id":"test1","timestamp":"2024-01-01T00:00:00Z","model":"claude-3-opus","input_tokens":100,"output_tokens":50,"cache_creation_tokens":10,"cache_read_tokens":5}"#,
            ),
            String::from(
                r#"{"session_id":"test2","timestamp":"2024-01-01T01:00:00Z","model":"claude-3-sonnet","input_tokens":200,"output_tokens":100,"cache_creation_tokens":20,"cache_read_tokens":10}"#,
            ),
        ];

        let mut processor = ArenaProcessor::new();
        let entries = processor.process_batch(&lines);

        assert_eq!(entries.len(), 2);
        let session_ids: Vec<_> = entries
            .iter()
            .map(|entry| entry.session_id.as_str())
            .collect();
        assert_eq!(session_ids, ["test1", "test2"]);
    }
}