Skip to main content

oxihuman_core/
arena_str.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
/// An arena-based string allocator that stores strings in contiguous memory.
///
/// Every string handed to [`ArenaStr::alloc`] is appended to a single shared
/// buffer and is identified by the zero-based index at which it was allocated.
/// Strings are never removed individually; [`ArenaStr::clear`] drops all of
/// them at once and restarts id numbering from zero.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ArenaStr {
    /// Concatenation of every allocated string, in allocation order.
    ///
    /// Kept as a `String` (not raw bytes) so lookups can hand out `&str`
    /// slices without re-validating UTF-8 on every call.
    buffer: String,
    /// `(start, len)` byte span inside `buffer` for each id, indexed by id.
    offsets: Vec<(usize, usize)>,
}

#[allow(dead_code)]
impl ArenaStr {
    /// Creates an empty arena without reserving any memory.
    pub fn new() -> Self {
        Self {
            buffer: String::new(),
            offsets: Vec::new(),
        }
    }

    /// Creates an empty arena whose byte buffer can hold at least `capacity`
    /// bytes before it has to reallocate.
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            buffer: String::with_capacity(capacity),
            offsets: Vec::new(),
        }
    }

    /// Appends `s` to the arena and returns its id.
    ///
    /// Ids are handed out sequentially starting at `0`. No de-duplication is
    /// performed: allocating the same text twice yields two distinct ids.
    pub fn alloc(&mut self, s: &str) -> usize {
        let start = self.buffer.len();
        self.buffer.push_str(s);
        let id = self.offsets.len();
        self.offsets.push((start, s.len()));
        id
    }

    /// Returns the string stored under `id`, or `None` if no string has been
    /// allocated with that id.
    pub fn get(&self, id: usize) -> Option<&str> {
        let &(start, len) = self.offsets.get(id)?;
        // Checked slicing: never panics, and only verifies the span's bounds
        // and char boundaries instead of re-scanning the bytes for UTF-8
        // validity.
        self.buffer.get(start..start + len)
    }

    /// Returns the number of strings stored in the arena.
    pub fn len(&self) -> usize {
        self.offsets.len()
    }

    /// Returns `true` if no strings have been allocated.
    pub fn is_empty(&self) -> bool {
        self.offsets.is_empty()
    }

    /// Returns the total number of bytes occupied by all stored strings.
    pub fn total_bytes(&self) -> usize {
        self.buffer.len()
    }

    /// Removes every string from the arena.
    ///
    /// Ids returned before the call no longer resolve to their old strings;
    /// numbering restarts at `0`.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.offsets.clear();
    }

    /// Returns `true` if some stored string is exactly equal to `s`.
    ///
    /// This is a linear scan over all stored strings.
    pub fn contains(&self, s: &str) -> bool {
        self.offsets
            .iter()
            .any(|&(start, len)| self.buffer.get(start..start + len) == Some(s))
    }

    /// Returns the mean length in bytes of the stored strings, or `0.0` when
    /// the arena is empty.
    pub fn avg_len(&self) -> f32 {
        if self.offsets.is_empty() {
            return 0.0;
        }
        // Divide in f64 and narrow once at the end: casting each operand to
        // f32 first loses precision as soon as a count exceeds 2^24.
        (self.buffer.len() as f64 / self.offsets.len() as f64) as f32
    }
}

impl Default for ArenaStr {
    /// Equivalent to [`ArenaStr::new`].
    fn default() -> Self {
        Self::new()
    }
}
79
#[cfg(test)]
mod tests {
    //! Unit tests covering the public surface of `ArenaStr`.
    use super::*;

    /// A freshly constructed arena holds no strings and no bytes.
    #[test]
    fn test_new() {
        let arena = ArenaStr::new();
        assert_eq!(arena.total_bytes(), 0);
        assert!(arena.is_empty());
    }

    /// A single allocated string can be read back through its id.
    #[test]
    fn test_alloc_get() {
        let mut arena = ArenaStr::new();
        let handle = arena.alloc("hello");
        let fetched = arena.get(handle);
        assert_eq!(fetched, Some("hello"));
    }

    /// Several allocations each keep their own content and are all counted.
    #[test]
    fn test_multiple_allocs() {
        let mut arena = ArenaStr::new();
        let words = ["foo", "bar", "baz"];
        let handles: Vec<usize> = words.iter().map(|word| arena.alloc(word)).collect();
        for (handle, word) in handles.iter().zip(words.iter()) {
            assert_eq!(arena.get(*handle), Some(*word));
        }
        assert_eq!(arena.len(), 3);
    }

    /// Looking up an id that was never handed out yields `None`.
    #[test]
    fn test_get_out_of_bounds() {
        let arena = ArenaStr::new();
        let missing = arena.get(0);
        assert_eq!(missing, None);
    }

    /// The byte total is the sum of the lengths of all stored strings.
    #[test]
    fn test_total_bytes() {
        let mut arena = ArenaStr::new();
        for piece in ["abc", "de"].iter() {
            arena.alloc(piece);
        }
        assert_eq!(arena.total_bytes(), 5);
    }

    /// Clearing drops both the stored strings and their bytes.
    #[test]
    fn test_clear() {
        let mut arena = ArenaStr::new();
        arena.alloc("test");
        arena.clear();
        assert_eq!(arena.total_bytes(), 0);
        assert!(arena.is_empty());
    }

    /// `contains` reports stored strings and rejects ones never stored.
    #[test]
    fn test_contains() {
        let mut arena = ArenaStr::new();
        arena.alloc("hello");
        let has_hello = arena.contains("hello");
        let has_world = arena.contains("world");
        assert!(has_hello);
        assert!(!has_world);
    }

    /// Reserving capacity up front does not create any entries.
    #[test]
    fn test_with_capacity() {
        let arena = ArenaStr::with_capacity(1024);
        assert!(arena.is_empty());
    }

    /// Strings of 2 and 4 bytes average out to 3 bytes.
    #[test]
    fn test_avg_len() {
        let mut arena = ArenaStr::new();
        arena.alloc("ab");
        arena.alloc("cdef");
        let deviation = (arena.avg_len() - 3.0).abs();
        assert!(deviation < 1e-6);
    }

    /// An empty arena reports an average length of zero.
    #[test]
    fn test_avg_len_empty() {
        let arena = ArenaStr::new();
        let magnitude = arena.avg_len().abs();
        assert!(magnitude < 1e-6);
    }
}