Skip to main content

oxihuman_core/
stream_parser.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! Streaming byte-level parser for reading structured binary or text data.
6
/// Outcome of a single read attempt on a streaming parser.
///
/// Unlike `std::result::Result`, this type has a dedicated "not enough input yet"
/// state so that callers can feed more bytes and retry.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseResult<T> {
    /// The read succeeded and produced a value.
    Ok(T),
    /// The buffer does not yet hold enough bytes to complete the read.
    NeedMore,
    /// The read failed; the payload is a human-readable description.
    Error(String),
}
15
/// A streaming cursor over a growable byte buffer.
///
/// Bytes are appended at the end of the buffer and consumed from the cursor
/// position onwards.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct StreamParser {
    /// Bytes appended so far that have not been discarded.
    buf: Vec<u8>,
    /// Index into `buf` of the next unread byte.
    pos: usize,
    /// Number of read calls that produced a value.
    parse_count: u64,
}
23
24#[allow(dead_code)]
25impl StreamParser {
26    pub fn new() -> Self {
27        Self {
28            buf: Vec::new(),
29            pos: 0,
30            parse_count: 0,
31        }
32    }
33
34    /// Feed more data into the parser buffer.
35    pub fn feed(&mut self, data: &[u8]) {
36        self.buf.extend_from_slice(data);
37    }
38
39    /// Remaining unread bytes.
40    pub fn remaining(&self) -> usize {
41        self.buf.len().saturating_sub(self.pos)
42    }
43
44    /// Total bytes fed so far.
45    pub fn total_fed(&self) -> usize {
46        self.buf.len()
47    }
48
49    /// Current read cursor position.
50    pub fn position(&self) -> usize {
51        self.pos
52    }
53
54    /// Read a single byte.
55    pub fn read_u8(&mut self) -> ParseResult<u8> {
56        if self.pos >= self.buf.len() {
57            return ParseResult::NeedMore;
58        }
59        let b = self.buf[self.pos];
60        self.pos += 1;
61        self.parse_count += 1;
62        ParseResult::Ok(b)
63    }
64
65    /// Read a little-endian u16.
66    pub fn read_u16_le(&mut self) -> ParseResult<u16> {
67        if self.remaining() < 2 {
68            return ParseResult::NeedMore;
69        }
70        let lo = self.buf[self.pos] as u16;
71        let hi = self.buf[self.pos + 1] as u16;
72        self.pos += 2;
73        self.parse_count += 1;
74        ParseResult::Ok(lo | (hi << 8))
75    }
76
77    /// Read a little-endian u32.
78    pub fn read_u32_le(&mut self) -> ParseResult<u32> {
79        if self.remaining() < 4 {
80            return ParseResult::NeedMore;
81        }
82        let b = &self.buf[self.pos..self.pos + 4];
83        let v = u32::from_le_bytes([b[0], b[1], b[2], b[3]]);
84        self.pos += 4;
85        self.parse_count += 1;
86        ParseResult::Ok(v)
87    }
88
89    /// Read `n` raw bytes.
90    pub fn read_bytes(&mut self, n: usize) -> ParseResult<Vec<u8>> {
91        if self.remaining() < n {
92            return ParseResult::NeedMore;
93        }
94        let slice = self.buf[self.pos..self.pos + n].to_vec();
95        self.pos += n;
96        self.parse_count += 1;
97        ParseResult::Ok(slice)
98    }
99
100    /// Read a null-terminated UTF-8 string.
101    pub fn read_cstring(&mut self) -> ParseResult<String> {
102        let start = self.pos;
103        let null_pos = self.buf[start..].iter().position(|&b| b == 0);
104        match null_pos {
105            None => ParseResult::NeedMore,
106            Some(rel) => {
107                let s = std::str::from_utf8(&self.buf[start..start + rel])
108                    .map(|s| s.to_string())
109                    .unwrap_or_else(|e| format!("<utf8 error: {e}>"));
110                self.pos = start + rel + 1;
111                self.parse_count += 1;
112                ParseResult::Ok(s)
113            }
114        }
115    }
116
117    /// Skip `n` bytes.
118    pub fn skip(&mut self, n: usize) -> bool {
119        if self.remaining() < n {
120            return false;
121        }
122        self.pos += n;
123        true
124    }
125
126    /// Reset the cursor to the beginning (keeps buffer data).
127    pub fn reset_cursor(&mut self) {
128        self.pos = 0;
129    }
130
131    /// Discard already-consumed bytes to free memory.
132    pub fn compact(&mut self) {
133        self.buf.drain(..self.pos);
134        self.pos = 0;
135    }
136
137    /// Number of successful parse calls.
138    pub fn parse_count(&self) -> u64 {
139        self.parse_count
140    }
141}
142
143impl Default for StreamParser {
144    fn default() -> Self {
145        Self::new()
146    }
147}
148
149pub fn new_stream_parser() -> StreamParser {
150    StreamParser::new()
151}
152
153pub fn sp_feed(sp: &mut StreamParser, data: &[u8]) {
154    sp.feed(data);
155}
156
157pub fn sp_read_u8(sp: &mut StreamParser) -> ParseResult<u8> {
158    sp.read_u8()
159}
160
161pub fn sp_read_u32_le(sp: &mut StreamParser) -> ParseResult<u32> {
162    sp.read_u32_le()
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// A parser that was never fed has nothing to read.
    #[test]
    fn empty_parser_needs_more() {
        let mut parser = new_stream_parser();
        let outcome = sp_read_u8(&mut parser);
        assert_eq!(outcome, ParseResult::NeedMore);
    }

    /// A single fed byte comes back unchanged.
    #[test]
    fn feed_and_read_u8() {
        let mut parser = new_stream_parser();
        sp_feed(&mut parser, &[0xAB]);
        let outcome = sp_read_u8(&mut parser);
        assert_eq!(outcome, ParseResult::Ok(0xAB));
    }

    /// Two bytes are combined low byte first.
    #[test]
    fn read_u16_le() {
        let mut parser = new_stream_parser();
        parser.feed(&[0x01, 0x02]);
        let outcome = parser.read_u16_le();
        assert_eq!(outcome, ParseResult::Ok(0x0201));
    }

    /// Four bytes are combined low byte first.
    #[test]
    fn read_u32_le() {
        let mut parser = new_stream_parser();
        parser.feed(&[0x01, 0x00, 0x00, 0x00]);
        let outcome = sp_read_u32_le(&mut parser);
        assert_eq!(outcome, ParseResult::Ok(1));
    }

    /// The terminator is consumed but not included in the returned string.
    #[test]
    fn read_cstring() {
        let mut parser = new_stream_parser();
        parser.feed(b"hello\0");
        let outcome = parser.read_cstring();
        assert_eq!(outcome, ParseResult::Ok("hello".to_string()));
    }

    /// A `u32` read with only two buffered bytes asks for more input.
    #[test]
    fn partial_u32_needs_more() {
        let mut parser = new_stream_parser();
        parser.feed(&[0x01, 0x02]);
        let outcome = sp_read_u32_le(&mut parser);
        assert_eq!(outcome, ParseResult::NeedMore);
    }

    /// Skipping moves the cursor so the next read sees the byte after the gap.
    #[test]
    fn skip_bytes() {
        let mut parser = new_stream_parser();
        parser.feed(&[0, 0, 42]);
        let skipped = parser.skip(2);
        assert!(skipped);
        assert_eq!(sp_read_u8(&mut parser), ParseResult::Ok(42));
    }

    /// Compaction drops the two consumed bytes and rewinds the cursor.
    #[test]
    fn compact_frees_consumed() {
        let mut parser = new_stream_parser();
        parser.feed(&[1, 2, 3]);
        let _ = sp_read_u8(&mut parser);
        let _ = sp_read_u8(&mut parser);
        parser.compact();
        assert_eq!(parser.total_fed(), 1);
        assert_eq!(parser.position(), 0);
    }

    /// Each successful read bumps the parse counter by one.
    #[test]
    fn parse_count_increments() {
        let mut parser = new_stream_parser();
        parser.feed(&[1, 2, 3]);
        let _ = sp_read_u8(&mut parser);
        let _ = sp_read_u8(&mut parser);
        assert_eq!(parser.parse_count(), 2);
    }

    /// Reading exactly the buffered length returns every byte in order.
    #[test]
    fn read_bytes_exact() {
        let mut parser = new_stream_parser();
        parser.feed(&[10, 20, 30]);
        let outcome = parser.read_bytes(3);
        assert_eq!(outcome, ParseResult::Ok(vec![10, 20, 30]));
    }
}