1use crate::{Position, Result};
7use std::borrow::Cow;
8
/// A string that is either borrowed from existing text or owned outright.
///
/// Thin wrapper around [`Cow<str>`] so callers can hold onto a slice of the
/// original input when no modification was needed, and fall back to an owned
/// `String` otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZeroString<'a> {
    /// Borrowed or owned backing text.
    data: Cow<'a, str>,
}

impl<'a> ZeroString<'a> {
    /// Wraps `s` without copying it; the result lives no longer than `s`.
    pub fn borrowed(s: &'a str) -> Self {
        let data = Cow::Borrowed(s);
        Self { data }
    }

    /// Takes ownership of `s`.
    pub fn owned(s: String) -> Self {
        let data = Cow::Owned(s);
        Self { data }
    }

    /// Returns the text as a string slice, whichever way it is stored.
    pub fn as_str(&self) -> &str {
        self.data.as_ref()
    }

    /// Consumes the wrapper and returns a `String`.
    ///
    /// Copies the text if it was borrowed; hands over the existing `String`
    /// if it was already owned.
    pub fn into_owned(self) -> String {
        let Self { data } = self;
        data.into_owned()
    }

    /// Returns `true` when the text is borrowed rather than owned.
    pub fn is_borrowed(&self) -> bool {
        match self.data {
            Cow::Borrowed(_) => true,
            Cow::Owned(_) => false,
        }
    }

    /// Length of the text in bytes.
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns `true` when the text has zero length.
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }
}
55
56impl<'a> From<&'a str> for ZeroString<'a> {
57 fn from(s: &'a str) -> Self {
58 Self::borrowed(s)
59 }
60}
61
62impl<'a> From<String> for ZeroString<'a> {
63 fn from(s: String) -> Self {
64 Self::owned(s)
65 }
66}
67
68impl<'a> AsRef<str> for ZeroString<'a> {
69 fn as_ref(&self) -> &str {
70 &self.data
71 }
72}
73
74#[derive(Debug, Clone, PartialEq)]
76pub enum ZeroTokenType<'a> {
77 StreamStart,
79 StreamEnd,
81 DocumentStart,
83 DocumentEnd,
85 BlockSequenceStart,
87 BlockMappingStart,
89 BlockEnd,
91 FlowSequenceStart,
93 FlowSequenceEnd,
95 FlowMappingStart,
97 FlowMappingEnd,
99 BlockEntry,
101 FlowEntry,
103 Key,
105 Value,
107 Scalar(ZeroString<'a>, crate::scanner::QuoteStyle),
109 BlockScalarLiteral(ZeroString<'a>),
111 BlockScalarFolded(ZeroString<'a>),
113 Anchor(ZeroString<'a>),
115 Alias(ZeroString<'a>),
117 Tag(ZeroString<'a>),
119 Comment(ZeroString<'a>),
121}
122
123#[derive(Debug, Clone, PartialEq)]
125pub struct ZeroToken<'a> {
126 pub token_type: ZeroTokenType<'a>,
128 pub start_position: Position,
130 pub end_position: Position,
132}
133
134impl<'a> ZeroToken<'a> {
135 pub fn new(
137 token_type: ZeroTokenType<'a>,
138 start_position: Position,
139 end_position: Position,
140 ) -> Self {
141 Self {
142 token_type,
143 start_position,
144 end_position,
145 }
146 }
147
148 pub fn simple(token_type: ZeroTokenType<'a>, position: Position) -> Self {
150 Self::new(token_type, position, position)
151 }
152
153 pub fn into_owned(self) -> crate::scanner::Token {
155 use crate::scanner::{Token, TokenType};
156
157 let token_type = match self.token_type {
158 ZeroTokenType::StreamStart => TokenType::StreamStart,
159 ZeroTokenType::StreamEnd => TokenType::StreamEnd,
160 ZeroTokenType::DocumentStart => TokenType::DocumentStart,
161 ZeroTokenType::DocumentEnd => TokenType::DocumentEnd,
162 ZeroTokenType::BlockSequenceStart => TokenType::BlockSequenceStart,
163 ZeroTokenType::BlockMappingStart => TokenType::BlockMappingStart,
164 ZeroTokenType::BlockEnd => TokenType::BlockEnd,
165 ZeroTokenType::FlowSequenceStart => TokenType::FlowSequenceStart,
166 ZeroTokenType::FlowSequenceEnd => TokenType::FlowSequenceEnd,
167 ZeroTokenType::FlowMappingStart => TokenType::FlowMappingStart,
168 ZeroTokenType::FlowMappingEnd => TokenType::FlowMappingEnd,
169 ZeroTokenType::BlockEntry => TokenType::BlockEntry,
170 ZeroTokenType::FlowEntry => TokenType::FlowEntry,
171 ZeroTokenType::Key => TokenType::Key,
172 ZeroTokenType::Value => TokenType::Value,
173 ZeroTokenType::Scalar(s, style) => TokenType::Scalar(s.into_owned(), style),
174 ZeroTokenType::BlockScalarLiteral(s) => TokenType::BlockScalarLiteral(s.into_owned()),
175 ZeroTokenType::BlockScalarFolded(s) => TokenType::BlockScalarFolded(s.into_owned()),
176 ZeroTokenType::Anchor(s) => TokenType::Anchor(s.into_owned()),
177 ZeroTokenType::Alias(s) => TokenType::Alias(s.into_owned()),
178 ZeroTokenType::Tag(s) => TokenType::Tag(s.into_owned()),
179 ZeroTokenType::Comment(s) => TokenType::Comment(s.into_owned()),
180 };
181
182 Token::new(token_type, self.start_position, self.end_position)
183 }
184}
185
186pub struct TokenPool<'a> {
188 tokens: Vec<ZeroToken<'a>>,
190 index: usize,
192}
193
194impl<'a> TokenPool<'a> {
195 pub fn with_capacity(capacity: usize) -> Self {
197 Self {
198 tokens: Vec::with_capacity(capacity),
199 index: 0,
200 }
201 }
202
203 pub fn get_token(&mut self) -> &mut ZeroToken<'a> {
205 if self.index >= self.tokens.len() {
206 self.tokens.push(ZeroToken::simple(
208 ZeroTokenType::StreamStart,
209 Position::start(),
210 ));
211 }
212
213 let token = &mut self.tokens[self.index];
214 self.index += 1;
215 token
216 }
217
218 pub fn reset(&mut self) {
220 self.index = 0;
221 }
222
223 pub fn allocated_count(&self) -> usize {
225 self.tokens.len()
226 }
227
228 pub fn used_count(&self) -> usize {
230 self.index
231 }
232}
233
234pub struct ZeroScanner<'a> {
236 input: &'a str,
238 pub position: Position,
240 char_index: usize,
242 char_indices: Vec<(usize, char)>,
244 token_pool: TokenPool<'a>,
246}
247
248impl<'a> ZeroScanner<'a> {
249 pub fn new(input: &'a str) -> Self {
251 let char_indices: Vec<(usize, char)> = input.char_indices().collect();
252
253 Self {
254 input,
255 position: Position::start(),
256 char_index: 0,
257 char_indices,
258 token_pool: TokenPool::with_capacity(128), }
260 }
261
262 pub fn current_char(&self) -> Option<char> {
264 self.char_indices.get(self.char_index).map(|(_, ch)| *ch)
265 }
266
267 pub fn advance(&mut self) -> Option<char> {
269 if let Some((_byte_index, ch)) = self.char_indices.get(self.char_index) {
270 self.position = self.position.advance(*ch);
271 self.char_index += 1;
272 self.char_indices.get(self.char_index).map(|(_, ch)| *ch)
273 } else {
274 None
275 }
276 }
277
278 pub fn peek_char(&self, offset: isize) -> Option<char> {
280 if offset >= 0 {
281 let index = self.char_index + offset as usize;
282 self.char_indices.get(index).map(|(_, ch)| *ch)
283 } else {
284 let offset_abs = (-offset) as usize;
285 if self.char_index >= offset_abs {
286 let index = self.char_index - offset_abs;
287 self.char_indices.get(index).map(|(_, ch)| *ch)
288 } else {
289 None
290 }
291 }
292 }
293
294 pub fn slice_from(&self, start_position: Position) -> Result<&'a str> {
296 let start_byte = start_position.index;
297 let end_byte = self.position.index;
298
299 if start_byte <= end_byte && end_byte <= self.input.len() {
300 Ok(&self.input[start_byte..end_byte])
301 } else {
302 Err(crate::Error::parse(
303 self.position,
304 "Invalid slice bounds".to_string(),
305 ))
306 }
307 }
308
309 pub fn slice_between(&self, start: Position, end: Position) -> Result<&'a str> {
311 let start_byte = start.index;
312 let end_byte = end.index;
313
314 if start_byte <= end_byte && end_byte <= self.input.len() {
315 Ok(&self.input[start_byte..end_byte])
316 } else {
317 Err(crate::Error::parse(
318 self.position,
319 "Invalid slice bounds".to_string(),
320 ))
321 }
322 }
323
324 pub fn reset(&mut self) {
326 self.position = Position::start();
327 self.char_index = 0;
328 self.token_pool.reset();
329 }
330
331 pub fn stats(&self) -> ScannerStats {
333 ScannerStats {
334 input_length: self.input.len(),
335 chars_processed: self.char_index,
336 tokens_allocated: self.token_pool.allocated_count(),
337 tokens_used: self.token_pool.used_count(),
338 position: self.position,
339 }
340 }
341
342 pub fn scan_plain_scalar_zero_copy(&mut self) -> Result<ZeroToken<'a>> {
344 let start_pos = self.position;
345
346 while let Some(ch) = self.current_char() {
348 match ch {
350 '\n' | '\r' => break,
351 ':' if self.peek_char(1).map_or(true, |c| c.is_whitespace()) => break,
352 '#' if self.char_index == 0
353 || self.peek_char(-1).map_or(false, |c| c.is_whitespace()) =>
354 {
355 break;
356 }
357 ',' | '[' | ']' | '{' | '}' => break,
358 _ => {
359 self.advance();
360 }
361 }
362 }
363
364 let slice = self.slice_from(start_pos)?;
366 let trimmed_slice = slice.trim_end();
367
368 let zero_string = if trimmed_slice.len() == slice.len() {
370 ZeroString::borrowed(trimmed_slice)
372 } else {
373 ZeroString::owned(trimmed_slice.to_string())
375 };
376
377 Ok(ZeroToken::new(
378 ZeroTokenType::Scalar(zero_string, crate::scanner::QuoteStyle::Plain),
379 start_pos,
380 self.position,
381 ))
382 }
383
384 pub fn scan_identifier_zero_copy(&mut self) -> Result<ZeroString<'a>> {
386 let start_pos = self.position;
387
388 while let Some(ch) = self.current_char() {
390 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
391 self.advance();
392 } else {
393 break;
394 }
395 }
396
397 let slice = self.slice_from(start_pos)?;
398 Ok(ZeroString::borrowed(slice))
399 }
400
401 pub fn skip_whitespace(&mut self) {
403 while let Some(ch) = self.current_char() {
404 if ch == ' ' || ch == '\t' {
405 self.advance();
406 } else {
407 break;
408 }
409 }
410 }
411}
412
413#[derive(Debug, Clone)]
415pub struct ScannerStats {
416 pub input_length: usize,
418 pub chars_processed: usize,
420 pub tokens_allocated: usize,
422 pub tokens_used: usize,
424 pub position: Position,
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// A borrowed `ZeroString` reports itself as borrowed and mirrors its source.
    #[test]
    fn test_zero_string_borrowed() {
        let borrowed = ZeroString::borrowed("hello world");

        assert!(borrowed.is_borrowed());
        assert_eq!(borrowed.as_str(), "hello world");
        assert_eq!(borrowed.len(), 11);
        assert!(!borrowed.is_empty());
    }

    /// An owned `ZeroString` reports itself as not borrowed.
    #[test]
    fn test_zero_string_owned() {
        let owned = ZeroString::owned(String::from("hello world"));

        assert!(!owned.is_borrowed());
        assert_eq!(owned.as_str(), "hello world");
        assert_eq!(owned.len(), 11);
    }

    /// `current_char`, `advance` and `peek_char` agree on where the scanner is.
    #[test]
    fn test_zero_scanner_basic() {
        let mut scanner = ZeroScanner::new("hello: world");

        assert_eq!(scanner.current_char(), Some('h'));
        // `advance` returns the character the scanner lands on.
        assert_eq!(scanner.advance(), Some('e'));
        assert_eq!(scanner.current_char(), Some('e'));

        // Look one ahead and one behind without moving.
        assert_eq!(scanner.peek_char(1), Some('l'));
        assert_eq!(scanner.peek_char(-1), Some('h'));
    }

    /// `slice_from` returns the text consumed since the remembered position.
    #[test]
    fn test_zero_scanner_slicing() {
        let mut scanner = ZeroScanner::new("hello: world");
        let start = scanner.position;

        (0..5).for_each(|_| {
            scanner.advance();
        });

        let consumed = scanner.slice_from(start).unwrap();
        assert_eq!(consumed, "hello");
    }

    /// The pool creates slots lazily and keeps them across `reset`.
    #[test]
    fn test_token_pool() {
        let mut pool = TokenPool::with_capacity(2);
        assert_eq!((pool.allocated_count(), pool.used_count()), (0, 0));

        pool.get_token();
        assert_eq!((pool.allocated_count(), pool.used_count()), (1, 1));

        pool.get_token();
        assert_eq!((pool.allocated_count(), pool.used_count()), (2, 2));

        pool.reset();
        // Slots survive the reset; only the usage counter goes back to zero.
        assert_eq!((pool.allocated_count(), pool.used_count()), (2, 0));
    }

    /// A plain scalar with nothing to trim is returned as a borrowed slice.
    #[test]
    fn test_zero_copy_scalar_scanning() {
        let mut scanner = ZeroScanner::new("hello world: test");

        let token = scanner.scan_plain_scalar_zero_copy().unwrap();

        match token.token_type {
            ZeroTokenType::Scalar(value, _) => {
                assert_eq!(value.as_str(), "hello world");
                assert!(value.is_borrowed());
            }
            _ => panic!("Expected scalar token"),
        }
    }

    /// An identifier stops at the first non-identifier character and is borrowed.
    #[test]
    fn test_zero_copy_identifier_scanning() {
        let mut scanner = ZeroScanner::new("my_anchor_123 ");

        let identifier = scanner.scan_identifier_zero_copy().unwrap();

        assert_eq!(identifier.as_str(), "my_anchor_123");
        assert!(identifier.is_borrowed());
    }

    /// A scalar that had trailing whitespace removed comes back as an owned copy.
    #[test]
    fn test_zero_copy_trimming() {
        let mut scanner = ZeroScanner::new("hello   \n");

        let token = scanner.scan_plain_scalar_zero_copy().unwrap();

        match token.token_type {
            ZeroTokenType::Scalar(value, _) => {
                assert_eq!(value.as_str(), "hello");
                assert!(!value.is_borrowed());
            }
            _ => panic!("Expected scalar token"),
        }
    }
}