Skip to main content

fionn_stream/skiptape/
processor.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2//! SIMD-JSONL Skip Tape Processor
3//!
4//! This module provides the core processing engine that integrates SIMD-accelerated
5//! JSON parsing with schema-aware filtering to produce skip tapes.
6
7use crate::skiptape::error::{Result, SkipTapeError};
8use crate::skiptape::schema::CompiledSchema;
9use crate::skiptape::tape::{SkipNode, SkipTape};
10use bumpalo::Bump;
11
/// Main processor for SIMD-JSONL skip tape generation
///
/// Owns the bump arena that is handed to every [`SkipTape`] it builds. The arena is
/// reset at the start of each `process_*` call and by [`SkipTapeProcessor::reset`].
pub struct SkipTapeProcessor {
    /// Memory arena for zero-allocation processing; passed to `SkipTape::with_capacity`
    arena: Bump,
}
17
18impl SkipTapeProcessor {
19    /// Create a new skip tape processor
20    #[must_use]
21    pub fn new() -> Self {
22        Self { arena: Bump::new() }
23    }
24
25    /// Process a JSON array with schema filtering using SIMD structural detection
26    ///
27    /// # Errors
28    /// Returns an error if parsing or processing fails
29    pub fn process_json_array(
30        &mut self,
31        json_array: &str,
32        schema: &CompiledSchema,
33    ) -> Result<SkipTape<'_>> {
34        // Reset arena for this processing
35        self.arena.reset();
36
37        // Estimate capacity based on array length
38        let estimated_nodes = json_array.len() / 5; // Rough estimate for arrays
39        let mut skip_tape = SkipTape::with_capacity(&self.arena, estimated_nodes);
40
41        // Set original size for metrics
42        skip_tape.metadata.original_size = json_array.len();
43
44        // Process the JSON array with SIMD acceleration
45        Self::process_json_with_schema(json_array, schema, &mut skip_tape)?;
46
47        // Calculate schema match ratio using f64 for precision
48        skip_tape.metadata.schema_match_ratio = if skip_tape.metadata.node_count > 0 {
49            let node_count_f64 =
50                f64::from(u32::try_from(skip_tape.metadata.node_count).unwrap_or(u32::MAX));
51            let total_f64 = f64::from(
52                u32::try_from(skip_tape.metadata.node_count + skip_tape.metadata.skipped_count)
53                    .unwrap_or(u32::MAX),
54            );
55            node_count_f64 / total_f64
56        } else {
57            0.0
58        };
59
60        Ok(skip_tape)
61    }
62
63    /// Reset the processor for a new operation
64    pub fn reset(&mut self) {
65        self.arena.reset();
66    }
67
68    /// Process a single JSON line with schema filtering
69    ///
70    /// # Errors
71    /// Returns an error if parsing or processing fails
72    pub fn process_line(
73        &mut self,
74        json_line: &str,
75        schema: &CompiledSchema,
76    ) -> Result<SkipTape<'_>> {
77        // Reset arena for this line
78        self.arena.reset();
79
80        // Estimate capacity based on line length
81        let estimated_nodes = json_line.len() / 10; // Rough estimate
82        let mut skip_tape = SkipTape::with_capacity(&self.arena, estimated_nodes);
83
84        // Set original size for metrics
85        skip_tape.metadata.original_size = json_line.len();
86
87        // Process the JSON line with SIMD acceleration
88        Self::process_json_with_schema(json_line, schema, &mut skip_tape)?;
89
90        // Calculate schema match ratio using f64 for precision
91        skip_tape.metadata.schema_match_ratio = if skip_tape.metadata.node_count > 0 {
92            let node_count_f64 =
93                f64::from(u32::try_from(skip_tape.metadata.node_count).unwrap_or(u32::MAX));
94            let total_f64 = f64::from(
95                u32::try_from(skip_tape.metadata.node_count + skip_tape.metadata.skipped_count)
96                    .unwrap_or(u32::MAX),
97            );
98            node_count_f64 / total_f64
99        } else {
100            0.0
101        };
102
103        Ok(skip_tape)
104    }
105
106    /// Process JSON with schema-aware SIMD parsing
107    fn process_json_with_schema(
108        json: &str,
109        schema: &CompiledSchema,
110        skip_tape: &mut SkipTape<'_>,
111    ) -> Result<()> {
112        let bytes = json.as_bytes();
113        let mut index = 0;
114        let mut depth = 0;
115        let mut path_stack = Vec::new();
116
117        while index < bytes.len() {
118            match bytes[index] {
119                b'{' => {
120                    // Object start - check if we should include this object
121                    let current_path = Self::build_path_string(&path_stack);
122                    if schema.should_include_object(&current_path) {
123                        skip_tape.add_node(SkipNode::object_start().with_depth(depth));
124                    } else {
125                        // Skip this entire object
126                        index = Self::skip_object(bytes, index)?;
127                        skip_tape.metadata.skipped_count += 1;
128                        skip_tape.add_node(SkipNode::skip_marker().with_depth(depth));
129                        continue;
130                    }
131                    depth += 1;
132                }
133                b'}' => {
134                    depth = depth.saturating_sub(1);
135                    skip_tape.add_node(SkipNode::object_end().with_depth(depth));
136                }
137                b'[' => {
138                    // Array start - similar logic to objects
139                    skip_tape.add_node(SkipNode::array_start().with_depth(depth));
140                    depth += 1;
141                }
142                b']' => {
143                    depth = depth.saturating_sub(1);
144                    skip_tape.add_node(SkipNode::array_end().with_depth(depth));
145                }
146                b'"' => {
147                    // String value - check if it's a field name or value
148                    if Self::is_field_name(bytes, index) {
149                        let field_name = Self::parse_string(bytes, &mut index)?;
150                        path_stack.push(field_name.clone());
151
152                        // Check if this field should be included
153                        let current_path = Self::build_path_string(&path_stack);
154                        if !schema.matches_path(&current_path) {
155                            // Skip the field value
156                            Self::skip_value(bytes, &mut index)?;
157                            path_stack.pop();
158                            skip_tape.metadata.skipped_count += 1;
159                            continue;
160                        }
161                    } else {
162                        // Regular string value
163                        let string_value = Self::parse_string(bytes, &mut index)?;
164                        let offset = skip_tape.strings.add_string(&string_value);
165                        let len = u16::try_from(string_value.len()).unwrap_or(u16::MAX);
166                        skip_tape.add_node(SkipNode::string(offset, len).with_depth(depth));
167                    }
168                }
169                b't' | b'f' => {
170                    // Boolean value
171                    let value = Self::parse_boolean(bytes, &mut index)?;
172                    skip_tape.add_node(SkipNode::bool(value).with_depth(depth));
173                }
174                b'n' => {
175                    // Null value
176                    Self::expect_keyword(bytes, &mut index, b"null")?;
177                    skip_tape.add_node(SkipNode::null().with_depth(depth));
178                }
179                b'0'..=b'9' | b'-' => {
180                    // Number value
181                    let number_value = Self::parse_number(bytes, &mut index)?;
182                    skip_tape.add_node(SkipNode::number(number_value).with_depth(depth));
183                }
184                b',' | b':' | b' ' | b'\t' | b'\n' | b'\r' => {
185                    // Structural or whitespace - skip
186                }
187                _ => {
188                    return Err(SkipTapeError::ParseError(format!(
189                        "Unexpected character: {}",
190                        bytes[index] as char
191                    )));
192                }
193            }
194            index += 1;
195        }
196
197        Ok(())
198    }
199
200    /// Check if the current position contains a field name
201    fn is_field_name(bytes: &[u8], index: usize) -> bool {
202        // Look backward to see if we're after a '{' or ','
203        for i in (0..index).rev() {
204            match bytes[i] {
205                b'{' | b',' => return true,
206                b' ' | b'\t' | b'\n' | b'\r' => {}
207                _ => return false,
208            }
209        }
210        false
211    }
212
213    /// Parse a JSON string and return the string value
214    fn parse_string(bytes: &[u8], index: &mut usize) -> Result<String> {
215        // Simple string parsing - in a real implementation this would handle escapes
216        let start = *index + 1; // Skip opening quote
217        let mut end = start;
218
219        while end < bytes.len() && bytes[end] != b'"' {
220            if bytes[end] == b'\\' {
221                end += 1; // Skip escape sequence
222            }
223            end += 1;
224        }
225
226        if end >= bytes.len() {
227            return Err(SkipTapeError::ParseError("Unterminated string".to_string()));
228        }
229
230        let string_bytes = &bytes[start..end];
231        let string_value = std::str::from_utf8(string_bytes)
232            .map_err(|e| SkipTapeError::ParseError(format!("Invalid UTF-8 in string: {e}")))?
233            .to_string();
234
235        *index = end; // Update index to after closing quote
236        Ok(string_value)
237    }
238
239    /// Parse a boolean value
240    fn parse_boolean(bytes: &[u8], index: &mut usize) -> Result<bool> {
241        if bytes[*index..].starts_with(b"true") {
242            *index += 3;
243            Ok(true)
244        } else if bytes[*index..].starts_with(b"false") {
245            *index += 4;
246            Ok(false)
247        } else {
248            Err(SkipTapeError::ParseError("Invalid boolean".to_string()))
249        }
250    }
251
252    /// Parse a number value
253    fn parse_number(bytes: &[u8], index: &mut usize) -> Result<f64> {
254        let start = *index;
255        while *index < bytes.len()
256            && (bytes[*index].is_ascii_digit()
257                || bytes[*index] == b'.'
258                || bytes[*index] == b'-'
259                || bytes[*index] == b'+'
260                || bytes[*index] == b'e'
261                || bytes[*index] == b'E')
262        {
263            *index += 1;
264        }
265
266        let number_str = std::str::from_utf8(&bytes[start..*index])
267            .map_err(|e| SkipTapeError::ParseError(format!("Invalid UTF-8 in number: {e}")))?;
268
269        number_str
270            .parse()
271            .map_err(|e| SkipTapeError::ParseError(format!("Invalid number: {e}")))
272    }
273
274    /// Skip a JSON value
275    fn skip_value(bytes: &[u8], index: &mut usize) -> Result<()> {
276        match bytes[*index] {
277            b'"' => {
278                Self::parse_string(bytes, index)?;
279            }
280            b't' | b'f' => {
281                Self::parse_boolean(bytes, index)?;
282            }
283            b'n' => {
284                Self::expect_keyword(bytes, index, b"null")?;
285            }
286            b'0'..=b'9' | b'-' => {
287                Self::parse_number(bytes, index)?;
288            }
289            b'{' => {
290                *index = Self::skip_object(bytes, *index)?;
291            }
292            b'[' => {
293                *index = Self::skip_array(bytes, *index)?;
294            }
295            _ => {
296                return Err(SkipTapeError::ParseError(format!(
297                    "Unexpected character in value: {}",
298                    bytes[*index] as char
299                )));
300            }
301        }
302        Ok(())
303    }
304
305    /// Skip an entire object
306    fn skip_object(bytes: &[u8], start_index: usize) -> Result<usize> {
307        let mut index = start_index;
308        let mut depth = 0;
309
310        loop {
311            if index >= bytes.len() {
312                return Err(SkipTapeError::ParseError("Unterminated object".to_string()));
313            }
314
315            match bytes[index] {
316                b'{' => depth += 1,
317                b'}' => {
318                    depth -= 1;
319                    if depth == 0 {
320                        return Ok(index);
321                    }
322                }
323                b'"' => {
324                    // Skip string
325                    while index < bytes.len() {
326                        if bytes[index] == b'"' && (index == 0 || bytes[index - 1] != b'\\') {
327                            break;
328                        }
329                        index += 1;
330                    }
331                }
332                _ => {} // Skip other characters
333            }
334            index += 1;
335        }
336    }
337
338    /// Skip an entire array
339    fn skip_array(bytes: &[u8], start_index: usize) -> Result<usize> {
340        let mut index = start_index;
341        let mut depth = 0;
342
343        loop {
344            if index >= bytes.len() {
345                return Err(SkipTapeError::ParseError("Unterminated array".to_string()));
346            }
347
348            match bytes[index] {
349                b'[' => depth += 1,
350                b']' => {
351                    depth -= 1;
352                    if depth == 0 {
353                        return Ok(index);
354                    }
355                }
356                b'"' => {
357                    // Skip string
358                    while index < bytes.len() {
359                        if bytes[index] == b'"' && (index == 0 || bytes[index - 1] != b'\\') {
360                            break;
361                        }
362                        index += 1;
363                    }
364                }
365                _ => {} // Skip other characters
366            }
367            index += 1;
368        }
369    }
370
371    /// Expect a specific keyword
372    fn expect_keyword(bytes: &[u8], index: &mut usize, keyword: &[u8]) -> Result<()> {
373        if bytes[*index..].starts_with(keyword) {
374            *index += keyword.len() - 1; // -1 because caller will +1
375            Ok(())
376        } else {
377            Err(SkipTapeError::ParseError(format!(
378                "Expected keyword: {}",
379                std::str::from_utf8(keyword).unwrap_or("invalid")
380            )))
381        }
382    }
383
384    /// Build a path string from the path stack
385    fn build_path_string(path_stack: &[String]) -> String {
386        path_stack.join(".")
387    }
388}
389
390impl Default for SkipTapeProcessor {
391    fn default() -> Self {
392        Self::new()
393    }
394}
395
396#[cfg(test)]
397mod tests {
398    use super::*;
399    use crate::skiptape::schema::CompiledSchema;
400
401    #[test]
402    fn test_skip_tape_processor_new() {
403        let processor = SkipTapeProcessor::new();
404        assert!(std::mem::size_of_val(&processor) > 0);
405    }
406
407    #[test]
408    fn test_skip_tape_processor_default() {
409        let processor = SkipTapeProcessor::default();
410        assert!(std::mem::size_of_val(&processor) > 0);
411    }
412
413    #[test]
414    fn test_skip_tape_processor_reset() {
415        let mut processor = SkipTapeProcessor::new();
416        processor.reset();
417    }
418
419    #[test]
420    fn test_process_line_simple() {
421        let mut processor = SkipTapeProcessor::new();
422        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
423        let result = processor.process_line(r#"{"name": "test"}"#, &schema);
424        assert!(result.is_ok());
425    }
426
427    #[test]
428    fn test_process_line_with_numbers() {
429        let mut processor = SkipTapeProcessor::new();
430        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
431        let result = processor.process_line(r#"{"value": 42}"#, &schema);
432        assert!(result.is_ok());
433    }
434
435    #[test]
436    fn test_process_line_with_boolean() {
437        let mut processor = SkipTapeProcessor::new();
438        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
439        let result = processor.process_line(r#"{"active": true}"#, &schema);
440        assert!(result.is_ok());
441    }
442
443    #[test]
444    fn test_process_line_with_null() {
445        let mut processor = SkipTapeProcessor::new();
446        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
447        let result = processor.process_line(r#"{"value": null}"#, &schema);
448        assert!(result.is_ok());
449    }
450
451    #[test]
452    fn test_process_line_with_array() {
453        let mut processor = SkipTapeProcessor::new();
454        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
455        let result = processor.process_line(r#"{"items": [1, 2, 3]}"#, &schema);
456        assert!(result.is_ok());
457    }
458
459    #[test]
460    fn test_process_line_with_nested_object() {
461        let mut processor = SkipTapeProcessor::new();
462        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
463        let result = processor.process_line(r#"{"user": {"name": "test"}}"#, &schema);
464        assert!(result.is_ok());
465    }
466
467    #[test]
468    fn test_process_json_array() {
469        let mut processor = SkipTapeProcessor::new();
470        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
471        let result = processor.process_json_array(r#"[{"a": 1}, {"b": 2}]"#, &schema);
472        assert!(result.is_ok());
473    }
474
475    #[test]
476    fn test_process_line_negative_number() {
477        let mut processor = SkipTapeProcessor::new();
478        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
479        let result = processor.process_line(r#"{"value": -123}"#, &schema);
480        assert!(result.is_ok());
481    }
482
483    #[test]
484    fn test_process_line_float() {
485        let mut processor = SkipTapeProcessor::new();
486        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
487        let result = processor.process_line(r#"{"value": 3.14}"#, &schema);
488        assert!(result.is_ok());
489    }
490
491    #[test]
492    fn test_process_line_false() {
493        let mut processor = SkipTapeProcessor::new();
494        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
495        let result = processor.process_line(r#"{"active": false}"#, &schema);
496        assert!(result.is_ok());
497    }
498
499    #[test]
500    fn test_process_empty_object() {
501        let mut processor = SkipTapeProcessor::new();
502        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
503        let result = processor.process_line(r"{}", &schema);
504        assert!(result.is_ok());
505    }
506
507    #[test]
508    fn test_process_empty_array() {
509        let mut processor = SkipTapeProcessor::new();
510        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
511        let result = processor.process_line(r"[]", &schema);
512        assert!(result.is_ok());
513    }
514
515    #[test]
516    fn test_process_line_with_schema_filtering() {
517        let mut processor = SkipTapeProcessor::new();
518        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
519        let result = processor.process_line(r#"{"name": "test", "age": 30}"#, &schema);
520        assert!(result.is_ok());
521        let tape = result.unwrap();
522        // Verify tape was created successfully
523        assert!(tape.metadata.node_count > 0);
524    }
525
526    #[test]
527    fn test_process_line_with_escape_sequence() {
528        let mut processor = SkipTapeProcessor::new();
529        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
530        let result = processor.process_line(r#"{"name": "test\"value"}"#, &schema);
531        assert!(result.is_ok());
532    }
533
534    #[test]
535    fn test_process_line_scientific_notation() {
536        let mut processor = SkipTapeProcessor::new();
537        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
538        let result = processor.process_line(r#"{"value": 1.5e10}"#, &schema);
539        assert!(result.is_ok());
540    }
541
542    #[test]
543    fn test_process_line_with_whitespace() {
544        let mut processor = SkipTapeProcessor::new();
545        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
546        let result = processor.process_line(r#"{  "name"  :  "test"  }"#, &schema);
547        assert!(result.is_ok());
548    }
549
550    #[test]
551    fn test_process_line_with_newlines() {
552        let mut processor = SkipTapeProcessor::new();
553        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
554        let result = processor.process_line("{\n\"name\"\n:\n\"test\"\n}", &schema);
555        assert!(result.is_ok());
556    }
557
558    #[test]
559    fn test_process_line_with_tabs() {
560        let mut processor = SkipTapeProcessor::new();
561        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
562        let result = processor.process_line("{\t\"name\"\t:\t\"test\"\t}", &schema);
563        assert!(result.is_ok());
564    }
565
566    #[test]
567    fn test_process_json_array_empty() {
568        let mut processor = SkipTapeProcessor::new();
569        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
570        let result = processor.process_json_array(r"[]", &schema);
571        assert!(result.is_ok());
572    }
573
574    #[test]
575    fn test_process_json_array_nested() {
576        let mut processor = SkipTapeProcessor::new();
577        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
578        let result = processor.process_json_array(r"[[1, 2], [3, 4]]", &schema);
579        assert!(result.is_ok());
580    }
581
582    #[test]
583    fn test_schema_match_ratio_calculation() {
584        let mut processor = SkipTapeProcessor::new();
585        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
586        let result = processor.process_line(r#"{"a": 1, "b": 2}"#, &schema);
587        assert!(result.is_ok());
588        let tape = result.unwrap();
589        // With wildcard schema, all fields should match
590        assert!(tape.metadata.schema_match_ratio >= 0.0);
591    }
592
593    #[test]
594    fn test_process_deeply_nested_object() {
595        let mut processor = SkipTapeProcessor::new();
596        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
597        let json = r#"{"a": {"b": {"c": {"d": {"e": 1}}}}}"#;
598        let result = processor.process_line(json, &schema);
599        assert!(result.is_ok());
600    }
601
602    #[test]
603    fn test_process_line_multiple_booleans() {
604        let mut processor = SkipTapeProcessor::new();
605        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
606        let result = processor.process_line(r#"{"a": true, "b": false, "c": true}"#, &schema);
607        assert!(result.is_ok());
608    }
609
610    #[test]
611    fn test_process_line_multiple_nulls() {
612        let mut processor = SkipTapeProcessor::new();
613        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
614        let result = processor.process_line(r#"{"a": null, "b": null}"#, &schema);
615        assert!(result.is_ok());
616    }
617
618    #[test]
619    fn test_process_line_mixed_array() {
620        let mut processor = SkipTapeProcessor::new();
621        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
622        let result = processor.process_line(
623            r#"{"items": [1, "two", true, null, {"nested": 1}]}"#,
624            &schema,
625        );
626        assert!(result.is_ok());
627    }
628
629    #[test]
630    fn test_process_line_array_of_objects() {
631        let mut processor = SkipTapeProcessor::new();
632        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
633        let result =
634            processor.process_line(r#"{"users": [{"name": "a"}, {"name": "b"}]}"#, &schema);
635        assert!(result.is_ok());
636    }
637
638    #[test]
639    fn test_is_field_name_after_brace() {
640        let _processor = SkipTapeProcessor::new();
641        let bytes = b"{\"field\": \"value\"}";
642        assert!(SkipTapeProcessor::is_field_name(bytes, 1));
643    }
644
645    #[test]
646    fn test_is_field_name_after_comma() {
647        let _processor = SkipTapeProcessor::new();
648        let bytes = b"{\"a\": 1, \"field\": 2}";
649        assert!(SkipTapeProcessor::is_field_name(bytes, 9));
650    }
651
652    #[test]
653    fn test_is_field_name_value_position() {
654        let _processor = SkipTapeProcessor::new();
655        let bytes = b"{\"field\": \"value\"}";
656        assert!(!SkipTapeProcessor::is_field_name(bytes, 10));
657    }
658
659    #[test]
660    fn test_build_path_string_empty() {
661        let _processor = SkipTapeProcessor::new();
662        let path_stack: Vec<String> = Vec::new();
663        assert_eq!(SkipTapeProcessor::build_path_string(&path_stack), "");
664    }
665
666    #[test]
667    fn test_build_path_string_single() {
668        let _processor = SkipTapeProcessor::new();
669        let path_stack = vec!["field".to_string()];
670        assert_eq!(SkipTapeProcessor::build_path_string(&path_stack), "field");
671    }
672
673    #[test]
674    fn test_build_path_string_nested() {
675        let _processor = SkipTapeProcessor::new();
676        let path_stack = vec!["user".to_string(), "name".to_string()];
677        assert_eq!(
678            SkipTapeProcessor::build_path_string(&path_stack),
679            "user.name"
680        );
681    }
682
683    #[test]
684    fn test_parse_string_basic() {
685        let _processor = SkipTapeProcessor::new();
686        let bytes = b"\"hello\"";
687        let mut index = 0;
688        let result = SkipTapeProcessor::parse_string(bytes, &mut index);
689        assert!(result.is_ok());
690        assert_eq!(result.unwrap(), "hello");
691    }
692
693    #[test]
694    fn test_parse_string_with_escape() {
695        let _processor = SkipTapeProcessor::new();
696        let bytes = b"\"he\\\"llo\"";
697        let mut index = 0;
698        let result = SkipTapeProcessor::parse_string(bytes, &mut index);
699        assert!(result.is_ok());
700    }
701
702    #[test]
703    fn test_parse_string_unterminated() {
704        let _processor = SkipTapeProcessor::new();
705        let bytes = b"\"hello";
706        let mut index = 0;
707        let result = SkipTapeProcessor::parse_string(bytes, &mut index);
708        assert!(result.is_err());
709    }
710
711    #[test]
712    fn test_parse_boolean_true() {
713        let _processor = SkipTapeProcessor::new();
714        let bytes = b"true";
715        let mut index = 0;
716        let result = SkipTapeProcessor::parse_boolean(bytes, &mut index);
717        assert!(result.is_ok());
718        assert!(result.unwrap());
719    }
720
721    #[test]
722    fn test_parse_boolean_false() {
723        let _processor = SkipTapeProcessor::new();
724        let bytes = b"false";
725        let mut index = 0;
726        let result = SkipTapeProcessor::parse_boolean(bytes, &mut index);
727        assert!(result.is_ok());
728        assert!(!result.unwrap());
729    }
730
731    #[test]
732    fn test_parse_boolean_invalid() {
733        let _processor = SkipTapeProcessor::new();
734        let bytes = b"invalid";
735        let mut index = 0;
736        let result = SkipTapeProcessor::parse_boolean(bytes, &mut index);
737        assert!(result.is_err());
738    }
739
740    #[test]
741    fn test_parse_number_integer() {
742        let _processor = SkipTapeProcessor::new();
743        let bytes = b"123";
744        let mut index = 0;
745        let result = SkipTapeProcessor::parse_number(bytes, &mut index);
746        assert!(result.is_ok());
747        assert!((result.unwrap() - 123.0).abs() < f64::EPSILON);
748    }
749
750    #[test]
751    fn test_parse_number_negative() {
752        let _processor = SkipTapeProcessor::new();
753        let bytes = b"-456";
754        let mut index = 0;
755        let result = SkipTapeProcessor::parse_number(bytes, &mut index);
756        assert!(result.is_ok());
757        assert!((result.unwrap() - (-456.0)).abs() < f64::EPSILON);
758    }
759
760    #[test]
761    fn test_parse_number_float() {
762        let _processor = SkipTapeProcessor::new();
763        let bytes = b"1.23456";
764        let mut index = 0;
765        let result = SkipTapeProcessor::parse_number(bytes, &mut index);
766        assert!(result.is_ok());
767        assert!((result.unwrap() - 1.23456).abs() < 0.001);
768    }
769
770    #[test]
771    fn test_parse_number_scientific() {
772        let _processor = SkipTapeProcessor::new();
773        let bytes = b"1.5e10";
774        let mut index = 0;
775        let result = SkipTapeProcessor::parse_number(bytes, &mut index);
776        assert!(result.is_ok());
777        assert!((result.unwrap() - 1.5e10).abs() < 1.0);
778    }
779
780    #[test]
781    fn test_skip_object_simple() {
782        let _processor = SkipTapeProcessor::new();
783        let bytes = b"{\"a\": 1}";
784        let result = SkipTapeProcessor::skip_object(bytes, 0);
785        assert!(result.is_ok());
786        assert_eq!(result.unwrap(), 7);
787    }
788
789    #[test]
790    fn test_skip_object_nested() {
791        let _processor = SkipTapeProcessor::new();
792        let bytes = b"{\"a\": {\"b\": 1}}";
793        let result = SkipTapeProcessor::skip_object(bytes, 0);
794        assert!(result.is_ok());
795    }
796
797    #[test]
798    fn test_skip_object_with_string() {
799        let _processor = SkipTapeProcessor::new();
800        let bytes = b"{\"a\": \"}\"}";
801        let result = SkipTapeProcessor::skip_object(bytes, 0);
802        assert!(result.is_ok());
803    }
804
805    #[test]
806    fn test_skip_array_simple() {
807        let _processor = SkipTapeProcessor::new();
808        let bytes = b"[1, 2, 3]";
809        let result = SkipTapeProcessor::skip_array(bytes, 0);
810        assert!(result.is_ok());
811    }
812
813    #[test]
814    fn test_skip_array_nested() {
815        let _processor = SkipTapeProcessor::new();
816        let bytes = b"[[1], [2]]";
817        let result = SkipTapeProcessor::skip_array(bytes, 0);
818        assert!(result.is_ok());
819    }
820
821    #[test]
822    fn test_skip_array_with_string() {
823        let _processor = SkipTapeProcessor::new();
824        let bytes = b"[\"]\"]";
825        let result = SkipTapeProcessor::skip_array(bytes, 0);
826        assert!(result.is_ok());
827    }
828
829    #[test]
830    fn test_expect_keyword_null() {
831        let _processor = SkipTapeProcessor::new();
832        let bytes = b"null";
833        let mut index = 0;
834        let result = SkipTapeProcessor::expect_keyword(bytes, &mut index, b"null");
835        assert!(result.is_ok());
836    }
837
838    #[test]
839    fn test_expect_keyword_mismatch() {
840        let _processor = SkipTapeProcessor::new();
841        let bytes = b"nope";
842        let mut index = 0;
843        let result = SkipTapeProcessor::expect_keyword(bytes, &mut index, b"null");
844        assert!(result.is_err());
845    }
846
847    #[test]
848    fn test_skip_value_string() {
849        let _processor = SkipTapeProcessor::new();
850        let bytes = b"\"test\"";
851        let mut index = 0;
852        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
853        assert!(result.is_ok());
854    }
855
856    #[test]
857    fn test_skip_value_number() {
858        let _processor = SkipTapeProcessor::new();
859        let bytes = b"123";
860        let mut index = 0;
861        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
862        assert!(result.is_ok());
863    }
864
865    #[test]
866    fn test_skip_value_boolean() {
867        let _processor = SkipTapeProcessor::new();
868        let bytes = b"true";
869        let mut index = 0;
870        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
871        assert!(result.is_ok());
872    }
873
874    #[test]
875    fn test_skip_value_null() {
876        let _processor = SkipTapeProcessor::new();
877        let bytes = b"null";
878        let mut index = 0;
879        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
880        assert!(result.is_ok());
881    }
882
883    #[test]
884    fn test_skip_value_object() {
885        let _processor = SkipTapeProcessor::new();
886        let bytes = b"{\"a\": 1}";
887        let mut index = 0;
888        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
889        assert!(result.is_ok());
890    }
891
892    #[test]
893    fn test_skip_value_array() {
894        let _processor = SkipTapeProcessor::new();
895        let bytes = b"[1, 2, 3]";
896        let mut index = 0;
897        let result = SkipTapeProcessor::skip_value(bytes, &mut index);
898        assert!(result.is_ok());
899    }
900
901    #[test]
902    fn test_process_line_unexpected_character() {
903        let mut processor = SkipTapeProcessor::new();
904        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
905        // This should either error or handle gracefully - the '@' is unexpected
906        let result = processor.process_line(r#"{"name": @invalid}"#, &schema);
907        // The result depends on parser behavior - just verify it doesn't panic
908        let _ = result;
909    }
910
911    #[test]
912    fn test_reset_clears_arena() {
913        let mut processor = SkipTapeProcessor::new();
914        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
915        let _ = processor.process_line(r#"{"name": "test"}"#, &schema);
916        processor.reset();
917        // After reset, we can process again
918        let result = processor.process_line(r#"{"name": "test2"}"#, &schema);
919        assert!(result.is_ok());
920    }
921
922    #[test]
923    fn test_process_line_unterminated_string() {
924        let mut processor = SkipTapeProcessor::new();
925        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
926        let result = processor.process_line(r#"{"name": "unterminated"#, &schema);
927        // Should be an error due to unterminated string
928        assert!(result.is_err());
929    }
930
931    #[test]
932    fn test_process_line_invalid_boolean_literal() {
933        let mut processor = SkipTapeProcessor::new();
934        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
935        // "trux" is not a valid boolean - verify it doesn't panic
936        let result = processor.process_line(r#"{"flag": trux}"#, &schema);
937        // The parser may handle this gracefully or error - just don't panic
938        let _ = result;
939    }
940
941    #[test]
942    fn test_process_line_invalid_null_literal() {
943        let mut processor = SkipTapeProcessor::new();
944        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
945        // "nulx" is not valid - verify it doesn't panic
946        let result = processor.process_line(r#"{"value": nulx}"#, &schema);
947        // The parser may handle this gracefully or error - just don't panic
948        let _ = result;
949    }
950
951    #[test]
952    fn test_process_line_empty_field_name() {
953        let mut processor = SkipTapeProcessor::new();
954        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
955        let result = processor.process_line(r#"{"": "empty key"}"#, &schema);
956        assert!(result.is_ok());
957    }
958
959    #[test]
960    fn test_process_line_unicode_field() {
961        let mut processor = SkipTapeProcessor::new();
962        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
963        let result = processor.process_line(r#"{"日本語": "value", "emoji": "🎉"}"#, &schema);
964        assert!(result.is_ok());
965    }
966
967    #[test]
968    fn test_process_line_schema_no_fields() {
969        let mut processor = SkipTapeProcessor::new();
970        // Empty schema should skip everything
971        let schema = CompiledSchema::compile(&[]).unwrap();
972        let result = processor.process_line(r#"{"name": "test"}"#, &schema);
973        assert!(result.is_ok());
974    }
975
976    #[test]
977    fn test_process_json_array_of_empty_objects() {
978        let mut processor = SkipTapeProcessor::new();
979        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
980        let result = processor.process_json_array(r"[{}, {}, {}]", &schema);
981        assert!(result.is_ok());
982    }
983
984    #[test]
985    fn test_process_line_negative_numbers() {
986        let mut processor = SkipTapeProcessor::new();
987        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
988        let result = processor.process_line(r#"{"value": -123.45}"#, &schema);
989        assert!(result.is_ok());
990    }
991
992    #[test]
993    fn test_process_line_with_colon_in_string() {
994        let mut processor = SkipTapeProcessor::new();
995        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
996        let result = processor.process_line(r#"{"url": "http://example.com"}"#, &schema);
997        assert!(result.is_ok());
998    }
999
1000    #[test]
1001    fn test_process_line_multiple_escape_sequences() {
1002        let mut processor = SkipTapeProcessor::new();
1003        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1004        let result = processor.process_line(r#"{"text": "line1\nline2\ttab\rreturn"}"#, &schema);
1005        assert!(result.is_ok());
1006    }
1007
1008    #[test]
1009    fn test_process_default_impl() {
1010        let processor = SkipTapeProcessor::default();
1011        assert!(std::mem::size_of_val(&processor) > 0);
1012    }
1013
1014    #[test]
1015    fn test_process_line_with_null_value() {
1016        let mut processor = SkipTapeProcessor::new();
1017        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1018        let result = processor.process_line(r#"{"value": null}"#, &schema);
1019        assert!(result.is_ok());
1020    }
1021
1022    #[test]
1023    fn test_process_line_with_boolean_true() {
1024        let mut processor = SkipTapeProcessor::new();
1025        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1026        let result = processor.process_line(r#"{"active": true}"#, &schema);
1027        assert!(result.is_ok());
1028    }
1029
1030    #[test]
1031    fn test_process_line_with_boolean_false() {
1032        let mut processor = SkipTapeProcessor::new();
1033        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1034        let result = processor.process_line(r#"{"active": false}"#, &schema);
1035        assert!(result.is_ok());
1036    }
1037
1038    #[test]
1039    fn test_process_line_with_string_value() {
1040        let mut processor = SkipTapeProcessor::new();
1041        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1042        let result = processor.process_line(r#"{"name": "test value"}"#, &schema);
1043        assert!(result.is_ok());
1044    }
1045
1046    #[test]
1047    fn test_process_line_with_schema_filtering_skip() {
1048        let mut processor = SkipTapeProcessor::new();
1049        // Only include "name" field, skip "age"
1050        let schema = CompiledSchema::compile(&["name".to_string()]).unwrap();
1051        let result = processor.process_line(r#"{"name": "Alice", "age": 30}"#, &schema);
1052        assert!(result.is_ok());
1053        let tape = result.unwrap();
1054        // Should have skipped some nodes
1055        assert!(tape.metadata().skipped_count > 0 || tape.metadata().node_count > 0);
1056    }
1057
1058    #[test]
1059    fn test_process_line_empty_json() {
1060        let mut processor = SkipTapeProcessor::new();
1061        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1062        let result = processor.process_line(r"{}", &schema);
1063        assert!(result.is_ok());
1064    }
1065
1066    #[test]
1067    fn test_process_line_reset() {
1068        let mut processor = SkipTapeProcessor::new();
1069        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1070
1071        // Process first line
1072        let _ = processor.process_line(r#"{"a": 1}"#, &schema);
1073
1074        // Reset and process second line
1075        processor.reset();
1076        let result = processor.process_line(r#"{"b": 2}"#, &schema);
1077        assert!(result.is_ok());
1078    }
1079
1080    #[test]
1081    fn test_process_line_mixed_types() {
1082        let mut processor = SkipTapeProcessor::new();
1083        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1084        let result = processor.process_line(
1085            r#"{"string": "value", "number": 42, "bool": true, "null": null}"#,
1086            &schema,
1087        );
1088        assert!(result.is_ok());
1089    }
1090
1091    #[test]
1092    fn test_process_line_nested_with_filter() {
1093        let mut processor = SkipTapeProcessor::new();
1094        let schema =
1095            CompiledSchema::compile(&["user".to_string(), "user.name".to_string()]).unwrap();
1096        let result = processor.process_line(
1097            r#"{"user": {"name": "Alice", "email": "alice@test.com"}, "other": "ignored"}"#,
1098            &schema,
1099        );
1100        assert!(result.is_ok());
1101    }
1102
1103    #[test]
1104    fn test_process_line_array_with_objects() {
1105        let mut processor = SkipTapeProcessor::new();
1106        let schema = CompiledSchema::compile(&["*".to_string()]).unwrap();
1107        let result = processor.process_line(r#"{"items": [{"id": 1}, {"id": 2}]}"#, &schema);
1108        assert!(result.is_ok());
1109    }
1110
1111    #[test]
1112    fn test_skip_tape_metadata_zero_nodes() {
1113        let mut processor = SkipTapeProcessor::new();
1114        // Empty object should still produce valid tape
1115        let schema = CompiledSchema::compile(&["nonexistent".to_string()]).unwrap();
1116        let result = processor.process_line(r"{}", &schema);
1117        assert!(result.is_ok());
1118    }
1119}